diff --git "a/consolidated_data_optimized.json" "b/consolidated_data_optimized.json" new file mode 100644--- /dev/null +++ "b/consolidated_data_optimized.json" @@ -0,0 +1,82818 @@ +[ + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 59, + "text": "49 implementation at the colline, commune, province and central levels. The MIS will need to interface with the beneficiary database for the cash transfer program and potentially other databases constructed on the same structure for potential additional programs and with the payment agency ( ies ) system. ( b ) The design and implementation of grievance redress mechanisms with different points of entry at the colline, commune and province-levels and different channels for citizen feedback. ( c ) A specific MIS operational manual with a clear definition of roles in access, quality control, update, and safeguarding of personal data. ( d ) The implementation of the modules with the development of software and acquisition of key IT equipment and hardware ( some parts of the MIS and the registry may be hosted in a cloud or on local servers, depending on relative cost-efficiency in terms of safeguarding and resilience ) ( e ) Related communication and multi-media outreach materials as well as training for key stakeholders. Subcomponent 2. 3: Monitoring and evaluation ( US $ 2. 8 million equivalent ) 38. Since the project is supporting new interventions and processes in Burundi and to ensure transparency, the third sub-component will also support process evaluations of the key program processes and an impact evaluation including beneficiary surveys.", + "ner_text": [ + [ + 781, + 784, + "named" + ] + ], + "validated": false, + "empirical_context": "( c ) A specific MIS operational manual with a clear definition of roles in access, quality control, update, and safeguarding of personal data. ( d ) The implementation of the modules with the development of software and acquisition of key IT equipment and hardware ( some parts of the MIS and the registry may be hosted in a cloud or on local servers, depending on relative cost-efficiency in terms of safeguarding and resilience ) ( e ) Related communication and multi-media outreach materials as well as training for key stakeholders. Subcomponent 2.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to data management.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 25, + "text": "The other beneficiaries are public and private service providers. Fayda and the Ethiopia Digital Stack will enable public and private sector service providers to reduce the costs and risks of delivering services. More innovative products and services will be built by leveraging new functionalities that would allow a shift to online channels instead of depending on brick-and-mortar service delivery channels. The GoE will benefit from reduced fraud and leakages, including in social protection and subsidy programs, through the ability, for the first time, to uniquely identify and securely verify individuals. D. Results Chain Figure 2. Project Results Chain E. Rationale for Bank Involvement and Role of Partners 52. The World Bank brings deep knowledge and international experience related to the development of inclusive and trusted identification and civil registration systems and harnessing these for development. Through the ID4D", + "ner_text": [ + [ + 935, + 939, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank brings deep knowledge and international experience related to the development of inclusive and trusted identification and civil registration systems and harnessing these for development. Through the ID4D", + "type": "program", + "explanation": "'ID4D' is mentioned as a program related to identification systems, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ID4D' is a dataset because it is associated with the World Bank's initiatives on identification systems.", + "contextual_reason_agent": "'ID4D' is mentioned as a program related to identification systems, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 144, + "text": "According to preliminary results of the Central African Republic refugee poverty analysis ( American University of Beirut, forthcoming ), using expenditure data from a Household Economy Analysis ( UNHCR / IFORD 2017 ), Central African Republic refugees in the East, Adamawa, and North regions show alarming levels of poverty. Data suggest that 96 percent of refugees fall below the extreme poverty line ( less than CFAF 17, 962 per person per month ). If the Minimum Food Basket ( MFB ) calculated by the WFP is used as a reference, 71 percent of the Central African Republic refugee population fall below this line, meaning that their expenditure is less than CFAF 8, 800 per person per month \u2014 the amount necessary to purchase minimum food energy requirements ( emergency standards: 2, 100 kcal per person per day ).", + "ner_text": [ + [ + 144, + 160, + "named" + ], + [ + 40, + 64, + "expenditure data <> data geography" + ], + [ + 92, + 121, + "expenditure data <> author" + ], + [ + 211, + 215, + "expenditure data <> publication year" + ], + [ + 219, + 252, + "expenditure data <> reference population" + ] + ], + "validated": true, + "empirical_context": "According to preliminary results of the Central African Republic refugee poverty analysis ( American University of Beirut, forthcoming ), using expenditure data from a Household Economy Analysis ( UNHCR / IFORD 2017 ), Central African Republic refugees in the East, Adamawa, and North regions show alarming levels of poverty. Data suggest that 96 percent of refugees fall below the extreme poverty line ( less than CFAF 17, 962 per person per month ).", + "type": "data", + "explanation": "In this context, 'expenditure data' is indeed used as a source of information for the poverty analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'expenditure data' is a dataset because it refers to specific data used in the analysis.", + "contextual_reason_agent": "In this context, 'expenditure data' is indeed used as a source of information for the poverty analysis.", + "contextual_signal": "follows 'using data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "182_multi0page", + "page": 10, + "text": "The Govermment faces real difficulties in responding to these challenges: ( i ) current household level survey data is not representative of the population, and other information on living standards is ad hoc, irregular and of varying quality; ( ii ) the capacity of the Government to evaluate policy-relevant information is limited by knowledge and resource constraints; ( iii ) the policy environment is extremely fluid, there is high government turnover of officials, and Government efforts tend to be focused on crisis management; and ( iv ) roles and responsibilities for policy development and evaluation remain unclear, both within and between agencies. The project will address these issues by ( i ) supporting the national statistical agency ( INSTAT ) to develop and implement a permanent household survey instrument that will collect data on a regular basis, utilizing a new sample frame derived from the 2001 Census as well as new questionnaires and data management procedures; ( ii ) developing and implementing a strategic plan for social policy monitoring and evaluation within and between agencies, covering both social insurance and social assistance; and ( iii ) developing and implementing an associated training plan. - 7 -", + "ner_text": [ + [ + 916, + 927, + "named" + ] + ], + "validated": true, + "empirical_context": "The Govermment faces real difficulties in responding to these challenges: ( i ) current household level survey data is not representative of the population, and other information on living standards is ad hoc, irregular and of varying quality; ( ii ) the capacity of the Government to evaluate policy-relevant information is limited by knowledge and resource constraints; ( iii ) the policy environment is extremely fluid, there is high government turnover of officials, and Government efforts tend to be focused on crisis management; and ( iv ) roles and responsibilities for policy development and evaluation remain unclear, both within and between agencies. The project will address these issues by ( i ) supporting the national statistical agency ( INSTAT ) to develop and implement a permanent household survey instrument that will collect data on a regular basis, utilizing a new sample frame derived from the 2001 Census as well as new questionnaires and data management procedures; ( ii ) developing and implementing a strategic plan for social policy monitoring and evaluation within and between agencies, covering both social insurance and social assistance; and ( iii ) developing and implementing an associated training plan. - 7 -", + "type": "census", + "explanation": "The 2001 Census is explicitly mentioned as a source for deriving a new sample frame, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific census that typically contains structured data about the population.", + "contextual_reason_agent": "The 2001 Census is explicitly mentioned as a source for deriving a new sample frame, indicating its use as a data source.", + "contextual_signal": "mentioned as a source for deriving a new sample frame", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "131_PAD7910PAD0P14010Box385199B00OUO090", + "page": 10, + "text": "1 I. STRATEGIC CONTEXT A. Country Context 1. Djibouti is a small low-income country located in the Horn of Africa with a high level of urbanization. Faced with scarcity of arable land and water resources, the country \u2019 s traditionally nomadic people have migrated to urban areas. Approximately 71 percent of Djibouti \u2019 s estimated 865, 000 inhabitants live in cities which are expanding annually by more than four percent ( Djibouti Census 2009 ). The capital of Djibouti City ( Djibouti Ville ) has 475, 322 inhabitants \u2014 including 25 percent of \u201c particular population, \u201d the official term for foreigners, formal, and informal residents. Fifty-eight percent of the total population and 82 percent of the total urban population is concentrated in Djibouti City. 2. Sustained economic growth has been driven by large inflows of foreign investment, particularly in port activities and the transport and logistics sector. Over the last decade, landlocked Ethiopia \u2019 s high demand for transit trade and transshipment activities has allowed Djibouti to capitalize on its strategic location along the Red Sea, turning its ports into the engines of its economy. In 2000, the Government of Djibouti signed a 20-year management concession with Dubai Port World ( DPW ), which contributed to attracting large amounts of foreign direct investment ( FDI ).", + "ner_text": [ + [ + 424, + 439, + "named" + ], + [ + 45, + 53, + "Djibouti Census <> data geography" + ], + [ + 99, + 113, + "Djibouti Census <> data geography" + ], + [ + 440, + 444, + "Djibouti Census <> publication year" + ], + [ + 463, + 476, + "Djibouti Census <> data geography" + ], + [ + 748, + 761, + "Djibouti Census <> data geography" + ], + [ + 1037, + 1045, + "Djibouti Census <> data geography" + ] + ], + "validated": true, + "empirical_context": "Faced with scarcity of arable land and water resources, the country \u2019 s traditionally nomadic people have migrated to urban areas. Approximately 71 percent of Djibouti \u2019 s estimated 865, 000 inhabitants live in cities which are expanding annually by more than four percent ( Djibouti Census 2009 ). The capital of Djibouti City ( Djibouti Ville ) has 475, 322 inhabitants \u2014 including 25 percent of \u201c particular population, \u201d the official term for foreigners, formal, and informal residents.", + "type": "census", + "explanation": "The context confirms it is a dataset as it provides empirical data about the population of Djibouti.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Djibouti Census' refers to a structured collection of demographic data.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides empirical data about the population of Djibouti.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 17, + "text": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3. Project Design Targeting In the absence of data on household income, the Project uses geographical targeting at the level of the governorates where poverty level is estimated at 18. 3 % on average, 32. 5 % in Hebron and 21. 3 % in Bethlehem ( 2010 ) 9. Both governorates also have the highest unemployment rates in the West Bank with averages of 22. 8 % and 22. 4 %, respectively10. The vulnerability of these groups is exacerbated by mobility restrictions and poor market access. Output-based subsidy payment 8 These closures are part of the World Bank project and including them in the OBA targets will further incentivize JSC-H & B to achieve closures according to the planned schedule. In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "ner_text": [ + [ + 1607, + 1626, + "named" + ], + [ + 907, + 913, + "Labour Force Survey <> data geography" + ], + [ + 929, + 938, + "Labour Force Survey <> data geography" + ], + [ + 941, + 945, + "Labour Force Survey <> reference year" + ], + [ + 1017, + 1026, + "Labour Force Survey <> data geography" + ], + [ + 1493, + 1502, + "Labour Force Survey <> data geography" + ], + [ + 1547, + 1551, + "Labour Force Survey <> publication year" + ], + [ + 1555, + 1560, + "Labour Force Survey <> publisher" + ], + [ + 1644, + 1648, + "Labour Force Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as a source of information for the Labour Force Survey results.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Labour Force Survey' is a recognized term for a structured collection of data related to employment statistics.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as a source of information for the Labour Force Survey results.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "173_multi0page", + "page": 28, + "text": "Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: Limiting and reversing the - At least 55 % of high risk KAP studies Continued political support by trend of the epidemic by population ( youth, army, sex ministers and religious groups preventing new infections workers, truck drivers, miners ) used a condom in their last sexual encounter. - at least 50 % of people who Baseline is the 1999 DHS are aware of AIDS have used a which showed 27 % for men condom with a partner other and 18 % for women. than their regular partner in the last 12 months - Keep the HIV prevalence Data from sentinel sites, and armong 15 to 24 year old sero-prevalence surveys in urban pregnant women below 2002 and in 2006 5 % by 2006. Output from each Output Indicators: Project reports: ( from Outputs to Objective ) Component: Support to Commrunity based initiatives A social support system is - Increase by at least 1, 500 the Surveys Contracted agency ( ies ) able to developed in which CCC number of orphans who attend provide the relevant service. activities are strengthened in school regularly as of 2004. the communities for all groups affected by HIV / AIDS ( orphans, PLWHA, high risk groups ) - Decrease by at least 20 % the Baseline is the 1999 DHS; % of women and men who KAP Surveys don ' t know any mean to CNLS progress reports - 25 -", + "ner_text": [ + [ + 1318, + 1329, + "named" + ], + [ + 339, + 345, + "KAP Surveys <> reference population" + ], + [ + 440, + 448, + "KAP Surveys <> reference year" + ], + [ + 749, + 753, + "KAP Surveys <> publication year" + ], + [ + 1140, + 1144, + "KAP Surveys <> publication year" + ], + [ + 1211, + 1216, + "KAP Surveys <> reference population" + ], + [ + 1285, + 1293, + "KAP Surveys <> reference year" + ] + ], + "validated": true, + "empirical_context": "activities are strengthened in school regularly as of 2004. the communities for all groups affected by HIV / AIDS ( orphans, PLWHA, high risk groups ) - Decrease by at least 20 % the Baseline is the 1999 DHS; % of women and men who KAP Surveys don ' t know any mean to CNLS progress reports - 25 -", + "type": "survey", + "explanation": "In the context, 'KAP Surveys' is mentioned in relation to measuring knowledge and attitudes, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'KAP Surveys' implies a structured collection of data related to knowledge, attitudes, and practices.", + "contextual_reason_agent": "In the context, 'KAP Surveys' is mentioned in relation to measuring knowledge and attitudes, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 99, + "text": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees \u2022 Survey of schools to identify potential beneficiaries from refugee-affected local councils \u2022 Mobilization and sensitization of communities in reaching refugees for enrolling and retaining children in school \u2022 Development of Response Strategy and Plan 2019-2023 Priority Area 3: Improving service delivery through performance-based Financing ( PBF ) at school level \u2022 Strengthening capacity of school management councils \u2022 Mobilizing and strengthening community participation in school management", + "ner_text": [ + [ + 728, + 732, + "named" + ] + ], + "validated": false, + "empirical_context": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data management and information systems.", + "contextual_reason_agent": "However, EMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "The MIS will collect information from applicants during registration, such as ( a ) key contact information, gender, age, marital status, education status, refugee status, employment; ( b ) business level outcomes such as business partners, business age, monthly profits, number of employees; and ( c ) access to other programs and loans. The MIS database will be updated as program applicants make use of specific components. The GROW_ID can be entered to update the database with any additional information on services received to ensure the applicant is only registered once. For example, dates of business trainings, service provider, or amount of funds will all be entered into the MIS. This database will help support operations through feedback loops as it can track who is accessing which services in real time. The MIS will also be important in being able to establish a sample of study participants to draw on for an impact evaluation or other learning activities. 61. The project will also support MGLSD \u2019 s capacity to lead and oversight social risk management. At the national level, this will support capacity building of unit staff, public fora, SRM experts, etc.", + "ner_text": [ + [ + 687, + 690, + "named" + ] + ], + "validated": false, + "empirical_context": "The GROW_ID can be entered to update the database with any additional information on services received to ensure the applicant is only registered once. For example, dates of business trainings, service provider, or amount of funds will all be entered into the MIS. This database will help support operations through feedback loops as it can track who is accessing which services in real time.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a management information system, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages information.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a management information system, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "060_Yemen-Emergency-COVID-19-Project", + "page": 18, + "text": "This improves essential healthcare service delivery and enables people to access the appropriate care, which builds resilience that is especially key for the poor who are the most vulnerable and least equipped to handle the impacts of climate change. C. Project Beneficiaries 29. The expected project beneficiaries will be the entire population in Yemen including nationals and non-nationals, medical and emergency personnel, laboratory and testing facilities, and health agencies across the country. In 2018, the total estimated population size was 28. 9 million, 13 including about 24 million needing humanitarian assistance. 14 For immediate response to stop the transmission and allocate necessary resources for treatment of cases, the project will specifically target communities that have high risks of local transmission, such as highly populated cities across the whole country. 9 https: / / www. who. int / countries / yem / en /. Accessed on March 23, 2020. 10 https: / / www. who. int / bulletin / volumes / 93 / 10 / 15-021015 / en /. Accessed on March 23, 2020. 11 https: / / apps. who. int / gho / data / node. country. country-YEM. Accessed on March 23, 2020. 12 https: / / data. worldbank. org / indicator / SH. MED. BEDS. ZS? view = chart. Accessed on March 23, 2020. 13 The World Bank. World Development Indicators Data Bank. https: / / databank. worldbank. org / source / world-development-indicators. Accessed on March 21, 2020. 14 United Nations Office for the Coordination of Humanitarian Affairs. Relief Web data on Yemen. https: / / m. reliefweb. int / report / 3422113. Accessed on March 21, 2020.", + "ner_text": [ + [ + 1520, + 1535, + "named" + ], + [ + 348, + 353, + "Relief Web data <> data geography" + ], + [ + 504, + 508, + "Relief Web data <> publication year" + ], + [ + 1452, + 1518, + "Relief Web data <> author" + ], + [ + 1539, + 1544, + "Relief Web data <> data geography" + ] + ], + "validated": true, + "empirical_context": "14 United Nations Office for the Coordination of Humanitarian Affairs. Relief Web data on Yemen. https: / / m.", + "type": "data", + "explanation": "The term 'Relief Web data' is explicitly mentioned as a source of information related to Yemen, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'data' from a recognized source.", + "contextual_reason_agent": "The term 'Relief Web data' is explicitly mentioned as a source of information related to Yemen, indicating it functions as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 35, + "text": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 30 of 43 Monitoring & Evaluation Plan: PDO Indicators by PDO Outcomes Outcome 1: Improve utilization of quality primary health care services Percentage of women receiving postnatal care within 48 hours ( Percentage ) Description Numerator: Number of women receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine Health Management Information System ( HMIS ) data collection Responsibility for Data Collection MoH Percentage of women receiving postnatal care within 48 hours in the 10 selected counties ( Percentage ) Description Numerator: Number of women, in the 10 selected counties, receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births, in the 10 selected counties, during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of host community women receiving postnatal care within 48 hours in Garissa and Turkana ( Percentage ) Description Numerator: Number of host community women in Garissa and Turkana, receiving postnatal care after delivery within 48 hours.", + "ner_text": [ + [ + 569, + 605, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 30 of 43 Monitoring & Evaluation Plan: PDO Indicators by PDO Outcomes Outcome 1: Improve utilization of quality primary health care services Percentage of women receiving postnatal care within 48 hours ( Percentage ) Description Numerator: Number of women receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine Health Management Information System ( HMIS ) data collection Responsibility for Data Collection MoH Percentage of women receiving postnatal care within 48 hours in the 10 selected counties ( Percentage ) Description Numerator: Number of women, in the 10 selected counties, receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births, in the 10 selected counties, during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of host community women receiving postnatal care within 48 hours in Garissa and Turkana ( Percentage ) Description Numerator: Number of host community women in Garissa and Turkana, receiving postnatal care after delivery within 48 hours.", + "type": "system", + "explanation": "However, it is described as a system for data collection rather than a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, it is described as a system for data collection rather than a dataset itself.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 52, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 43 of 68 inclusion, and sustainability ) Number of sector staff participating in capacity building trainings under the project This indicator will measure the number of sector staff trained on ( EMIS, national and international assessments and data analysis, tutoring / accelerated learning programs, ECEC reforms, integrated approach to designing and implementing resilient, sustainable, and inclusive school and preschool projects ). Progress will be tracked, and the capacity building plan will be updated every two years. Annual Progress and monitoring reports, capacity building plan Progress and monitoring reports NORLD, MoER, PMT Monitoring tool with reliable gender - disaggregated data to identify disadvantaged students to receive accelerated learning This indicator will capture the development of the monitoring tool with reliable gender-disaggregated data to identify disadvantaged students to receive tutoring, accelerated learning or other catch-up program Annual EMIS EMIS and monitoring tool data CTICE, MoER, PMT Improved regulatory environment for affordable and quality childcare for preschool children This indicator will measure the number of changes made to subsector regulations, with adjustments to the norms on the space, trainings for Annual Policy documents, progress and monitoring reports Policy documents, progress and monitoring reports MoER, PMT", + "ner_text": [ + [ + 265, + 269, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 43 of 68 inclusion, and sustainability ) Number of sector staff participating in capacity building trainings under the project This indicator will measure the number of sector staff trained on ( EMIS, national and international assessments and data analysis, tutoring / accelerated learning programs, ECEC reforms, integrated approach to designing and implementing resilient, sustainable, and inclusive school and preschool projects ). Progress will be tracked, and the capacity building plan will be updated every two years.", + "type": "program", + "explanation": "However, EMIS is mentioned as part of a capacity building training program, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data analysis and assessments.", + "contextual_reason_agent": "However, EMIS is mentioned as part of a capacity building training program, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "In addition to software and hardware management, QRC is also responsible for data verification and will coordinate with the DCU and individual units to provide all necessary disaggregated data needed to monitor Program indicators and DLIs. 53. While training on the use of the OpenEMIS was conducted during the 2016 \u2013 2017 academic year, there is a critical need to build MOE staff capacity in this regard. In order to exploit the full potential of the OpenEMIS for policy development and planning, staff capacity in the fundamentals of data analytics is necessary and will be supported by this Program. C. Disbursement Arrangements 54. For the PforR part, the DLIs will be used for disbursement. There are eight DLIs spread across five years. 55. The disbursement will be contingent upon the Government furnishing evidence satisfactory to the World Bank that it has achieved the respective disbursement \u2010 linked results ( DLRs ) and these are verified by the Independent Verification Agency ( IVA ) as specified in the verification protocol. Application for withdrawal from the World Bank \u2019 s financing account of amounts allocated to individual DLRs and calculated in accordance with the relevant formula will be sent to the World Bank any time after the World Bank has notified the GOJ in writing that it has accepted evidence of achievement of the DLRs and the amount eligible for payment.", + "ner_text": [ + [ + 453, + 461, + "named" + ] + ], + "validated": false, + "empirical_context": "While training on the use of the OpenEMIS was conducted during the 2016 \u2013 2017 academic year, there is a critical need to build MOE staff capacity in this regard. In order to exploit the full potential of the OpenEMIS for policy development and planning, staff capacity in the fundamentals of data analytics is necessary and will be supported by this Program. C.", + "type": "program", + "explanation": "However, OpenEMIS is mentioned as a program aimed at building capacity, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset because it is associated with data analytics and policy development.", + "contextual_reason_agent": "However, OpenEMIS is mentioned as a program aimed at building capacity, not as a structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 35, + "text": "One could argue that all Palestinian students are currently experiencing a diversity of needs, educational and / or psychosocial, and that these should be recognized and catered for. This requires a change of focus from providing access to providing quality education relevant to the diverse needs of all students \u2013 a paradigm shift from a special education and disability focus to inclusive education. The limited ability of the school system to provide psychosocial counseling and other services for affected children, and especially for students with special needs, has become evident. Although the EDSP calls for disability disaggregated monitoring of results, the EMIS and the M & E system are currently not capable of producing reliable data and indicators for evidence-based decision-making related to disabled learners. 118. The EDSP reports an alarming decline in pre-school education. Poor and special needs children stand to benefit most of preschool programs. In regular classrooms the current academic and overloaded school curriculum presents disproportionate challenges to learners with special needs. Meeting the needs of conflict-affected children, as well as the needs of their teachers and parents, deserves special attention especially in Gaza but also in the West Bank. Providing access and improving support services for students with special educational needs is among the key challenges of the EDSP. Supervision 119. Current Situation. Supervision is the essential link between the school and the planner.", + "ner_text": [ + [ + 682, + 694, + "named" + ] + ], + "validated": false, + "empirical_context": "The limited ability of the school system to provide psychosocial counseling and other services for affected children, and especially for students with special needs, has become evident. Although the EDSP calls for disability disaggregated monitoring of results, the EMIS and the M & E system are currently not capable of producing reliable data and indicators for evidence-based decision-making related to disabled learners. 118.", + "type": "system", + "explanation": "However, the context indicates that it is not functioning as a data source since it is described as not capable of producing reliable data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'M & E system' suggests a structured approach to monitoring and evaluation.", + "contextual_reason_agent": "However, the context indicates that it is not functioning as a data source since it is described as not capable of producing reliable data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 41, + "text": "It is expected that an M & E specialist will be recruited in the PCU to work closely with the MEP / MES and build government capacities to ensure that data collection, cleaning, analysis, and reporting are completed according to the project requirements within three months after project effectiveness. The project is also designed to support activities that will enhance the EMIS, modernize the information systems at the MEP and MES, and build the capacities of education staff in M & E as outlined in the project description. 81. The M & E activities will be performed by the TMC with regular implementation support from the World Bank. At the project level, the TMC, comprising the PCU and the MEP / MES component team leads, listed in annex 1, will meet with the World Bank team once a month to review the project \u2019 s implementation progress. The TMC will be responsible for preparing a semiannual report on progress of the project, including indicators of the project \u2019 s Results Framework. These reports will cover six-month periods across the life of the project and will be delivered to the World Bank within 30 days following the end of the period covered and for the World Bank \u2019 s implementation support missions.", + "ner_text": [ + [ + 376, + 380, + "named" + ] + ], + "validated": false, + "empirical_context": "It is expected that an M & E specialist will be recruited in the PCU to work closely with the MEP / MES and build government capacities to ensure that data collection, cleaning, analysis, and reporting are completed according to the project requirements within three months after project effectiveness. The project is also designed to support activities that will enhance the EMIS, modernize the information systems at the MEP and MES, and build the capacities of education staff in M & E as outlined in the project description. 81.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to information systems and data management.", + "contextual_reason_agent": "However, EMIS is mentioned as a system for managing information rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 907, + 912, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "program", + "explanation": "'UNISE' is mentioned as a source of data collection but is not described as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'UNISE' is a dataset because it appears in a context discussing data collection and reporting.", + "contextual_reason_agent": "'UNISE' is mentioned as a source of data collection but is not described as a structured collection of data itself.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 3, + "validated": 1, + "not_validated": 2 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 406, + 411, + "named" + ], + [ + 578, + 581, + "NEMIS <> author" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in relation to data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 59, + "text": "49 implementation at the colline, commune, province and central levels. The MIS will need to interface with the beneficiary database for the cash transfer program and potentially other databases constructed on the same structure for potential additional programs and with the payment agency ( ies ) system. ( b ) The design and implementation of grievance redress mechanisms with different points of entry at the colline, commune and province-levels and different channels for citizen feedback. ( c ) A specific MIS operational manual with a clear definition of roles in access, quality control, update, and safeguarding of personal data. ( d ) The implementation of the modules with the development of software and acquisition of key IT equipment and hardware ( some parts of the MIS and the registry may be hosted in a cloud or on local servers, depending on relative cost-efficiency in terms of safeguarding and resilience ) ( e ) Related communication and multi-media outreach materials as well as training for key stakeholders. Subcomponent 2. 3: Monitoring and evaluation ( US $ 2. 8 million equivalent ) 38. Since the project is supporting new interventions and processes in Burundi and to ensure transparency, the third sub-component will also support process evaluations of the key program processes and an impact evaluation including beneficiary surveys.", + "ner_text": [ + [ + 76, + 79, + "named" + ] + ], + "validated": false, + "empirical_context": "49 implementation at the colline, commune, province and central levels. The MIS will need to interface with the beneficiary database for the cash transfer program and potentially other databases constructed on the same structure for potential additional programs and with the payment agency ( ies ) system. ( b ) The design and implementation of grievance redress mechanisms with different points of entry at the colline, commune and province-levels and different channels for citizen feedback.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to data management.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 42, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 31 Indicator Name Percent of the service areas achieve cost recovery ratio of 1. 1 under the project Definition / Description Percent of service areas supported by the project that achieve cost recovery ratio of 1. 1. Cost recovery ratio is the revenues collected over operating expenses for selected service areas under the project. Baseline was derived from the existing service areas that have been gazetted: Gulu, Mbale, Adjumani, Busia, Namasale, Kyegegwa-Mpara-Ruyonza, Namungalwe-Kaliro and Budaka-Kadama-Tirinyi-Kibuku - Butaleja-Busolwe. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Accounting System ( SCALA ), Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural and Urban / NWSC", + "ner_text": [ + [ + 685, + 701, + "named" + ] + ], + "validated": false, + "empirical_context": "Baseline was derived from the existing service areas that have been gazetted: Gulu, Mbale, Adjumani, Busia, Namasale, Kyegegwa-Mpara-Ruyonza, Namungalwe-Kaliro and Budaka-Kadama-Tirinyi-Kibuku - Butaleja-Busolwe. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Accounting System ( SCALA ), Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 16, + "text": "Data vary for numbers of out of school children: while the United Nations Educational, Scientific and Cultural Organization ( UNESCO ) Institute for Statistics ( UIS ) puts the number at 41, 062 ( 2018 ), and Djibouti \u2019 s national statistical institute ( DISED ) puts it at 32, 750, the latest EDAM4 household survey suggests that approximately 20, 880 children from 6-14 years old are out of school, which represents 19 percent nationally ( though this figure is greater than 30 percent in four regions ). These regional disparities suggest that there may be some correlation between access to services and the percentage of children out of the school system. According to the survey, the main reasons cited by parents for not schooling their children are \u2018 lack of interest \u2019, age ( \u2018 children are too young \u2019 ), opportunity costs ( \u2018 children need to work \u2019 ), access, quality of education and lack of necessary documentation ( birth certificates, etc. ). 20. MENFOP has outlined three priority actions to improve access: reduce class size, eliminate double shifting, and achieve universal primary education. Targets are given in the PAE 2017-20 for the number of new classes needed by year but often the resources allocated in the national budget are not adequate. School expansion plans are usually for the immediate short term and become possible only when funding is identified. Longer term planning that can provide various hypothetical planning scenarios ( depending on MENFOP \u2019 s choice of priority or priorities to be addressed ), is not available. The procedure for selecting sites depends on location: in the capital city and especially in high density areas like Balbala ( a peri-urban neighborhood bordering Djibouti Ville ), MENFOP works with the Ministry of Habitat to select sites, while in", + "ner_text": [ + [ + 294, + 316, + "named" + ], + [ + 197, + 201, + "EDAM4 household survey <> publication year" + ], + [ + 209, + 217, + "EDAM4 household survey <> data geography" + ], + [ + 1677, + 1684, + "EDAM4 household survey <> data geography" + ], + [ + 1824, + 1842, + "EDAM4 household survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Data vary for numbers of out of school children: while the United Nations Educational, Scientific and Cultural Organization ( UNESCO ) Institute for Statistics ( UIS ) puts the number at 41, 062 ( 2018 ), and Djibouti \u2019 s national statistical institute ( DISED ) puts it at 32, 750, the latest EDAM4 household survey suggests that approximately 20, 880 children from 6-14 years old are out of school, which represents 19 percent nationally ( though this figure is greater than 30 percent in four regions ). These regional disparities suggest that there may be some correlation between access to services and the percentage of children out of the school system.", + "type": "survey", + "explanation": "The EDAM4 household survey is explicitly mentioned as providing data on the number of out of school children, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a household survey that provides specific data on out of school children.", + "contextual_reason_agent": "The EDAM4 household survey is explicitly mentioned as providing data on the number of out of school children, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 35, + "text": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 31 of 44 Beneficiaries of Safety Nets programs - Other cash transfers programs ( number ) Quarterly Project administrativ e data Routine monitoring SEAS Beneficiary women with a child aged 0-6 months practicing exclusive breastfeeding Percentage of beneficiary women with a child aged 0 - 6 months who participate in community sessions Twice Survey Survey at middle and end of project SEAS Beneficiary households below the poverty line Percentage of households benefitting from cash transfers with consumption levels below the poverty line Once Survey Targeting evaluation SEAS Households registered in the national social registry Number of unique heads of households in the registry, regardless of poverty status or program eligibility Quarterly Project administrativ e data Routine monitoring SEAS Beneficiaries with access to basic services infrastructure financed by the project Number of people estimated to be direct beneficiaries of sub-projects under Component 3 Quarterly Project administrativ e data Routine monitoring SEAS ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Cash transfers paid to eligible beneficiaries Total amount of cash transfers paid Quarterly Project administrativ e data Routine monitoring SEAS Community sessions organized as part of accompanying measures Number of community sessions under Sub - Quarterly Project administrativ Routine monitoring SEAS", + "ner_text": [ + [ + 690, + 714, + "named" + ], + [ + 661, + 671, + "national social registry <> reference population" + ], + [ + 715, + 751, + "national social registry <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 31 of 44 Beneficiaries of Safety Nets programs - Other cash transfers programs ( number ) Quarterly Project administrativ e data Routine monitoring SEAS Beneficiary women with a child aged 0-6 months practicing exclusive breastfeeding Percentage of beneficiary women with a child aged 0 - 6 months who participate in community sessions Twice Survey Survey at middle and end of project SEAS Beneficiary households below the poverty line Percentage of households benefitting from cash transfers with consumption levels below the poverty line Once Survey Targeting evaluation SEAS Households registered in the national social registry Number of unique heads of households in the registry, regardless of poverty status or program eligibility Quarterly Project administrativ e data Routine monitoring SEAS Beneficiaries with access to basic services infrastructure financed by the project Number of people estimated to be direct beneficiaries of sub-projects under Component 3 Quarterly Project administrativ e data Routine monitoring SEAS ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Cash transfers paid to eligible beneficiaries Total amount of cash transfers paid Quarterly Project administrativ e data Routine monitoring SEAS Community sessions organized as part of accompanying measures Number of community sessions under Sub - Quarterly Project administrativ Routine monitoring SEAS", + "type": "registry", + "explanation": "In the context, it is explicitly mentioned as 'Households registered in the national social registry', indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of registered households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as 'Households registered in the national social registry', indicating it serves as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 62, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 58 of 86 VII. RESULTS FRAMEWORK AND MONITORING Results Framework COUNTRY: Burundi Cash for Jobs Project Project Development Objectives ( s ) The development objective of the project is to strengthen management capacity, scale up safety net programs, and promote productive inclusion and access to jobs Project Development Objective Indicators RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline End Target To strengthen management capacity Households in targeted areas included in the National Social Registry ( Number ) 0. 00 200, 000. 00 Households in targeted areas included in the National Social Registry - refugees, disaggregated by gender ( Number ) 0. 00 15, 000. 00 Households in targeted areas included in the National Social Registry - host communities, disaggregated by gender ( Number ) 0. 00 25, 000. 00 Proportion of households enrolled in the beneficiary database living below the extreme poverty line ( Percentage ) 0. 00 80. 00", + "ner_text": [ + [ + 536, + 560, + "named" + ], + [ + 128, + 135, + "National Social Registry <> data geography" + ], + [ + 491, + 501, + "National Social Registry <> reference population" + ], + [ + 726, + 736, + "National Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Cash for Jobs Project ( P175327 ) Page 58 of 86 VII. RESULTS FRAMEWORK AND MONITORING Results Framework COUNTRY: Burundi Cash for Jobs Project Project Development Objectives ( s ) The development objective of the project is to strengthen management capacity, scale up safety net programs, and promote productive inclusion and access to jobs Project Development Objective Indicators RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline End Target To strengthen management capacity Households in targeted areas included in the National Social Registry ( Number ) 0. 00 200, 000.", + "type": "registry", + "explanation": "In the context, it is explicitly mentioned as the National Social Registry, which serves as a structured collection of data for the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that includes households in targeted areas.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as the National Social Registry, which serves as a structured collection of data for the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 63, + "text": "54 Annex 7. Economic Analysis 1. The net effect of the Program at the individual \u2019 s level is calculated as the additional benefit that a representative child obtains as a result of the Program. This effect is estimated from a present discounted value ( PDV ) calculation. This approach estimates the stream of benefits and costs of schooling over a lifetime in the labor market with and without the Program. 2. Data for this analysis are obtained mainly from the 2010 Income and Expenditure Survey, a household survey of the labor force, and from the 2015 Population and Housing Census conducted by the Department of Statistics. This representative information accounts for the entire population of Jordan of all ages. It is worth noting that the estimates below are considered under estimates as they do not account for the social benefits of more and better education. Estimation of expected economic benefits 3. The private benefits ( returns to schooling ) are measured following the standard literature on cost \u2010 benefit analysis for investments in education and by calculating the earnings over the course of the working life.", + "ner_text": [ + [ + 552, + 586, + "named" + ] + ], + "validated": true, + "empirical_context": "2. Data for this analysis are obtained mainly from the 2010 Income and Expenditure Survey, a household survey of the labor force, and from the 2015 Population and Housing Census conducted by the Department of Statistics. This representative information accounts for the entire population of Jordan of all ages.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 12, + "text": "The three poorest provinces \u2013 Ruyigi, Muyinga and Cankuzo \u2013 are located in the north-east and have poverty rates of 85. 4, 83. 5 and 79. 5 percent respectively measured against the national poverty line, well above the national average of 64. 9 percent. Suffering from degraded and scarce land resources, high population density and isolation from centers of economic activity, the north - eastern provinces face acute food security and nutrition challenges. Access to basic infrastructure and services is also limited in the country, but particularly in the north-east. 7 1 ECVMB, Enqu\u00eate sur les Conditions de Vie des M\u00e9nages Burundais ( 2017 ). 2 Demographic and Health Survey ( DHS ) 2016-2017. 3 About 10 percent of Burundian children under 2 years consume a minimum acceptable diet: Demographic and Health Survey 2016-17. 4 After the 2000 Arusha Peace Accords and democratic elections in 2005, Burundi \u2019 s growth accelerated to an average of 4. 3 percent annually from 2007 to 2014. Moreover, its Human Capital Index ( HCI ) value went up from 0. 35 to 0. 38 between 2012 and 2017. 5 The World Bank Systematic Country Diagnostic identifies \u2018 ensuring basic needs are met \u2019 as a priority for the coming years. See World Bank ( 2018 ) Republic of Burundi: Systematic Country Diagnostic. Report No. 122549-BI. 6 See World Bank ( 2011 ) World Development Report 2011: Conflict, Security and Development and World Bank ( 2017 ) World Development Report 2017: Governance and the Law. 7 Data from the United Nations INFORM vulnerability index shows: ( a ) on food security, Ruyigi ranks as the most vulnerable province, with Ngozi second and Cankuzo fifth; ( b ) on infrastructure, Ruyigi ranks number two and Cankuzo third; ( c ) on education, Muyinga ranks second most vulnerable, Ngozi forth, Ruyigi fifth and Cankuzo seventh; and ( d ) on access to health, Ruyigi ranks third and Cankuzo forth.", + "ner_text": [ + [ + 789, + 818, + "named" + ], + [ + 30, + 36, + "Demographic and Health Survey <> data geography" + ], + [ + 575, + 580, + "Demographic and Health Survey <> author" + ], + [ + 640, + 644, + "Demographic and Health Survey <> publication year" + ], + [ + 682, + 685, + "Demographic and Health Survey <> acronym" + ], + [ + 688, + 697, + "Demographic and Health Survey <> publication year" + ], + [ + 721, + 753, + "Demographic and Health Survey <> reference population" + ], + [ + 1422, + 1426, + "Demographic and Health Survey <> publication year" + ], + [ + 1454, + 1458, + "Demographic and Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2 Demographic and Health Survey ( DHS ) 2016-2017. 3 About 10 percent of Burundian children under 2 years consume a minimum acceptable diet: Demographic and Health Survey 2016-17. 4 After the 2000 Arusha Peace Accords and democratic elections in 2005, Burundi \u2019 s growth accelerated to an average of 4.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data on health and demographics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides demographic and health data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data on health and demographics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 44, + "text": "programs for CBMPs in line with the IGAD HIV strategy b ) Number of persons from IGAD Member states trained in M & E including the use of data M & E system ( including structured learning agenda ) a ) Number of website hits in the last 12 months 0 b ) Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames 0 c ) Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months d ) Number of operational research studies funded 40", + "ner_text": [ + [ + 283, + 306, + "named" + ] + ], + "validated": true, + "empirical_context": "programs for CBMPs in line with the IGAD HIV strategy b ) Number of persons from IGAD Member states trained in M & E including the use of data M & E system ( including structured learning agenda ) a ) Number of website hits in the last 12 months 0 b ) Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames 0 c ) Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months d ) Number of operational research studies funded 40", + "type": "data", + "explanation": "This is indeed a dataset as it refers to data collected for monitoring purposes, which is used in the context of program evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected for monitoring programs.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data collected for monitoring purposes, which is used in the context of program evaluation.", + "contextual_signal": "mentioned as a data source for program monitoring", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 14, + "text": "In FY16 / 17, 64 percent of boys and 78 percent of girls ( between the ages of 5 and 16 ) were not enrolled in primary and secondary schools in Balochistan, compared to 40 percent of boys and 49 percent of girls at the national level. 22 The overall net enrollment and effective transition rates, from primary to middle school and middle to high school, were low compared to national rates, especially among girls. When it comes to student learning metrics, children in Balochistan also perform poorly in comparison to the same age groups in rural Pakistan communities. For example, approximately 60 percent of children in grade 5 could not perform a two \u2010 digit division problem. The 2018 ASER report also highlighted a wide gender gap in student learning, with 31 percent of boys and 20 percent of girls ( ages 5 to 16 years ) being able to read second \u2010 grade level sentences 19 Expanded Program on Immunization ( EPI ), Tuberculosis, Malaria and Vector Borne Diseases Control Program, Maternal, Newborn, and Child Health ( MNCH ), Lady Health Workers ( LHW ) Program, District Health Information System ( DHIS ), Nutrition Program, HIV \u2010 AIDS, Prime Minister \u2019 s Initiative for Hepatitis Control Program, Leprosy Control Program, National Program for Prevention and Control of Blindness, and Provincial Disaster and Surveillance", + "ner_text": [ + [ + 1072, + 1106, + "named" + ] + ], + "validated": false, + "empirical_context": "For example, approximately 60 percent of children in grade 5 could not perform a two \u2010 digit division problem. The 2018 ASER report also highlighted a wide gender gap in student learning, with 31 percent of boys and 20 percent of girls ( ages 5 to 16 years ) being able to read second \u2010 grade level sentences 19 Expanded Program on Immunization ( EPI ), Tuberculosis, Malaria and Vector Borne Diseases Control Program, Maternal, Newborn, and Child Health ( MNCH ), Lady Health Workers ( LHW ) Program, District Health Information System ( DHIS ), Nutrition Program, HIV \u2010 AIDS, Prime Minister \u2019 s Initiative for Hepatitis Control Program, Leprosy Control Program, National Program for Prevention and Control of Blindness, and Provincial Disaster and Surveillance", + "type": "system", + "explanation": "However, it is mentioned as a program and not explicitly as a data source in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a program and not explicitly as a data source in the context provided.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 301, + 304, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "system", + "explanation": "NLA is mentioned as part of a system but not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NLA is a dataset because it is mentioned alongside other data collection systems.", + "contextual_reason_agent": "NLA is mentioned as part of a system but not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 37, + "text": "25 vulnerable individuals or families, including those to help cope with consequences of economic or other shock. Beneficiaries of Safety Nets programs - Female ( number ) This indicator measures female participation in SSN programs. It has the same definition as the \" Beneficiaries of Safety Nets programs \" but applies only to female. This indicator will yield a measure of coverage of SSN projects disaggregate d by gender ( in absolute numbers ) Yearly MIS Reports CFS Beneficiaries of Safety Nets programs - Unconditional cash transfers ( number ) Follows the safety nets programs \u2019 classification used in SP Atlas. Yearly MIS reports CFS Beneficiaries of Safety Nets programs - Cash-for-work, food-for-work and public works ( number ) Follows the safety nets programs \u2019 classification used in SP Atlas. Yearly MIS reports CFS Share of beneficiaries who live below the poverty line % of beneficiaries with an annual consumption below the poverty line Once Targeting assessment CFS Intermediate Results Indicators Indicator Name Description ( indicator definition etc. ) Frequency Data Source / Methodology Responsibility for Data Collection Number of days worked This is an aggregate figure for all beneficiaries.", + "ner_text": [ + [ + 612, + 620, + "named" + ] + ], + "validated": false, + "empirical_context": "It has the same definition as the \" Beneficiaries of Safety Nets programs \" but applies only to female. This indicator will yield a measure of coverage of SSN projects disaggregate d by gender ( in absolute numbers ) Yearly MIS Reports CFS Beneficiaries of Safety Nets programs - Unconditional cash transfers ( number ) Follows the safety nets programs \u2019 classification used in SP Atlas. Yearly MIS reports CFS Beneficiaries of Safety Nets programs - Cash-for-work, food-for-work and public works ( number ) Follows the safety nets programs \u2019 classification used in SP Atlas.", + "type": "framework", + "explanation": "However, it is mentioned as a classification system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in relation to classifications of safety nets programs.", + "contextual_reason_agent": "However, it is mentioned as a classification system rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 17, + "text": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3. Project Design Targeting In the absence of data on household income, the Project uses geographical targeting at the level of the governorates where poverty level is estimated at 18. 3 % on average, 32. 5 % in Hebron and 21. 3 % in Bethlehem ( 2010 ) 9. Both governorates also have the highest unemployment rates in the West Bank with averages of 22. 8 % and 22. 4 %, respectively10. The vulnerability of these groups is exacerbated by mobility restrictions and poor market access. Output-based subsidy payment 8 These closures are part of the World Bank project and including them in the OBA targets will further incentivize JSC-H & B to achieve closures according to the planned schedule. In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "ner_text": [ + [ + 557, + 560, + "named" + ] + ], + "validated": false, + "empirical_context": "\uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' could imply a system that manages data.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 17, + "text": "As a result, teachers hired by parents to compensate for lack of state-paid teachers constitute 5. 5 percent of teaching force, but in some regions these numbers are much higher, for example, in the West region \u2013 19. 5 percent and in the Far North \u2013 12. 3 percent. 16. Similarly, the labor market relevance of the skills that graduates develop is weak. Based on the latest employer survey ( 2015 ), 28 only 42 percent of employers reported being fully satisfied with the competencies of their staff who possessed TVET qualifications. Dissatisfaction with the skills level of TVET graduates is particularly acute among employers in the agriculture, wood, construction, and cotton / textile industries. Limited collaboration between TVET providers and employers hampers the delivery of market-relevant skills training with training programs not informed by private sector input and students having few opportunities for job placement and practical training. Employers are not involved in the program design and delivery. The quality of skills development programs is also affected by a lack of teaching and learning materials, poor infrastructure, and low qualifications and limited industry exposure of teaching staff at all levels. Generally, at all levels of technical education, programs are characterized by limited infrastructure, poor supply of learning and training materials, poor quality of teaching staff, and low levels of financing.", + "ner_text": [ + [ + 373, + 388, + "named" + ], + [ + 199, + 210, + "employer survey <> data geography" + ], + [ + 238, + 247, + "employer survey <> data geography" + ], + [ + 391, + 395, + "employer survey <> publication year" + ], + [ + 575, + 589, + "employer survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Similarly, the labor market relevance of the skills that graduates develop is weak. Based on the latest employer survey ( 2015 ), 28 only 42 percent of employers reported being fully satisfied with the competencies of their staff who possessed TVET qualifications. Dissatisfaction with the skills level of TVET graduates is particularly acute among employers in the agriculture, wood, construction, and cotton / textile industries.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on employer satisfaction with TVET graduates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'employer survey' suggests a structured collection of responses from employers.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on employer satisfaction with TVET graduates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 35, + "text": "Building a strong evidence base on the impact of interventions in the Program will be particularly important for components that make use of grant funding to subsidize firms or individuals ( matching grant, entrepreneurship grant, and subsidy for first-time jobseekers ), as this evidence will guide decisions on if and how to implement these components following the pilot phase. Where possible, randomized controlled trials are envisaged to generate convincing evidence on the performance of the VC matching and entrepreneurship grant components. The degree of over - subscription of programs and the quality of applications will further determine whether randomization is feasible and desirable. Administrative data available from the MOF will allow for a high-quality non - experimental evaluation of activities including support to the TSEZ, and the value chains and broadband access components ( difference in difference approaches with pre-trends ). Approximately US $ 600, 000 have been secured to fund robust impact evaluation from the Jobs Multi-Donor Trust Fund \u201d ( parent Trust Fund number TF072322 ). 66. Impact evaluation will draw upon data collected for monitoring of the Program \u2019 s results, as well as additional dedicated surveys. Data collected primarily for monitoring will play an important role in facilitating impact evaluation. This is true for both: ( i ) data on actions and beneficiaries that is collected through the Program \u2019 s M & E system; and ( ii ) administrative data made available by the GOL to enable monitoring. The design of evaluations will continue to evolve alongside the NJP as program details are being refined. The administrative details of program elements will influence identification strategies ( for instance, roll-out schedules and outreach campaigns ).", + "ner_text": [ + [ + 699, + 718, + "named" + ], + [ + 855, + 899, + "Administrative data <> data description" + ], + [ + 1483, + 1502, + "Administrative data <> data type" + ], + [ + 1525, + 1528, + "Administrative data <> publisher" + ], + [ + 1821, + 1839, + "Administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The degree of over - subscription of programs and the quality of applications will further determine whether randomization is feasible and desirable. Administrative data available from the MOF will allow for a high-quality non - experimental evaluation of activities including support to the TSEZ, and the value chains and broadband access components ( difference in difference approaches with pre-trends ). Approximately US $ 600, 000 have been secured to fund robust impact evaluation from the Jobs Multi-Donor Trust Fund \u201d ( parent Trust Fund number TF072322 ).", + "type": "data", + "explanation": "In this context, 'administrative data' is explicitly mentioned as a source of information that will be used for evaluation, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured data collected by organizations for administrative purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as a source of information that will be used for evaluation, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source of information for evaluation", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "016_BOSIB029693bc600f090cb0745b2b12ee69", + "page": 122, + "text": "Supporting the elevation of Kakuma and Dadaab to a Municipality under the Second Kenya Urban Support Program ( KUSP 2 ), to be implemented by the Ministry of Lands, Public Works, Housing and Urban Development in close coordination with the Department of Refugee Services. This program will provides an important opportunity to implement the Government \u2019 s settlement policy commitments. 2. Creating economic opportunities for youth and strengthening the economic environment in refugee hosing districts to drive growth in line with Kenya \u2019 s commitments under the Djibouti Declaration ( 2017 ), the Nairobi Declaration ( 2017 ) and the Kampala Declaration ( 2019 ). 3. Supporting devolution and local economic development by addressing key infrastructure and social services needs in health and education in refugee hosting counties. This will build on the local economic development models outlined in KISEDP 2 and the draft GISEDP. 4. Improving connectivity and infrastructure services in refugee hosting communities focusing on addressing rural water and sanitation needs. This area aligns to Kenya \u2019 s commitment under the Kampala Declaration ( 2019 ). 5. Enhancing the capacity of DRS through review and implementation of the scheme of Service for Refugees. This will enable the department of have refugee management officers of all cadres who will be critical in implementation of the Marshal Plan. 6. Establishment of a Government owned Refugee Data Base which will enable the government to have the data for all Asylum Seekers and Refugees needed for planning and interlinking this data with other government systems so as to enhance service delivery. On behalf of the Government of Kenya, I recognize the partnership and support provided by the World Bank and we look forward to further collaboration under the IDA20 WHR to provide development focused socio - economic solutions for host communities and refugees.", + "ner_text": [ + [ + 1427, + 1461, + "named" + ], + [ + 28, + 34, + "Government owned Refugee Data Base <> data geography" + ], + [ + 39, + 45, + "Government owned Refugee Data Base <> data geography" + ], + [ + 81, + 86, + "Government owned Refugee Data Base <> data geography" + ], + [ + 240, + 270, + "Government owned Refugee Data Base <> author" + ], + [ + 532, + 537, + "Government owned Refugee Data Base <> data geography" + ], + [ + 1096, + 1101, + "Government owned Refugee Data Base <> data geography" + ], + [ + 1520, + 1534, + "Government owned Refugee Data Base <> reference population" + ], + [ + 1539, + 1547, + "Government owned Refugee Data Base <> reference population" + ] + ], + "validated": true, + "empirical_context": "6. Establishment of a Government owned Refugee Data Base which will enable the government to have the data for all Asylum Seekers and Refugees needed for planning and interlinking this data with other government systems so as to enhance service delivery. On behalf of the Government of Kenya, I recognize the partnership and support provided by the World Bank and we look forward to further collaboration under the IDA20 WHR to provide development focused socio - economic solutions for host communities and refugees.", + "type": "database", + "explanation": "This is indeed a dataset as it is explicitly described as a 'Refugee Data Base' intended for collecting and managing data on asylum seekers and refugees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'data base' which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly described as a 'Refugee Data Base' intended for collecting and managing data on asylum seekers and refugees.", + "contextual_signal": "described as a data base for collecting data on asylum seekers and refugees", + "tags": [] + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 47, + "text": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 42 of 54 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Percentage of specific priority populations fully vaccinated The indicator will track the number of the eligible people as defined being among a specific set of priority groups in the National Deployment and Vaccination Plan ( NVDP ) / government prioritization list who are fully vaccinated from COVID-19 using vaccines that meet Bank ' s vaccine approval criteria. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Percentage of fully vaccinated priority groups who are female The denominator is the number of people who were in the target groups and were fully vaccinated with 2 doses, and the numerator will be the number of women vaccinated with 2 doses in the target groups. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Number of project-supported COVID-19 vaccinations sites with adequate health care waste management for vaccination The project will invest in providing adequate waste management equipment at the facility level.", + "ner_text": [ + [ + 645, + 673, + "named" + ], + [ + 263, + 323, + "digital vaccination registry <> data description" + ], + [ + 717, + 736, + "digital vaccination registry <> data type" + ], + [ + 748, + 809, + "digital vaccination registry <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 42 of 54 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Percentage of specific priority populations fully vaccinated The indicator will track the number of the eligible people as defined being among a specific set of priority groups in the National Deployment and Vaccination Plan ( NVDP ) / government prioritization list who are fully vaccinated from COVID-19 using vaccines that meet Bank ' s vaccine approval criteria. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Percentage of fully vaccinated priority groups who are female The denominator is the number of people who were in the target groups and were fully vaccinated with 2 doses, and the numerator will be the number of women vaccinated with 2 doses in the target groups. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Number of project-supported COVID-19 vaccinations sites with adequate health care waste management for vaccination The project will invest in providing adequate waste management equipment at the facility level.", + "type": "registry", + "explanation": "In this context, it is confirmed as a dataset because it is explicitly mentioned as a source of administrative data for tracking vaccination statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'registry', which often refers to a structured collection of data.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset because it is explicitly mentioned as a source of administrative data for tracking vaccination statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "141_760530PAD0P127010Box377322B00OUO090", + "page": 89, + "text": "- 74 - Annex 8: Summary of Baseline Study of Local Governments - December 2012 1. A baseline study for the status of the local governments in Mauritania was conducted in November and December 2012. 2. The main objective of the baseline survey was to provide the Government with updated information about the capacity of local governments in core areas, including: ( i ) Functionality of the LG Council and administration ( ii ) Staffing and HR management ( iii ) Relations with regional departments ( iv ) Financial management ( budget preparation, own source revenue collection, expenditure management, reporting, internal controls ) ( v ) Budgeting and planning ( vi ) Procurement management ( vii ) Asset management ( viii ) Environmental and social safeguards and management 3. The methodology of the survey was mainly qualitative and based on a simple questionnaire with specific questions for each thematic area. The qualitative information was then complemented by collection of quantitative information, in particular fiscal data. 4. The key findings confirm the overall trends in the LG system in Mauritania in terms of relatively weak capacity, with relatively little difference in capacity between rural and urban LGs.", + "ner_text": [ + [ + 227, + 242, + "named" + ], + [ + 65, + 78, + "baseline survey <> reference year" + ], + [ + 121, + 138, + "baseline survey <> reference population" + ], + [ + 142, + 152, + "baseline survey <> data geography" + ], + [ + 170, + 196, + "baseline survey <> reference year" + ], + [ + 370, + 420, + "baseline survey <> data description" + ], + [ + 428, + 454, + "baseline survey <> data description" + ], + [ + 463, + 498, + "baseline survey <> data description" + ], + [ + 506, + 526, + "baseline survey <> data description" + ], + [ + 1026, + 1037, + "baseline survey <> data type" + ] + ], + "validated": true, + "empirical_context": "2. The main objective of the baseline survey was to provide the Government with updated information about the capacity of local governments in core areas, including: ( i ) Functionality of the LG Council and administration ( ii ) Staffing and HR management ( iii ) Relations with regional departments ( iv ) Financial management ( budget preparation, own source revenue collection, expenditure management, reporting, internal controls ) ( v ) Budgeting and planning ( vi ) Procurement management ( vii ) Asset management ( viii ) Environmental and social safeguards and management 3. The methodology of the survey was mainly qualitative and based on a simple questionnaire with specific questions for each thematic area.", + "type": "survey", + "explanation": "The term is indeed a dataset as it refers to a survey designed to collect specific information for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data gathered through a systematic approach.", + "contextual_reason_agent": "The term is indeed a dataset as it refers to a survey designed to collect specific information for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 79, + "text": "This approach improves transparency with respect to the purposes of resources and the efficiency of their use. It is also helpful in improving communications between the Government and its citizens. The GFMIS will easily provide information on how the annual Budget Law and its execution support the strategic priorities of the Government. The GFMIS has been fully rolled out to budget units ( ministries, departments, and regional financial centers ) encompassed in the Budget Law except three ( the Ministry of Foreign Affairs, Water Authority, and civil defense ) that would be completed in 2016. The Government has launched GFMIS phase two aiming to connect IPUs by 2018. 28. The current GFMIS implementation utilizes a subset of the functionalities of the underlying application software. The current core application software comprises: ( a ) Hyperion for budget preparation; ( b ) Oracle Financials for budget execution; and ( c ) software for interfacing to other software for debt management, payroll, bank reconciliation, and revenue management. The GFMIS is fully utilized for", + "ner_text": [ + [ + 1060, + 1065, + "named" + ] + ], + "validated": false, + "empirical_context": "The current core application software comprises: ( a ) Hyperion for budget preparation; ( b ) Oracle Financials for budget execution; and ( c ) software for interfacing to other software for debt management, payroll, bank reconciliation, and revenue management. The GFMIS is fully utilized for", + "type": "system", + "explanation": "However, GFMIS is described as a system and not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is mentioned in the context of software applications related to financial management.", + "contextual_reason_agent": "However, GFMIS is described as a system and not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 16, + "text": "Int J Equity Health 19, 23 ( 2020 ). 16 Nyawira, L., Tsofa, B., Musiega, A. et al. Management of human resources for health: implications for health systems efficiency in Kenya. BMC Health Serv Res 22, 1046 ( 2022 ). 17 McCollum R, Limato R, Otiso L, et al. Health system governance following devolution: comparing experiences of decentralisation in Kenya and IndonesiaBMJ Global Health 2018; 3: e000939 18 Kairu, A., Orangi, S., Mbuthia, B. et al. Examining health facility financing in Kenya in the context of devolution. BMC Health Serv Res 21, 1086 ( 2021 ). 19 Ministry of Health Kenya Harmonized Health Facility Assessment 2018-19. The diagnostic tests were: HIV, malaria, and syphilis rapid test; urine test for pregnancy; blood glucose; urine dipstick for glucose and protein; and hemoglobin levels", + "ner_text": [ + [ + 704, + 728, + "named" + ] + ], + "validated": false, + "empirical_context": "19 Ministry of Health Kenya Harmonized Health Facility Assessment 2018-19. The diagnostic tests were: HIV, malaria, and syphilis rapid test; urine test for pregnancy; blood glucose; urine dipstick for glucose and protein; and hemoglobin levels", + "type": "test", + "explanation": "This is not a dataset as it refers to a specific type of diagnostic test rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific diagnostic test used in health assessments.", + "contextual_reason_agent": "This is not a dataset as it refers to a specific type of diagnostic test rather than a structured collection of data.", + "contextual_signal": "mentioned only as a diagnostic test, not as a data source", + "tags": [] + }, + { + "filename": "159_38147core", + "page": 71, + "text": "Social Imuact and Mitigation: The IDP household will need to demonstrate clear title to land to receive the housing support cash grant. This i s intended to safeguard against the threat o f eviction and reduce the potential for political and economic exploitation. The Ministry o f Resettlement has established a Land Task Force to help regularize issues relating to land title in relation to the PHP. This was set up on the lines o f that currently in operation in the North and East to expedite the issuance o f land deeds. According to the U N H C R supervised Survey ( 2006 ), 74 % o f IDPs in the refugee camps own land in Puttalam and 55 % possess legal documentation to support that. According to the Survey, 72 % o f those with landownership informed that they had purchased the land - in some cases, they bought the land on which the refugee camp i s located where all IDP families o f the camp possess title deeds; others collectively bought land outside the refugee camp. 3 % o f those with land ownership revealed that they had received the land through donation. Among those who possess documentary evidence o f landownership, an overwhelming 97 % had outright deeds, whereas only 1. 5 % held permits and 1 % grants or 0. 5 % leases. 66", + "ner_text": [ + [ + 543, + 570, + "named" + ], + [ + 573, + 577, + "U N H C R supervised Survey <> publication year" + ], + [ + 590, + 615, + "U N H C R supervised Survey <> reference population" + ], + [ + 628, + 636, + "U N H C R supervised Survey <> data geography" + ], + [ + 1265, + 1283, + "U N H C R supervised Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "This was set up on the lines o f that currently in operation in the North and East to expedite the issuance o f land deeds. According to the U N H C R supervised Survey ( 2006 ), 74 % o f IDPs in the refugee camps own land in Puttalam and 55 % possess legal documentation to support that. According to the Survey, 72 % o f those with landownership informed that they had purchased the land - in some cases, they bought the land on which the refugee camp i s located where all IDP families o f the camp possess title deeds; others collectively bought land outside the refugee camp.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data regarding land ownership among IDPs, used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a supervised survey that provides statistical information.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data regarding land ownership among IDPs, used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 59, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 55 of 74 MGCSW Percentage of beneficiaries paid using the enhanced integrated biometric and Management Information Systems The total number of beneficiaries paid using the enhanced biometric and management information systems divided by the total number of beneficiaries. Use of the enhanced biometric system includes biometric registration at the point of beneficiary registration as well as the use of biometric to verify identity at the time of payment. Use of the management information system ( MIS ) involves electronic wage request generations to be approved through the MIS. This indicator will be measured during missions and ISRs Project MIS Registration and Payments data will be obtained from the project MIS to determine the percentage of payments processes through the biometric and management information systems. Selected implementing partner ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of beneficiary households receiving cash for performing labor intensive public works The number of total beneficiary HHs that are selected to participate in LIPW under sub-component 1. 1, in accordance with the This indicator will be reviewed at a Registration and payment data stored in the SNSOP Management Household and sub - component specific data will be collected at registration and updated over the Selected Implementing Partner", + "ner_text": [ + [ + 1461, + 1466, + "named" + ] + ], + "validated": false, + "empirical_context": "Selected implementing partner ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of beneficiary households receiving cash for performing labor intensive public works The number of total beneficiary HHs that are selected to participate in LIPW under sub-component 1. 1, in accordance with the This indicator will be reviewed at a Registration and payment data stored in the SNSOP Management Household and sub - component specific data will be collected at registration and updated over the Selected Implementing Partner", + "type": "system", + "explanation": "However, SNSOP is described as a management system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SNSOP is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "However, SNSOP is described as a management system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 36, + "text": "Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of host community pregnant women attending 4 or more ANC visits in Garissa and Turkana ( Percentage ) Description Numerator: Number of pregnant women within the host cummunity of Garissa and Turkana attending 4 or more ANC visits. Denominator: Total number of expected live births during the reporting period within the host community of Garissa and Turkana Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of refugee pregnant women attending 4 or more ANC visits in Garissa and Turkana ( Percentage ) Description Numerator: Number of refugee pregnant women attending 4 or more ANC visits. Denominator: Total number of expected live births during the reporting period within the refugee community of Garissa and Turkana Frequency Every six months Data source UNHCR reports Methodology for Data Collection Routine UNHCR data collection Responsibility for Data Collection MoH Proportion of Children Under 5 with diarrhea treated with Zinc / ORS Co-Pack ( Percentage )", + "ner_text": [ + [ + 84, + 88, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of host community pregnant women attending 4 or more ANC visits in Garissa and Turkana ( Percentage ) Description Numerator: Number of pregnant women within the host cummunity of Garissa and Turkana attending 4 or more ANC visits. Denominator: Total number of expected live births during the reporting period within the host community of Garissa and Turkana Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of refugee pregnant women attending 4 or more ANC visits in Garissa and Turkana ( Percentage ) Description Numerator: Number of refugee pregnant women attending 4 or more ANC visits.", + "type": "system", + "explanation": "'HMIS' is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is associated with data collection methodologies.", + "contextual_reason_agent": "'HMIS' is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "contextual_signal": "described as a methodology for data collection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "185_multi-page", + "page": 201, + "text": "Impact monitoring: The impact of the national response to the epidemic will be reflected in a broad range of indicators, including: 1. prevalence rates of HIV infection, by age group and gender; 2. median age at first sex; 3. reported condom use at last sex with non-regular partner; 4. STI incidence / prevalence; 5. primary school enrollment and completion rates among orphans; 6. the number of non-regular sexual partners during a defined period, by marital status, age group, and gender. The collection of these indicators will be the responsibility of the various implementing agencies and will be measured through a variety of instruments, including the Demographic and Health Survey of 2003. The Ministry of Health collects monthly HIV / AIDS prevalence data from 22 sentinel surveillance sites ( 13 urban and nine rural ). This existing surveillance system of HIV prevalence will be improved to include behavioral indicators ( \" second-generation surveillance \" ) with the assistance of cooperating partners. Trends in the above-mentioned indicators will be attributed to the collective efforts of the partnership against HIV / AIDS in Kenya, and to the project as part thereof.", + "ner_text": [ + [ + 660, + 697, + "named" + ] + ], + "validated": true, + "empirical_context": "the number of non-regular sexual partners during a defined period, by marital status, age group, and gender. The collection of these indicators will be the responsibility of the various implementing agencies and will be measured through a variety of instruments, including the Demographic and Health Survey of 2003. The Ministry of Health collects monthly HIV / AIDS prevalence data from 22 sentinel surveillance sites ( 13 urban and nine rural ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data collection for the indicators being measured.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects indicators related to sexual partners and health data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data collection for the indicators being measured.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 10, + "text": "While refugees initially lived in special villages, since the 1990s, they have been relatively free to move out to urban areas and access social services. They are mainly employed in the informal sector and recently have been allowed to open bank accounts. 7. Refugees in Pakistan access education and health services in the same way as other residents including those related to the COVID \u2010 19 response. Access to public services among refugees has been similar to that of residents since the shift from refugee camps to host communities. Since the outbreak of COVID \u2010 19, refugee children, similar to Pakistani children, have been deprived of education due to school closures and access to health services has become constrained. Estimates from the United Nations High Commissioner for Refugees ( UNHCR ) indicate that approximately 35 percent of refugee households in Pakistan will be severely impacted by the pandemic as they are daily wagers who would have lost all pre \u2010 COVID \u2010 19 sources of income, and have no assets to draw upon and / or other forms of social support. Such households ( 85, 000 ) are expected to be supported with a monthly stipend for a period of 4 months, 5 Asian Development Bank. 2005. Balochistan Economic Report. 6 World Bank Group. 2017. Pakistan \u2010 Sindh: Public Expenditure Review. 7 World Bank. 2019. Data4Pakistan \u2010 District Development Portal ( accessed on August 28, 2019 ). https: / / geosdndev. worldbank. org / Data4Pakistan /.", + "ner_text": [ + [ + 1337, + 1350, + "named" + ] + ], + "validated": false, + "empirical_context": "2019. Data4Pakistan \u2010 District Development Portal ( accessed on August 28, 2019 ). https: / / geosdndev.", + "type": "project", + "explanation": "However, the context indicates it is a portal, not explicitly a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in the name, suggesting a data-related initiative.", + "contextual_reason_agent": "However, the context indicates it is a portal, not explicitly a structured collection of data or a dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 30, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 28 of 34 Indicator Name Corporate Unit of Measur e Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Name: Number of local governments participating in training on civic engagement Number 0. 00 15. 00 Semi-annually Reports provided by Civic Engagement Service Provider Civic Engagement Service Provider, M & E specialist Description: Total number of local government units who attended trainings Name: Percentage increase in participant knowledge of civic engagement concepts and methodologies Percentag e 0. 00 50. 00 Before and after each civic engagement training event Pre and post training tests Civic Engagement Service Provider Percentage increase in knowledge of civic engagement concepts and methodologies among persons with disabilities and representatives of organizations of persons with disabilities Percentag e 0. 00 50. 00 Before and after each civic engagement training event Pre and post training tests Civic Engagement Service Provider Description: Percentage increase in pre and post training test scores for individuals who participated in civic engagement training Name: Percentage of new businesses and employment still active after three months Percentag e 0. 00 75. 00 Once, at least three months after training completion, with the possibility of Post-Training Completion Survey M & E Specialist and supplementary data collectors, as needed.", + "ner_text": [ + [ + 1418, + 1449, + "named" + ], + [ + 77, + 87, + "Post-Training Completion Survey <> data geography" + ], + [ + 381, + 414, + "Post-Training Completion Survey <> author" + ], + [ + 867, + 892, + "Post-Training Completion Survey <> reference population" + ], + [ + 1113, + 1169, + "Post-Training Completion Survey <> data description" + ], + [ + 1238, + 1313, + "Post-Training Completion Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "00 75. 00 Once, at least three months after training completion, with the possibility of Post-Training Completion Survey M & E Specialist and supplementary data collectors, as needed.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a survey that collects data post-training.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a survey that collects data post-training.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 65, + "text": "About 85 percent of Syrians refugees registered with UNHCR live in Jordanian communities, while 15 percent live in refugee camps. 34. Employment issues are a long-standing concern in Jordan. Many of the most educated Jordanians emigrate to work in the Gulf countries. High reservation wages and a preference for public sector work result in high unemployment rates at the same time as Jordan brings in a large number of economic migrants to fill less desirable jobs. Currently, there are approximately 650, 000 economic migrants in Jordan, 324, 000 of whom have work permits, while the remainder work informally. 12 Escribano, A., and J. L. Guasch. 2005. \u201c Assessing the Impact of the Investment Climate on Productivity using Firm-Level Data: Methodology and Cases of Guatemala, Honduras, and Nicaragua. \u201d World Bank Research Paper 3621.. 13 Qureshi and Valde. 2007. \u201c State-Business Relations, Investment Climate Reform, and Firm Productivity in Sub-Saharan Africa. \u201d 14 Mi\u0161kinis and Byrka. 2014. \u201c The Role of Investment Promotion Agencies in Attracting Foreign Direct Investment. \u201d Ekonomica. 15 The census lists approximately 640, 000 Egyptians and 634, 000 Palestinians ( without Jordanian ID numbers ).", + "ner_text": [ + [ + 1103, + 1109, + "named" + ], + [ + 183, + 189, + "census <> data geography" + ], + [ + 385, + 391, + "census <> data geography" + ], + [ + 1139, + 1148, + "census <> reference population" + ], + [ + 1162, + 1174, + "census <> reference population" + ] + ], + "validated": true, + "empirical_context": "\u201d Ekonomica. 15 The census lists approximately 640, 000 Egyptians and 634, 000 Palestinians ( without Jordanian ID numbers ).", + "type": "census", + "explanation": "In this context, the census is explicitly mentioned as listing a specific number of individuals, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because a census typically involves a structured collection of demographic data.", + "contextual_reason_agent": "In this context, the census is explicitly mentioned as listing a specific number of individuals, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 17, + "text": "In addition, the project will also facilitate expanding education infrastructure investments in Istanbul, Ankara, Bursa, \u0130zmir, Konya and Kayseri which host a high number of SuTP and has high concentration of out-of - school SuTP at the district level. 33. The criteria to select the twelve provinces, and locations within the provinces, included a detailed analysis of concentration of SuTP at the district level, their access to education, and population densities of SuTP compared to host communities. Based on these criteria, the selected locations represent the following conditions: a. Districts which host more than 50, 000 SuTP and with a resident population over 500, 000 ( where the ratio of SuTP versus host community creates considerable distress for existing infrastructure and service capacity ) b. Districts where the majority of school-aged SuTP reside and which currently host the highest concentration ( ratio ) of out-of-school SuTP due to very limited ( deprived ) access to education services, c. Districts where a planned change from double-shifts to single-shifts and abolishment of Temporary Education Centers are imminent, d. Districts where the appropriate public land is available and secured for construction. 34. The Bank team jointly with MoNE used various sources of available data to ensure all dimensions of the aforementioned criteria were addressed in the selection. Apart from administrative data from MoNE and DGMM, the team also used the National Muhtar Survey ( NMS ). The NMS was conducted in every neighborhood and village throughout Turkey, jointly by the World Bank and the Government, to ascertain the accessibility criteria. The survey relies on the information provided by muhtars, who are the elected heads for neighborhoods and villages. They report on population related", + "ner_text": [ + [ + 1413, + 1432, + "named" + ], + [ + 96, + 104, + "administrative data <> data geography" + ], + [ + 106, + 112, + "administrative data <> data geography" + ], + [ + 138, + 145, + "administrative data <> data geography" + ], + [ + 174, + 178, + "administrative data <> reference population" + ], + [ + 446, + 503, + "administrative data <> data description" + ], + [ + 1575, + 1581, + "administrative data <> data geography" + ], + [ + 1598, + 1608, + "administrative data <> author" + ] + ], + "validated": true, + "empirical_context": "The Bank team jointly with MoNE used various sources of available data to ensure all dimensions of the aforementioned criteria were addressed in the selection. Apart from administrative data from MoNE and DGMM, the team also used the National Muhtar Survey ( NMS ). The NMS was conducted in every neighborhood and village throughout Turkey, jointly by the World Bank and the Government, to ascertain the accessibility criteria.", + "type": "data", + "explanation": "In this context, 'administrative data' is indeed used as a source of information for the selection criteria.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'administrative data' is a dataset because it refers to a collection of data used in the research.", + "contextual_reason_agent": "In this context, 'administrative data' is indeed used as a source of information for the selection criteria.", + "contextual_signal": "mentioned as a source of data used in the selection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 37, + "text": "They spend a disproportionate amount of time carrying out time-intensive domestic tasks, a burden which is only compounded in situations where chronic deficiencies exist in water provision. 17 103. Eight percent of women in the GBML service area are reported as the main breadwinners. 18 The project mainstreams gender by: ( i ) quantifying the differentiated impact of the burdens and benefits of improved water supply among male and female residents in the project affected areas and GBML service zones, and ( ii ) identifying areas of engagement by men and women during the operationalization of Component 1 and through the support of Component 2 in terms of citizen feedback and awareness. A qualitative study in the form of twelve semi - structured focus groups was carried out over the period of project preparation to provide deeper understanding of the gendered dimension and inform the design of gender-responsive indicators for measuring how the project is performing in this particular area. 19 Details of the analysis are provided in Annex 10. The Project Implementation Manual ( PIM ) contains gender-sensitive language that monitors and guarantees inclusiveness during such activities including citizen outreach, communications and recruitment to project positions. This requirement will help ensure equal representation of all diverse population groups in the GBML. 17 Literature review includes: World Bank Social Development Department. Making Water Supply and Sanitation Work for Women and Men, December 2010. A policy brief on Gender, Water and Sanitation developed by the Inter-agency Task Force on Gender and Water ( GWTF ) under the UN-Water and the Interagency Network on Women and Gender Equality ( IANWGE ) in support of Water for Life 2005-2015. June 2006. 18 World Bank Water Supply Augmentation Project ( P125184 ) Household Survey, March \u2013 April 2014. 19 Gender-responsive indicators can encapsulate gender-specific or gender-inclusive performance outcomes. The former measures specific needs of men and women whereas the latter focuses on relative benefits and provides comparable information. 26", + "ner_text": [ + [ + 905, + 933, + "named" + ] + ], + "validated": false, + "empirical_context": "18 The project mainstreams gender by: ( i ) quantifying the differentiated impact of the burdens and benefits of improved water supply among male and female residents in the project affected areas and GBML service zones, and ( ii ) identifying areas of engagement by men and women during the operationalization of Component 1 and through the support of Component 2 in terms of citizen feedback and awareness. A qualitative study in the form of twelve semi - structured focus groups was carried out over the period of project preparation to provide deeper understanding of the gendered dimension and inform the design of gender-responsive indicators for measuring how the project is performing in this particular area. 19 Details of the analysis are provided in Annex 10.", + "type": "indicator", + "explanation": "However, 'gender-responsive indicators' are not a structured collection of data but rather metrics for evaluation.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'indicators' can imply measurable data points.", + "contextual_reason_agent": "However, 'gender-responsive indicators' are not a structured collection of data but rather metrics for evaluation.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "102_Kenya-Water-PAD-04072017", + "page": 36, + "text": "26 Annex 1: Results Framework and Monitoring Kenya: Water and Sanitation Development Project ( P156634 ) Results Framework Project Development Objective: To improve water supply and sanitation services in select coastal and northeastern regions in Kenya. PDO Level Results Indicators C o r e Unit of Meas ure Cumulative Target Values Fre - quency Data source / method - ology Respon - sibility for data collec - tion Comments Baselin e in 2016 2017 2018 2019 2020 2021 2022 People in urban areas provided with access to improved water sources under the project. X Number 0 0 2, 500 18, 400 50, 900 70, 000 90, 000 Semi - ann - ually Project reports on construct ion and operatio n of infrastru cture. WSP data on connec - tions. WSPs Coast counties, Wajir and Garissa. One household connection serves 5 people, one community water point serves 30 people, and one kiosk serves 400 people. People provided with access to improved sanitation services under the project \u2014 urban. X Number 0 0 0 10, 000 30, 000 40, 000 50, 000 Semi - ann - ually Project reports on construc - tion and operatio n of infra - structure. WSP data on connec - tions. WSPs Primarily, Wajir and Garissa.", + "ner_text": [ + [ + 1113, + 1121, + "named" + ], + [ + 45, + 50, + "WSP data <> data geography" + ], + [ + 474, + 495, + "WSP data <> reference population" + ], + [ + 734, + 748, + "WSP data <> data geography" + ], + [ + 750, + 755, + "WSP data <> data geography" + ], + [ + 760, + 767, + "WSP data <> data geography" + ] + ], + "validated": true, + "empirical_context": "X Number 0 0 0 10, 000 30, 000 40, 000 50, 000 Semi - ann - ually Project reports on construc - tion and operatio n of infra - structure. WSP data on connec - tions. WSPs Primarily, Wajir and Garissa.", + "type": "data", + "explanation": "In this context, 'WSP data' is indeed used as a source of information related to infrastructure connections.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'WSP data' refers to a dataset because it is mentioned in the context of project reports and connections.", + "contextual_reason_agent": "In this context, 'WSP data' is indeed used as a source of information related to infrastructure connections.", + "contextual_signal": "mentioned as a data source in project reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 48, + "text": "The World Bank Agricultural Employment Support for Refugees and Turkish Citizens through Enhanced Market Linkages ( P171543 ) Page 45 of 85 Number of refugee workers who completed short term vocational skills trainings Measures the number of refugee workers who received technical training and assesses the increased skills of workers Quarterly Quarterly monitoring reports Administrative data ACC PIU Number of formal jobs created Measures the number of formal jobs created, which is defined as the provision of a wage subsidy ( disaggregated by gender, age and refugee status ) Quarterly Quarterly monitoring reports MIS ACC PIU Number of formal jobs created for refugees Measures the number of formal jobs created for refugees Quarterly Regular monitoring reports Regular monitoring reports ACC PIU Number of employers participating in the project This indicator measures the number of employers participating in the project, which shows the increase in demand for labor through the project. Employers are defined as farmers who receive any one element of the package of support detailed under Component 1 and the primary ACCs that receive support under Component 2 ( disaggregated by gender, age ).", + "ner_text": [ + [ + 374, + 393, + "named" + ], + [ + 4, + 14, + "Administrative data <> publisher" + ], + [ + 140, + 218, + "Administrative data <> data description" + ], + [ + 402, + 431, + "Administrative data <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Agricultural Employment Support for Refugees and Turkish Citizens through Enhanced Market Linkages ( P171543 ) Page 45 of 85 Number of refugee workers who completed short term vocational skills trainings Measures the number of refugee workers who received technical training and assesses the increased skills of workers Quarterly Quarterly monitoring reports Administrative data ACC PIU Number of formal jobs created Measures the number of formal jobs created, which is defined as the provision of a wage subsidy ( disaggregated by gender, age and refugee status ) Quarterly Quarterly monitoring reports MIS ACC PIU Number of formal jobs created for refugees Measures the number of formal jobs created for refugees Quarterly Regular monitoring reports Regular monitoring reports ACC PIU Number of employers participating in the project This indicator measures the number of employers participating in the project, which shows the increase in demand for labor through the project. Employers are defined as farmers who receive any one element of the package of support detailed under Component 1 and the primary ACCs that receive support under Component 2 ( disaggregated by gender, age ).", + "type": "data", + "explanation": "In this context, 'administrative data' is used as a source of information for monitoring and evaluating project indicators, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured data collected by organizations for administrative purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is used as a source of information for monitoring and evaluating project indicators, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source of information for monitoring reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "167_27761", + "page": 19, + "text": "Special care has been placed on the beneficiary selection process using the level of household poverty as an eligibility criterion. For this purpose, an econometric formula i s used that predicts the level of household consumption using proxy variables derived from the information gathered during the pilot survey. Given that the Palestine census office carries out a census of the West Bank and Gaza every three years, this formula will be updated regularly. Furthermore, the verification instruments are simple and take into account the lessons learned from experience in other countries. Privacy of beneficiary information i s respected and the time consumed in filling out the pertinent forms i s minimal. Outcome and output indicator monitoring takes into account lessons learned from other CCT projects and will augment the information gathered. The forms to verify education, health, and training / awareness conditions will include basic indicators to facilitate monitoring. ( i ) Improving targeting How is it auurouriate to the borrower \u2019 s needs? Under the existing MOSA systems, the assessments made by individual social workers were the sole determinant of eligibility. This approach i s too discretionary and can result in errors of inclusion. The new beneficiary evaluation and selection process will improve targeting and will also free the social workers to spend more time assisting beneficiary households. Roles, responsibilities, and time allocations of social workers under the new system will be better delineated. ( ii ) Will reverse declining school and health allocations outcomes of children Passing grades in Arabic have declined from 71 percent to 38 percent and in math from 54 percent to 26 percent and dropout rates have increased precipitously. Approximately, 34 percent of children under five years old suffer mild anemia and about 9 percent suffer acute protein - calorie malnutrition. Data on the first population decile in the West Bank and Gaza i s scant, the degree of decline i s likely to have been greater for children in the poorest households. The SSNRP program will provide the government and households with an instrument to help mitigate these declines. 16", + "ner_text": [ + [ + 331, + 347, + "named" + ], + [ + 76, + 102, + "Palestine census <> data description" + ], + [ + 331, + 354, + "Palestine census <> publisher" + ], + [ + 383, + 392, + "Palestine census <> data geography" + ], + [ + 397, + 401, + "Palestine census <> data geography" + ] + ], + "validated": true, + "empirical_context": "For this purpose, an econometric formula i s used that predicts the level of household consumption using proxy variables derived from the information gathered during the pilot survey. Given that the Palestine census office carries out a census of the West Bank and Gaza every three years, this formula will be updated regularly. Furthermore, the verification instruments are simple and take into account the lessons learned from experience in other countries.", + "type": "census", + "explanation": "In this context, it is confirmed as a dataset since it is mentioned that the census office carries out a census every three years, indicating it is a source of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'census' typically refers to a structured collection of data about a population.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is mentioned that the census office carries out a census every three years, indicating it is a source of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 53, + "text": "43 Commission21 developed with support from the African Development Bank, UNICEF and the World Bank in 2014-15. The strategy has three key objectives: ( i ) increase access to basic social services, ( ii ) ensure food security and basic income security, ( iii ) strengthen social and natural risk management; and a transversal objective ( iv ) contribute to decrease chronic malnutrition. ( Box 2 ). 22. The four key instruments include: a targeting mechanism and potential beneficiary database; core modules of a basic management information system; a monitoring and evaluation module; and capacity building for the implementation of the NSPS. The instruments will be initially anchored around the cash transfer program described in Component 1, but with a view to serve a broader set of targeted programs that would contribute to the realization of the NSPS objectives. Sub-component 2. 1: Implementation of a precursor database for the registry ( US $ 2. 0 million equivalent ) 23. The beneficiary database and its associated targeting mechanism are a key pillar for the coordination of programs. They are also critical for ensuring transparency in the selection of beneficiaries in the Burundian context of recurrent fragility and entrenched structural poverty The database will be initially be developed to identify the extreme poor households for the purpose of the cash transfer program and its complementary activities.", + "ner_text": [ + [ + 514, + 549, + "named" + ] + ], + "validated": false, + "empirical_context": "22. The four key instruments include: a targeting mechanism and potential beneficiary database; core modules of a basic management information system; a monitoring and evaluation module; and capacity building for the implementation of the NSPS. The instruments will be initially anchored around the cash transfer program described in Component 1, but with a view to serve a broader set of targeted programs that would contribute to the realization of the NSPS objectives.", + "type": "system", + "explanation": "However, it is described as a management information system, which does not function as a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a management information system, which does not function as a data source in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1133, + 1137, + "named" + ], + [ + 4, + 14, + "DHIS <> publisher" + ], + [ + 15, + 26, + "DHIS <> data geography" + ], + [ + 481, + 492, + "DHIS <> data geography" + ], + [ + 583, + 591, + "DHIS <> reference population" + ], + [ + 616, + 627, + "DHIS <> data geography" + ], + [ + 657, + 665, + "DHIS <> reference population" + ], + [ + 690, + 701, + "DHIS <> data geography" + ], + [ + 1069, + 1080, + "DHIS <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "In this context, 'DHIS' is referred to as a system that integrates and manages health data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is mentioned in the context of health data management and integration.", + "contextual_reason_agent": "In this context, 'DHIS' is referred to as a system that integrates and manages health data, indicating it functions as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "185_multi-page", + "page": 40, + "text": "41 2 ) Improved care and support Percent of graduates of medical Project data services for those both infected and nursing school in the past and affected by HIV / AIDS year trained in natural history of HIV and in diagnosis and care of common opportunistic infections Percentage of health facilities Survey data that are currently stocked with drugs for commnon opportunistic infections and to provide palliative care, and report no stock-outs in the past year Percent of districts with at least Service delivery data one center staffed by trained counselors providing HIV testing and counseling at either free or affordable rates Increase in the percentage of clients served by VCT services that meet minimum requirements for provision of quality counseling and testing services Percent increase in number of communities with improved prevention services, care, and support Increase in percent of orphaned children under 15 who are currently attending school Increase in percent of population receiving quality HIV / AIDS / STI / TB case management", + "ner_text": [ + [ + 497, + 518, + "named" + ], + [ + 899, + 925, + "Service delivery data <> reference population" + ] + ], + "validated": true, + "empirical_context": "41 2 ) Improved care and support Percent of graduates of medical Project data services for those both infected and nursing school in the past and affected by HIV / AIDS year trained in natural history of HIV and in diagnosis and care of common opportunistic infections Percentage of health facilities Survey data that are currently stocked with drugs for commnon opportunistic infections and to provide palliative care, and report no stock-outs in the past year Percent of districts with at least Service delivery data one center staffed by trained counselors providing HIV testing and counseling at either free or affordable rates Increase in the percentage of clients served by VCT services that meet minimum requirements for provision of quality counseling and testing services Percent increase in number of communities with improved prevention services, care, and support Increase in percent of orphaned children under 15 who are currently attending school Increase in percent of population receiving quality HIV / AIDS / STI / TB case management", + "type": "data", + "explanation": "In this context, 'Service delivery data' is used to quantify and assess the provision of services, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' which often implies a structured collection of information.", + "contextual_reason_agent": "In this context, 'Service delivery data' is used to quantify and assess the provision of services, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 79, + "text": "Initially the survey verifies and ensures the beneficiaries identified in the selection process meet the selection criteria ( male, female, age, IDP, HH heads, etc. ) of the project. In a second stage, the survey focuses on the work performed ( site, number of days, tools used, type of infrastructure, supervision, payments received from the money vendor, etc. ). In a third stage the Call Centre will focus on further work performed and verifies that the payments or benefits that were to be received by beneficiaries from earlier stages of the project, were in fact received. Based on validation and clearances of the Call Centre data, the contractual payments will be made to the Service Provider ( SP ) and the beneficiaries. Any diversions or lack of compliance with contractual obligations will result that a payment requested by the SP or due to the beneficiaries will be put on hold until an analysis / evaluation of the results found by the Call Centre is clarified or found to be justified. The payments due to the SP or the beneficiaries will be adjusted in cases where compliance with contractual obligation ( s ) is weak or lacking. 36.", + "ner_text": [ + [ + 206, + 212, + "named" + ], + [ + 46, + 59, + "survey <> reference population" + ], + [ + 506, + 519, + "survey <> reference population" + ], + [ + 716, + 729, + "survey <> reference population" + ], + [ + 858, + 871, + "survey <> reference population" + ], + [ + 1036, + 1049, + "survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Initially the survey verifies and ensures the beneficiaries identified in the selection process meet the selection criteria ( male, female, age, IDP, HH heads, etc. ) of the project. In a second stage, the survey focuses on the work performed ( site, number of days, tools used, type of infrastructure, supervision, payments received from the money vendor, etc. ). In a third stage the Call Centre will focus on further work performed and verifies that the payments or benefits that were to be received by beneficiaries from earlier stages of the project, were in fact received.", + "type": "survey", + "explanation": "In this context, the survey is explicitly described as a method for collecting data on beneficiaries and their work, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because surveys are often structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, the survey is explicitly described as a method for collecting data on beneficiaries and their work, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 98, + "text": "Quality of planning, quality of bidding documents, and overall quality of process Number of processes changed, terminated or revoked / total number of all processes per year Contract variation and extensions Percentage of contracts completed on time and with reasonable variations ( up to 25 percent ). Quality of planning and evaluation process. Number of contracts observing variations and extensions / total number of all contracts per year. All relevant financial and operational information relating to the Program is uploaded in the website of MEHE and Procurement related information in the website of CTB Information related to the program, including progress, expenditures and procurement documents are timely uploaded and kept updated on a daily basis on the websites of MEHE and CTB. Prevention of fraud and corruption, transparency and citizens \u2019 outreach. N / A Enhanced Grievance / Complaints Handling System put in place in MEHE with systematic recording and tracking of complaints and their resolution Number of complaints received, recorded and processed by MEHE through the regular processes. In addition, a tracking system will be in place to follow on the progress of each complaint up to its resolution. Prevention of fraud and corruption, institutional capacity building at MEHE and CTB. Existing complaints handling system does not keep records of complaints and their resolution. Establishment / upgrade of an information management system to record detailed data at the school level ( including financial data ) Number of reports generated by the information system on a periodical basis ( quarterly ) which will include detailed data related to various aggregates ( geographical / demographical distribution at each school level, financial ( transfers and expenditures ) and non-financial ). Prevention of fraud and corruption down to the school level. Comparing physical implementation to financial outflow. N / A", + "ner_text": [ + [ + 1434, + 1463, + "named" + ] + ], + "validated": false, + "empirical_context": "Existing complaints handling system does not keep records of complaints and their resolution. Establishment / upgrade of an information management system to record detailed data at the school level ( including financial data ) Number of reports generated by the information system on a periodical basis ( quarterly ) which will include detailed data related to various aggregates ( geographical / demographical distribution at each school level, financial ( transfers and expenditures ) and non-financial ). Prevention of fraud and corruption down to the school level.", + "type": "system", + "explanation": "However, it is described as a system for managing information, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions recording detailed data.", + "contextual_reason_agent": "However, it is described as a system for managing information, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 35, + "text": "This can also provide a platform for remote supervision and real-time risk monitoring of project activities. 89. Data verification will be built into the M & E arrangements. Data will be verified on a sample basis by a third - party monitoring ( TPM ) agent. The TPM will be recruited to monitor progress of project activities and verify project data on a sample basis throughout implementation. The TPM will report directly to the PSC. The information collected by the TPM will help triangulate the data submitted by the various implementing arms including NTTIs and universities. Spending reported by participants will be verified through an independent audit of MoGEI and MoHEST accounts. Indicators that are reported will be cross-referenced with EMIS data. Effective verification will depend on the quality of EMIS data available, which will be strengthened with support from the Project. 90. The Project will invest in an integrated system for the management and analysis of data collected through various channels for effective monitoring. The integrated monitoring system will be designed to capture the data needed to report across all the indicators outlined in the Results Framework. This system will incorporate capacities for data importing, either manually or directly via online survey and / or SMS survey platforms and EMIS data. Data collected on mobile devices by project monitoring officers or from the communities will also be uploaded to the system.", + "ner_text": [ + [ + 1310, + 1330, + "named" + ] + ], + "validated": false, + "empirical_context": "The integrated monitoring system will be designed to capture the data needed to report across all the indicators outlined in the Results Framework. This system will incorporate capacities for data importing, either manually or directly via online survey and / or SMS survey platforms and EMIS data. Data collected on mobile devices by project monitoring officers or from the communities will also be uploaded to the system.", + "type": "platform", + "explanation": "However, it is not a dataset itself but rather a tool used for collecting data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'SMS survey platforms' suggests a method of data collection.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a tool used for collecting data.", + "contextual_signal": "mentioned as a method for data collection, not as a data source", + "tags": [] + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 27, + "text": "For refugee populations in particular, limited coverage of health services and WASH facilities among these populations increases the risk of disease outbreaks, such as acute watery diarrhea ( e. g., cholera ), measles, chicken pox and multi-drug resistant tuberculosis. 38. Inadequate civil registration and vital statistics ( CRVS ) system as well as medical health information systems contribute to the challenges in health sector \u2019 s M & E and planning. The upcoming transition to the new national identification system, with unique ID, will help solve this challenge. However, a strategy to provide the unique ID is yet to be developed for refugee populations in Ali Addeh, Hol Hol and Obock. A lack of a unique ID is currently not presenting a barrier to access health services but ensuring that better patient / user data is collected at the point of service, including for refugee populations is essential for budgeting and M & E processes. 39. Development partners are engaged in the health sector and on the refugee agenda in Djibouti. UNICEF focuses on building community structures for health service delivery accountability, working both on the demand for and supply of health services. It also has a strong focus on nutrition, given the high rates of stunting and malnutrition in Djibouti and is currently planning to use the community platform for improving nutrition indicators among children. UNICEF supports 33 health posts that contain nutrition units. In close collaboration with UNICEF", + "ner_text": [ + [ + 352, + 386, + "named" + ] + ], + "validated": false, + "empirical_context": "38. Inadequate civil registration and vital statistics ( CRVS ) system as well as medical health information systems contribute to the challenges in health sector \u2019 s M & E and planning. The upcoming transition to the new national identification system, with unique ID, will help solve this challenge.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system contributing to challenges, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information systems' which often relates to data management.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system contributing to challenges, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 18, + "text": "As in other countries, fear of stigmatization, retaliation, and rejection likely impede women from reporting experiences of violence or seeking care. Harmful practices persist in Ethiopia, with 65 percent of women having experienced some form of female genital 27 IOM estimates that there were 2, 091, 387 IDPs in Ethiopia in December 2020 / January 2021 ( as reported in its Ethiopia National Displacement Report 7 ) and noted that there were 4, 239, 636 IDPs in Ethiopia in September 2021 ( as reported in its Ethiopia National Displacement Report 10, which is the most recent data available ). 28 Ibid. 29 World Bank. 2020. Inclusive Development in Local Areas of Violence and IDP Hosting Areas: Risks and Opportunities. Washington, D. C. Mimeo. 30 https: / / dhsprogram. com / pubs / pdf / FR328 / FR328. pdf. 31 Survey locations included the Addis Ababa, Afar, Amhara, Benishangul-Gumuz, Oromia, and SNNPR regions. 32 Population Council and UNFPA, 2010. The age of the survey also highlights the lack of data in this area. 33 Ethiopia Central Statistical Agency. 2017. Ethiopia Demographic and Health Survey 2016. Addis Ababa.", + "ner_text": [ + [ + 1074, + 1112, + "named" + ], + [ + 88, + 93, + "Ethiopia Demographic and Health Survey <> reference population" + ], + [ + 179, + 187, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 847, + 858, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 860, + 864, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 866, + 872, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 874, + 891, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 893, + 899, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 905, + 910, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 923, + 941, + "Ethiopia Demographic and Health Survey <> author" + ], + [ + 1068, + 1072, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 1074, + 1082, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 1113, + 1117, + "Ethiopia Demographic and Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2017. Ethiopia Demographic and Health Survey 2016. Addis Ababa.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific demographic and health survey conducted in Ethiopia, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific demographic and health survey conducted in Ethiopia, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 40, + "text": "30 Sensitive information will only be accessible to authorized staff and CNDDR will ensure data confidentiality and access levels as per the instructions of the Government of Mali and responsible implementing agency. Each candidate and their household will, therefore, have a detailed record allowing for a proper personalized assessment and facilitation. Implementing partners will utilize the unique DDR ID, photographs and wherever possible, fingerprints to uniquely identify each candidate and provide service delivery feedback to the national implementing agency. 19. World Bank task team will support the development of the second component of the MIS specifically for reinsertion project. This component will be developed using smartphone and tablet based technologies that have been proven reliable and practical in fragile states with lack of Internet and telecommunications infrastructure. The system will allow data collection in the field for surveys, monitoring, service delivery and implementing partner \u2019 s feedback on the service delivery progress. The World Bank task team will work with the MINUSMA team to assist with the design of the necessary system architecture to achieve the project objectives. The World Bank will share with MINUSMA the type of information that needs to be captured during registration process in cantonments that will provide vital information for the reinsertion project. The data obtained during registration will be used in planning the reinsertion activities, and will be utilized by reinsertion component of the MIS system.", + "ner_text": [ + [ + 402, + 408, + "named" + ] + ], + "validated": false, + "empirical_context": "Each candidate and their household will, therefore, have a detailed record allowing for a proper personalized assessment and facilitation. Implementing partners will utilize the unique DDR ID, photographs and wherever possible, fingerprints to uniquely identify each candidate and provide service delivery feedback to the national implementing agency. 19.", + "type": "identifier", + "explanation": "'DDR ID' is not a dataset as it functions as an identifier rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DDR ID' is a dataset because it appears to be a unique identifier related to candidates.", + "contextual_reason_agent": "'DDR ID' is not a dataset as it functions as an identifier rather than a structured collection of data.", + "contextual_signal": "mentioned as a unique identifier, not as a data source", + "tags": [] + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 92, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level. People provided with new or improved electricity service - Female Semi - annually. National statistics / Proj ect data. Estimated based on the share of female in the national population as published by the national statistics institute or other credible sources. NIGELEC / ANPER / ANERS OL. Refugee and host beneficiaries provided with electricity services through grid, mini grids and electrified public institutions This relates to the number of refugees and host populations living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. NIGELEC / ANPER / ANERS OL.", + "ner_text": [ + [ + 1225, + 1243, + "named" + ], + [ + 4, + 14, + "project-level data <> publisher" + ], + [ + 713, + 743, + "project-level data <> reference population" + ], + [ + 1128, + 1138, + "project-level data <> publisher" + ] + ], + "validated": true, + "empirical_context": "National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. NIGELEC / ANPER / ANERS OL.", + "type": "data", + "explanation": "In this context, 'project-level data' is indeed used as a source of information related to the analysis of refugees and host communities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'project-level data' is a dataset because it refers to data collected specifically for projects.", + "contextual_reason_agent": "In this context, 'project-level data' is indeed used as a source of information related to the analysis of refugees and host communities.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 12, + "text": "Its real gross domestic product growth slowed to 6. 1 percent in 2019 / 20 due to the Coronavirus Disease ( COVID-19 ). Employment rates plunged in the early days of the pandemic, particularly in urban areas where the rate dropped from 80 percent before COVID-19 to 65 percent in April 2020, with changes noted more prominently in female-headed households. This, coupled with slow agricultural growth related to high levels of sensitivity and vulnerability of the agriculture sector to impacts of climate variability and change, is expected to lead to a rise in poverty. 3 Projections based on a high-frequency phone survey results4 suggest that there were two million more poor in December 2020 compared to 2019. Though employment levels have been recovering, vulnerability is expected to remain high, resulting from the transition of many Ethiopians to fewer steady types of employment, such as self-employment, casual employment, and family work. 5 3. Ethiopia aims to reach a lower-middle-income status by 2025, and to reduce the poverty level to seven percent by 2029 / 30. To reach these goals, it has embarked on a series of reforms and accelerated investments.", + "ner_text": [ + [ + 596, + 623, + "named" + ], + [ + 280, + 290, + "high-frequency phone survey <> reference year" + ], + [ + 682, + 695, + "high-frequency phone survey <> reference year" + ], + [ + 841, + 851, + "high-frequency phone survey <> reference population" + ], + [ + 955, + 963, + "high-frequency phone survey <> data geography" + ], + [ + 1221, + 1239, + "high-frequency phone survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "This, coupled with slow agricultural growth related to high levels of sensitivity and vulnerability of the agriculture sector to impacts of climate variability and change, is expected to lead to a rise in poverty. 3 Projections based on a high-frequency phone survey results4 suggest that there were two million more poor in December 2020 compared to 2019. Though employment levels have been recovering, vulnerability is expected to remain high, resulting from the transition of many Ethiopians to fewer steady types of employment, such as self-employment, casual employment, and family work.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data used for analysis regarding poverty levels in Ethiopia.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on poverty levels.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data used for analysis regarding poverty levels in Ethiopia.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 34 of 47 teachers trained using scripted lessons; ( c ) boundary wall and functional toilets; ( d ) desk and chair for each student; ( e ) trained PTSMCs meeting regularly and endorsing monthly teacher attendance ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Targeted HFs having majority of essential medicines for RMNCHN services Percentage of targeted HFs having > 75 percent of essential RMNCHN medicines. Bi \u2010 annual Health institutional database Self \u2010 reported data Health Department, HMIS, PPHI Absenteeism among key staff to provide RMNCHN services Percentage of key staff present at HFs during duty time to provide RMNCHN services. Baseline is a preliminary estimate. Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "ner_text": [ + [ + 1284, + 1288, + "named" + ] + ], + "validated": false, + "empirical_context": "Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "type": "system", + "explanation": "'DHIS' is mentioned as a system but not as a data source in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is mentioned in the context of health information systems.", + "contextual_reason_agent": "'DHIS' is mentioned as a system but not as a data source in the context provided.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 72, + "text": "for drafting bidding documents; ( 10 ) Coordination and integration of the Program will be done by a central agency, such as MOPIC, for Results Areas involving multiple agencies. Planning and Budgeting 4. The assessment confirms that Jordan \u2019 s budget systems are adequate for the Program. Implementing agencies use the GFMIS for budget preparation and execution. The GFMIS is fully utilized for budget execution, but not for budget preparation, given that manual interventions with various IT tools are being used. The GFMIS offers limited functionalities include: ( a ) the presentation of an initial policy-focused budget submission; ( b ) the entering of multiple quarters and multi - year commitments; and ( c ) capturing and populating the outstanding commitments and outstanding invoices for arrears monitoring. As a result, arrears have continued to be a problem. Jordan \u2019 s budgetary central government budget classification meets Government Financial Statistics / Classification of the Functions of Government standards. 31 These classifications are included in the current chart of accounts, allowing for all transactions to be reported in accordance with the appropriate standards. The budget is published on the GBD \u2019 s website ( www. gbd. gov. jo ). The final accounts and the monthly General Government Finance Bulletin, which include budgetary government finance statistics aggregated according to the economic and functional classifications ), are also published on the Ministry of Finance \u2019 s website. 31 Jordan: Public Expenditure and Financial Accountability ( PEFA ) Assessment ( 2022 ).", + "ner_text": [ + [ + 520, + 525, + "named" + ] + ], + "validated": false, + "empirical_context": "The GFMIS is fully utilized for budget execution, but not for budget preparation, given that manual interventions with various IT tools are being used. The GFMIS offers limited functionalities include: ( a ) the presentation of an initial policy-focused budget submission; ( b ) the entering of multiple quarters and multi - year commitments; and ( c ) capturing and populating the outstanding commitments and outstanding invoices for arrears monitoring. As a result, arrears have continued to be a problem.", + "type": "system", + "explanation": "However, GFMIS is described as a system used for budget execution, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is mentioned in the context of budget execution and data management.", + "contextual_reason_agent": "However, GFMIS is described as a system used for budget execution, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "185_multi-page", + "page": 59, + "text": "A country program should identify and measure at least one indicator within each of the following categories: * Program coverage, including the share of communities covered * Condom accessibility and quality * Stigma and discrimination * Knowledge of HIV transmission and prevention * Rates of counseling, testing and referral * Parent-to-child transmission ( where such services are offered ) * Sexual behavior in the general population and among young people B Blood safety * STD care and treatment ( e. g. appropriate diagnoses, STD drug supply ) C Care and support for those infected and affected by HIV / AIDS H Health and social impact ( including rates of STIs, especially among young people, and prevalence of orphans ) Given the importance of such information, each country program should invest substantially in establishing a robust M & E system for which coordination may be done by the secretariat of a country ' s national HIV / AIDS council. This need not be done from scratch. Most countries have some system in place, and some of the data needed are available from existing information systems. But most systems need strengthening, especially in the areas of systematic data synthesis and use. M & E units will be able to call on technical expertise from IPAA partners in reinforcing their systems.", + "ner_text": [ + [ + 1082, + 1110, + "named" + ] + ], + "validated": false, + "empirical_context": "This need not be done from scratch. Most countries have some system in place, and some of the data needed are available from existing information systems. But most systems need strengthening, especially in the areas of systematic data synthesis and use.", + "type": "system", + "explanation": "However, 'existing information systems' refers to systems rather than a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'information systems' can imply structured data collections.", + "contextual_reason_agent": "However, 'existing information systems' refers to systems rather than a specific dataset or data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 33, + "text": "The Project MIS will have a publicly accessible portal and dashboard that will provide information on overall project progress and the status of key results indicators, as well as detailed sub - project, commune and province level data. 86. Evaluations and Assessments. A baseline study will be conducted during the inception phase and at project closing, to evaluate qualitative and quantitative aspects of project results. Several special studies will be undertaken to enhance understanding of key aspects of the project, as follows: ( a ) Economic Analysis of selected completed sub - projects, including an assessment of the impact on income from the investments made under Component 2; ( b ) Technical and Maintenance Reviews of the quality of infrastructure works and maintenance. A sample of completed sub-projects will be visited by teams of engineers to assess the quality of construction, ongoing functionality and identify issues with maintenance; and ( c ) Process Evaluation: teams of qualitative researchers will spend extended periods of time in a small sample of communes to document the bottom-up planning process and project implementation to identify good practices that can be shared and potential bottlenecks that need to be dealt with by management. The studies will ensure a gendered analysis, documenting good practices for ensuring women \u2019 s involvement in the activities. Work will commence on these studies in year two so results are available for the mid-term review to inform mid-stream corrections as needed. 87. Monitoring of the refugee protection framework. The World Bank, in cooperation with UNHCR, will monitor the ongoing adequacy of the refugee protection framework in Burundi, including compliance with national legislation and", + "ner_text": [ + [ + 4, + 15, + "named" + ] + ], + "validated": false, + "empirical_context": "The Project MIS will have a publicly accessible portal and dashboard that will provide information on overall project progress and the status of key results indicators, as well as detailed sub - project, commune and province level data. 86.", + "type": "program", + "explanation": "However, 'Project MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Project MIS' is a dataset because it mentions providing information and data on project progress.", + "contextual_reason_agent": "However, 'Project MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 13, + "text": "Poverty is disproportionately concentrated in rural areas and in the northern regions of the country. The 2014 household survey found that 56. 8 percent of rural families are poor, compared to just 8. 9 percent of urban families. 3 Overall, approximately 87 percent of the poor live in rural areas. Moreover, a majority of poor individuals are concentrated in the three northern regions of the country: the Far North, North, and Adamawa regions. More than one-half ( 56 percent ) of all poor inhabitants are located in the Far North and North regions, a significant increase from 34 percent in 2001. While poverty has increased in northern Cameroon, the incidence of poverty in the center-west of the country ( in the Littoral, Center, West, and South West regions ), as well as in Douala and Yaound\u00e9, has declined. 3. A rapid increase in poverty in northern Cameroon had been observed before the heightened regional insecurity associated with Boko Haram activities in 2014. The deterioration of the security environment in northern Cameroon occurred largely over the course of the past three years, following the collection of data in support of the 2014 Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ( ECAM \u2013 Cameroon Household Survey ) household survey.", + "ner_text": [ + [ + 1205, + 1230, + "named" + ], + [ + 106, + 110, + "Cameroon Household Survey <> publication year" + ], + [ + 111, + 127, + "Cameroon Household Survey <> data type" + ], + [ + 156, + 170, + "Cameroon Household Survey <> reference population" + ], + [ + 429, + 444, + "Cameroon Household Survey <> data geography" + ], + [ + 969, + 973, + "Cameroon Household Survey <> reference year" + ], + [ + 1151, + 1155, + "Cameroon Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "A rapid increase in poverty in northern Cameroon had been observed before the heightened regional insecurity associated with Boko Haram activities in 2014. The deterioration of the security environment in northern Cameroon occurred largely over the course of the past three years, following the collection of data in support of the 2014 Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ( ECAM \u2013 Cameroon Household Survey ) household survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a household survey that collected data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a household survey that collected data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 62, + "text": "Monitoring and Evaluation Arrangement 162. In order for monitoring and evaluation to be completed, several different types of data will be collected during the six-year project period. This data will together allow for reporting of the results indicators and also be used for the purposes of impact evaluation. 163. For Component 1, the primary source of data will be the entry forms for new applicants and the assessments of beneficiaries who are due for recertification. The information on new applicants and also on beneficiaries due for recertification will be collected by the district offices, which will enter them into the electronic databases and then transmit the information to the branch offices and eventually the head office in Sana \u2019 a. The forms and assessments will obtain all the information necessary for the application of the PMT method. The district offices will collect the data on a timely basis and provide them to the branch offices without significant delay. The PMT method will be applied to the data at the main office in order to classify households in one of the six PMT groups. The Monitoring and Evaluation Department at the SWF will ensure that the data are tabulated in time to meet the monitoring requirements of the project. 164.", + "ner_text": [ + [ + 411, + 471, + "named" + ] + ], + "validated": false, + "empirical_context": "163. For Component 1, the primary source of data will be the entry forms for new applicants and the assessments of beneficiaries who are due for recertification. The information on new applicants and also on beneficiaries due for recertification will be collected by the district offices, which will enter them into the electronic databases and then transmit the information to the branch offices and eventually the head office in Sana \u2019 a.", + "type": "assessment", + "explanation": "However, it is not a structured collection of data but rather a type of evaluation or assessment process.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of assessments related to beneficiaries.", + "contextual_reason_agent": "However, it is not a structured collection of data but rather a type of evaluation or assessment process.", + "contextual_signal": "mentioned only as a process, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 10, + "text": "As of August 2017, Jordan hosts 660, 5822 registered Syrian refugees, of which 232, 8683 are school \u2010 aged children requiring the provision of education services. Eighty percent of refugees live in host communities, representing 10 percent of Jordan \u2019 s population, while the rest live in dedicated refugee camps. Per the recent 2016 government census, the total number of Syrians in Jordan reached an estimated 1. 265 million, representing 13. 2 percent of population. 4 Jordan has been committed to integrating Syrian refugee children in the public formal sector, and as of June 2017, approximately 10 percent of children in public schools were Syrian refugees. Therefore, it is important that education services to refugee children in Jordan respond to the nature of the challenges they face in the education system. 3. Jordan \u2019 s economic development hinges on the existence of an education system that provides students with the cognitive and socioemotional skills needed to succeed in the labor market. Realizing the full potential of educational investments for economic prosperity requires improving access and quality of education for both girls and boys. 5 Additionally, the cost of not educating refugee children is high in terms of loss of human capital for regional economic development, as well as for the long \u2010 term processes of peace, stability, and reconstruction. It is thus necessary for the education system to be agile and address the existing education quality challenges by strengthening the system ' s ability to manage a growing number of students, including refugee and vulnerable children. 1 Human Development Data ( 1990 \u2013 2015 ); Human Development Report. 2016. 2 United Nations High Commissioner for Refugees ( UNHCR ). August 6, 2017. 3 Brussels Conference Paper. 2017. 4 Department of Statistics ( DOS ); National census. November 2016. 5 OECD. 2016. PISA. 2015 Results ( Volume I ): Excellence and Equity in Education, OECD Publishing, Paris. http: / / dx. doi. org / 10. 1787 / 9789264266490 \u2010 en.", + "ner_text": [ + [ + 1838, + 1853, + "named" + ], + [ + 19, + 25, + "National census <> data geography" + ], + [ + 472, + 478, + "National census <> data geography" + ], + [ + 823, + 829, + "National census <> data geography" + ], + [ + 1804, + 1828, + "National census <> author" + ], + [ + 1855, + 1868, + "National census <> reference year" + ] + ], + "validated": true, + "empirical_context": "2017. 4 Department of Statistics ( DOS ); National census. November 2016.", + "type": "census", + "explanation": "The national census is explicitly mentioned as a data source in the context, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because a national census typically involves a structured collection of demographic data.", + "contextual_reason_agent": "The national census is explicitly mentioned as a data source in the context, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 11, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 7 of 47 delivered by UNHCR through the Pakistan Post. This program tries to mirror the support to citizens under the Ehsaas umbrella in order to minimize unequal distribution of support. 8. Balochistan hosts around 325, 000 registered Afghan refugees. About 47 percent of them are females, and more than half of them ( 53 percent ) are less than 18 years of age. Districts with the highest presence of registered refugees include Quetta, Pishin, Chagai, Loralai, Killa Saifullah, and Killa Abdullah ( table 1 ). More than half of the refugees in Balochistan live in urban Quetta ( 56 percent ), whereas the remaining live in rural settlements ( 29 percent ) and refugee villages ( 15 percent ). Table 1. Number and Percentage of Afghan Refugees by District in Balochistan District Population Number of Registered Afghan Refugees Percentage of Registered Afghan Refugees Quetta 2, 275, 699 187, 031 8. 2 Pishin 736, 481 54, 691 7. 4 Chagai 226, 008 28, 901 12. 8 Loralai 397, 400 18, 894 4. 8 Killa Saifullah 342, 814 18, 842 5. 5 Killa Abdullah 757, 578 10, 775 1. 4 Source: Population data from Census 2017; Registered refugee data from UNHCR as", + "ner_text": [ + [ + 1173, + 1179, + "named" + ], + [ + 15, + 26, + "Census <> data geography" + ], + [ + 267, + 278, + "Census <> data geography" + ], + [ + 507, + 513, + "Census <> data geography" + ], + [ + 515, + 521, + "Census <> data geography" + ], + [ + 523, + 529, + "Census <> data geography" + ], + [ + 531, + 538, + "Census <> data geography" + ], + [ + 561, + 575, + "Census <> data geography" + ], + [ + 649, + 655, + "Census <> data geography" + ], + [ + 947, + 953, + "Census <> data geography" + ], + [ + 1152, + 1167, + "Census <> data type" + ], + [ + 1173, + 1184, + "Census <> publication year" + ] + ], + "validated": true, + "empirical_context": "5 Killa Abdullah 757, 578 10, 775 1. 4 Source: Population data from Census 2017; Registered refugee data from UNHCR as", + "type": "census", + "explanation": "In this context, 'Census' is explicitly mentioned as a source of population data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Census' is a dataset because it is a structured collection of population data.", + "contextual_reason_agent": "In this context, 'Census' is explicitly mentioned as a source of population data, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 27, + "text": "Financing under this component will provide support for the establishment of the WSS unit within the ministry, creation of the WSS MIS, development and approval of the necessary reporting protocols, and support for the preparation of the WSS sector assessment report to be published annually after the Year 3 of the project. The WSS unit will be tasked with ensuring coordination with the stakeholder agencies in designing climate-smart WSW interventions and their implementation at the utility and community levels. The MIS will ensure availability of the WSS sector data and monitor gender disaggregation of the utility workforce so that gender gaps in economic opportunities can be measured. The MIS data will lay the basis for development of national sectoral policies ( sectoral policies and monitoring reports content analysis ) and climate adaptation strategies. The component includes support to the Department of Geology in digitalization of the registry of wells used for water supply purposes as part of the water cadaster, with particular focus on the Khatlon region. Climate change is expected to lead to diminished groundwater recharge in some areas because of reduced precipitation and decreased runoff. Monitoring data for aquifer water level, changes in chemistry, and detection of", + "ner_text": [ + [ + 521, + 524, + "named" + ] + ], + "validated": false, + "empirical_context": "The WSS unit will be tasked with ensuring coordination with the stakeholder agencies in designing climate-smart WSW interventions and their implementation at the utility and community levels. The MIS will ensure availability of the WSS sector data and monitor gender disaggregation of the utility workforce so that gender gaps in economic opportunities can be measured. The MIS data will lay the basis for development of national sectoral policies ( sectoral policies and monitoring reports content analysis ) and climate adaptation strategies.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with data availability and monitoring.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 45, + "text": "This approach will, reduce potential inclusion errors, link with existing approaches such as the HEA, and lay the foundation of a methodological approach to support the establishment of a social registry. 29. Registration system and social registry. Based on the harmonized data collection procedures and survey instruments, the project will support the Government in developing a social registry, which will eventually function as a single registry. A social registry is a database that is capable of collecting, analyzing and storing the following information: personally identifying data ( either at an individual level or grouped into family or households ); socio - economic data which would be used to classify individual identities into poverty or vulnerability categories through the application of PMT. Hence, the social registry supports targeting, scoring, selection, on-boarding, identification, and, verification processes all linked to identity. Moreover, the registry eventually would allow: ( a ) different actors and programs to target households according to their own program objectives; ( b ) better coordinate interventions, avoid duplication and save significant costs in data collection activities; and ( c ) improve capacity to quickly scale up safety net programs in face of shocks.", + "ner_text": [ + [ + 563, + 590, + "named" + ] + ], + "validated": false, + "empirical_context": "Based on the harmonized data collection procedures and survey instruments, the project will support the Government in developing a social registry, which will eventually function as a single registry. A social registry is a database that is capable of collecting, analyzing and storing the following information: personally identifying data ( either at an individual level or grouped into family or households ); socio - economic data which would be used to classify individual identities into poverty or vulnerability categories through the application of PMT. Hence, the social registry supports targeting, scoring, selection, on-boarding, identification, and, verification processes all linked to identity.", + "type": "data", + "explanation": "However, it is not a dataset itself but rather a type of data that may be included in a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'personally identifying data' sounds like a structured collection of information.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a type of data that may be included in a dataset.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 15, + "text": "According to data from the International Labor Organization, a lack of safe transport reduces the probability of women accessing economic opportunities by 16. 5 percent. 15 In 2020, the regional rate was 18. 7 fatalities per 100, 000 inhabitants, which is higher than the national rate ( 15. 5 ) ( DATASUS data ). 16 Looking at the formal jobs of the Region, 69 percent of the people that receive more than ten minimum wages, on average, are men. Meanwhile, 94 percent of indigenous or black women receive less than 3 minimum wages. Data Source: RAIS, 2019. 17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents. 18 Data derived from PNAD-Cont\u00ednua ( 2019 ) State of Santa Catarina. 19 Hayd\u00e9e Svab, Marina Kohler Harkot, and Beatriz Moura Dos Santos, A Baseline Study of Gender and Transport in Sao Paulo, Brazil: Present Initiatives to Improve Women \u2019 s Mobility ( English ) ( Washington, DC: World Bank, 2021 ). 20 Brazil - Improving Mobility and Urban Inclusion in the Amazonas Corridor in Belo Horizonte Project ( Washington, DC: World Bank ). 21 Data reflect cases of harassment in public spaces without disaggregation. However, given the trend in Brazil and the Region, it can be inferred that in the Santa Catarina Region, more women are also survivors of sexual harassment.", + "ner_text": [ + [ + 749, + 762, + "named" + ], + [ + 552, + 556, + "PNAD-Cont\u00ednua <> publication year" + ], + [ + 561, + 595, + "PNAD-Cont\u00ednua <> data type" + ], + [ + 600, + 617, + "PNAD-Cont\u00ednua <> data geography" + ], + [ + 765, + 769, + "PNAD-Cont\u00ednua <> reference year" + ], + [ + 772, + 795, + "PNAD-Cont\u00ednua <> data geography" + ], + [ + 813, + 833, + "PNAD-Cont\u00ednua <> author" + ], + [ + 1321, + 1342, + "PNAD-Cont\u00ednua <> data geography" + ] + ], + "validated": true, + "empirical_context": "17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents. 18 Data derived from PNAD-Cont\u00ednua ( 2019 ) State of Santa Catarina. 19 Hayd\u00e9e Svab, Marina Kohler Harkot, and Beatriz Moura Dos Santos, A Baseline Study of Gender and Transport in Sao Paulo, Brazil: Present Initiatives to Improve Women \u2019 s Mobility ( English ) ( Washington, DC: World Bank, 2021 ).", + "type": "dataset", + "explanation": "In the context, it is explicitly mentioned as a source of data derived from PNAD-Cont\u00ednua, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data in the context.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of data derived from PNAD-Cont\u00ednua, confirming its role as a dataset.", + "contextual_signal": "follows 'data derived from'", + "tags": [] + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 37, + "text": "The World Bank Development Response to Displacement Impacts Project in the Horn of Africa Phase II ( P178047 ) Page 32 of 80 88. High rates of GBV nationally are likely to be worse in the project areas, as survivors from host and refugee communities often face challenges in accessing care. The 2016 Ethiopia Demographic and Health Survey ( DHS ) indicates that 26 percent of all women aged 15 to 49 reported having experienced physical or sexual violence in their lifetime, with 23 percent reporting physical violence and 10 percent experiencing sexual violence. 60 Experience of IPV is particularly pronounced. More than one-third of women ( 34 percent ) have experienced some form of spousal violence \u2013 physical, sexual, or emotional. The survey found that acceptability of use of violence at home was high, with 63 percent of women and 28 percent of men believing that wife beating was justified for at least one specified reason. The DHS also showed that help-seeking behavior of GBV survivors was limited \u2013 only 23 percent of women who experienced GBV sought help, while 66 percent of women neither sought help nor told anyone about their experience. DRDIP offers additional resources to prevent and respond to GBV, which is among the major protection risks preventing women from actively engaging in development processes and pursuing livelihood opportunities. 89.", + "ner_text": [ + [ + 300, + 338, + "named" + ], + [ + 295, + 299, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 300, + 308, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 380, + 399, + "Ethiopia Demographic and Health Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "High rates of GBV nationally are likely to be worse in the project areas, as survivors from host and refugee communities often face challenges in accessing care. The 2016 Ethiopia Demographic and Health Survey ( DHS ) indicates that 26 percent of all women aged 15 to 49 reported having experienced physical or sexual violence in their lifetime, with 23 percent reporting physical violence and 10 percent experiencing sexual violence. 60 Experience of IPV is particularly pronounced.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced for empirical analysis of GBV rates among women in Ethiopia.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that provides statistical data on GBV rates.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced for empirical analysis of GBV rates among women in Ethiopia.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "ner_text": [ + [ + 533, + 549, + "named" + ], + [ + 155, + 211, + "Beneficiary data <> data description" + ], + [ + 220, + 225, + "Beneficiary data <> reference population" + ], + [ + 646, + 658, + "Beneficiary data <> data type" + ] + ], + "validated": true, + "empirical_context": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "type": "data", + "explanation": "In the context, 'Beneficiary data' is explicitly mentioned as being gathered and updated, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Beneficiary data' is a dataset because it refers to a collection of information about beneficiaries.", + "contextual_reason_agent": "In the context, 'Beneficiary data' is explicitly mentioned as being gathered and updated, indicating it functions as a data source.", + "contextual_signal": "described as data collected through registration and updated during project implementation", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 31, + "text": "23 Include refugees and economic migrants in the Department of Statistics Employment and Unemployment Surveys ( target date: quarterly survey starting January 2017 ) Y Carry out an awareness-raising campaign regarding household enterprises to inform the public about the availability of this option and the reduced restrictions and incentives to foster the creation of such businesses. Y Instruct the JIC to recruit 26 high caliber staff with relaxed salary constraints as recently allowed by the Cabinet ( target date: June 2017 ) Y Publish and publicize an \u2018 investor \u2019 s guide \u2019 for domestic and foreign investors with specific information for Syrian investors and other restricted nationalities ( target date: December 2017 ) Y Set up a Client Relationship Management ( CRM ) information technology ( IT ) system and database for investor targeting and aftercare at the JIC ( target date: December 2018 ) Y Strengthen the MOL \u2019 s Inspectorate Unit \u2019 s capacity to identify, track, and more effectively resolve noncompliance on labor and environmental standards through the development of IT tools ( database and program ) to enable staff to input and monitor data ( including development of a baseline data on labor and environmental compliance to standards ); establish linkages with other entities within the MOL ( such as hotline, child labor, operational safety and health [ OSH ] ), Ministry of Environment, and Ministry of Social Development; and increase effectiveness", + "ner_text": [ + [ + 49, + 109, + "named" + ], + [ + 11, + 19, + "Department of Statistics Employment and Unemployment Surveys <> reference population" + ], + [ + 24, + 41, + "Department of Statistics Employment and Unemployment Surveys <> reference population" + ], + [ + 151, + 163, + "Department of Statistics Employment and Unemployment Surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "23 Include refugees and economic migrants in the Department of Statistics Employment and Unemployment Surveys ( target date: quarterly survey starting January 2017 ) Y Carry out an awareness-raising campaign regarding household enterprises to inform the public about the availability of this option and the reduced restrictions and incentives to foster the creation of such businesses. Y Instruct the JIC to recruit 26 high caliber staff with relaxed salary constraints as recently allowed by the Cabinet ( target date: June 2017 ) Y Publish and publicize an \u2018 investor \u2019 s guide \u2019 for domestic and foreign investors with specific information for Syrian investors and other restricted nationalities ( target date: December 2017 ) Y Set up a Client Relationship Management ( CRM ) information technology ( IT ) system and database for investor targeting and aftercare at the JIC ( target date: December 2018 ) Y Strengthen the MOL \u2019 s Inspectorate Unit \u2019 s capacity to identify, track, and more effectively resolve noncompliance on labor and environmental standards through the development of IT tools ( database and program ) to enable staff to input and monitor data ( including development of a baseline data on labor and environmental compliance to standards ); establish linkages with other entities within the MOL ( such as hotline, child labor, operational safety and health [ OSH ] ), Ministry of Environment, and Ministry of Social Development; and increase effectiveness", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to structured surveys conducted by the Department of Statistics to collect employment and unemployment data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Surveys', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured surveys conducted by the Department of Statistics to collect employment and unemployment data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "161_28046", + "page": 30, + "text": "Annex 4: Detailed Project Description GUINEA: HEALTH SECTOR SUPPORT PROJECT TARGETING The project would target the 16 poorest prefectures in the country ( as identified by the Poverty Map ), and 2 prefectures ( Kissidougou and GuCckCdou ) which have been particularly hit with a difficult refugees situation, as listed below: LIST OF PREFECTURES TARGETED BY THE PROJECT Prdfectures Koundara Gaoual TklimelC Mali Koubia Tougue Lklouma Dalaba Pita Dinguiraye Dabola Kissidougou GuCckCdou Beyla Siguiri Kouroussa Mandiana KerouanC Total: Administrative Region Bok6 Bok6 Kindia Labe Labe Labe Lab6 Mamou Mamou Faranah Faranah Far anah N ' ZCrCkore N ' ZerCkork Kankan Kankan Kankan Kankan Natural Region Moyenne Guinee Basse GuinCe C Y Moyenne Guinee C Y C Y C Y C Y Haute GuinCe Guinke Forestibre C Y C Y C Y Haute Guin6e C Y C Y C Y Nbr. Of health centers 7 8 14 13 6 10 11 10 12 8 9 15 13 15 15 12 12 8 198 ____ - - Nbr.", + "ner_text": [ + [ + 176, + 187, + "named" + ], + [ + 38, + 44, + "Poverty Map <> data geography" + ], + [ + 115, + 137, + "Poverty Map <> reference population" + ], + [ + 211, + 222, + "Poverty Map <> data geography" + ], + [ + 227, + 236, + "Poverty Map <> data geography" + ], + [ + 464, + 475, + "Poverty Map <> data geography" + ] + ], + "validated": true, + "empirical_context": "Annex 4: Detailed Project Description GUINEA: HEALTH SECTOR SUPPORT PROJECT TARGETING The project would target the 16 poorest prefectures in the country ( as identified by the Poverty Map ), and 2 prefectures ( Kissidougou and GuCckCdou ) which have been particularly hit with a difficult refugees situation, as listed below: LIST OF PREFECTURES TARGETED BY THE PROJECT Prdfectures Koundara Gaoual TklimelC Mali Koubia Tougue Lklouma Dalaba Pita Dinguiraye Dabola Kissidougou GuCckCdou Beyla Siguiri Kouroussa Mandiana KerouanC Total: Administrative Region Bok6 Bok6 Kindia Labe Labe Labe Lab6 Mamou Mamou Faranah Faranah Far anah N ' ZCrCkore N ' ZerCkork Kankan Kankan Kankan Kankan Natural Region Moyenne Guinee Basse GuinCe C Y Moyenne Guinee C Y C Y C Y C Y Haute GuinCe Guinke Forestibre C Y C Y C Y Haute Guin6e C Y C Y C Y Nbr. Of health centers 7 8 14 13 6 10 11 10 12 8 9 15 13 15 15 12 12 8 198 ____ - - Nbr.", + "type": "map", + "explanation": "The Poverty Map is used as a source to identify specific prefectures targeted by the project, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in the context of identifying the poorest prefectures.", + "contextual_reason_agent": "The Poverty Map is used as a source to identify specific prefectures targeted by the project, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 4, + "validated": 3, + "not_validated": 1 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 44, + "text": "The PIU will include a dedicated Project Director and three technical specialists to cover TVET governance; curriculum, assessment, and quality assurance; and monitoring and evaluation. The PIU will also bring on board additional staff to support fiduciary ( both procurement and financial management ) and safeguards related activities but embed them in existing implementation structures in MENFOP which are in place to support other on-going projects - Expanding Opportunities for Learning ( P166059 ) and Education Emergency Response to COVID-19 ( P174128 ). This will ensure that World Bank projects help strengthen ministerial capacity in a manner that does not add to the MENFOP \u2019 s administrative burden. 85. The implementation arrangements for the Project ensures that the issue of refugee integration will reach the Council for Economic Development ( CDE ), the highest-level policy body in the country. The issue of including refugee candidates / beneficiaries into the training system will be managed and determined by the implementing agency, MENFOP, together with guidance from the CDE. B. Results Monitoring and Evaluation Arrangements 86. Monitoring and evaluation mechanism. A wide variety of mechanisms will be used to measure project outcomes including but not limited to administrative data collected through both the MENFOP and MT, other centrally sourced data in the country, standardized data collected from training providers, and data from periodic surveys, tracer studies, labor market analysis, and studies planned to be carried out annually to support project implementation. This multi-level data gathering system will be supported by the Project and will develop a linked system of information allowing different data to be mapped across sub-systems. This will help address one of the key issues in collecting data from TVET systems \u2013 avoidance of data from a fragmented system. The Project places 55 A draft training plan for select PIU and other staff is included in Annex 3.", + "ner_text": [ + [ + 1465, + 1481, + "named" + ], + [ + 1291, + 1310, + "periodic surveys <> data type" + ], + [ + 1398, + 1415, + "periodic surveys <> data type" + ], + [ + 1483, + 1497, + "periodic surveys <> data description" + ], + [ + 1499, + 1520, + "periodic surveys <> data description" + ], + [ + 2023, + 2041, + "periodic surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Monitoring and evaluation mechanism. A wide variety of mechanisms will be used to measure project outcomes including but not limited to administrative data collected through both the MENFOP and MT, other centrally sourced data in the country, standardized data collected from training providers, and data from periodic surveys, tracer studies, labor market analysis, and studies planned to be carried out annually to support project implementation. This multi-level data gathering system will be supported by the Project and will develop a linked system of information allowing different data to be mapped across sub-systems.", + "type": "survey", + "explanation": "In the context, 'periodic surveys' is explicitly mentioned as a method for gathering data to measure project outcomes, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'periodic surveys' is a dataset because it refers to a systematic method of data collection.", + "contextual_reason_agent": "In the context, 'periodic surveys' is explicitly mentioned as a method for gathering data to measure project outcomes, confirming its role as a data source.", + "contextual_signal": "mentioned as a method for gathering data to measure project outcomes", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 61, + "text": "This data is inputted into the SNSOP MIS to generate payment schedules Implementing partner Number of beneficiaries receiving cash for performing labor intensive public works who are refugees or host communities Number of total beneficiaries that directly receive cash transfer for This indicator will be Registration and payment data in the Beneficiary data is collected during registration and Implementing Partner", + "ner_text": [ + [ + 31, + 40, + "named" + ] + ], + "validated": false, + "empirical_context": "This data is inputted into the SNSOP MIS to generate payment schedules Implementing partner Number of beneficiaries receiving cash for performing labor intensive public works who are refugees or host communities Number of total beneficiaries that directly receive cash transfer for This indicator will be Registration and payment data in the Beneficiary data is collected during registration and Implementing Partner", + "type": "system", + "explanation": "However, the context indicates that SNSOP MIS is a system used for generating payment schedules, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often stands for Management Information System, suggesting data handling.", + "contextual_reason_agent": "However, the context indicates that SNSOP MIS is a system used for generating payment schedules, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 51, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 46 of 85 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection PDO # 1: Additional primary school-level OOSC enrolled, disaggregated by gender and vulnerable populations ( girls, refugees, rural students, and special needs students ) MENFOP will monitor the number of newly enrolled, formerly out of school children ( OOSC ). To do this, each student must have a unique identifier which, in the EMIS, includes descriptors to indicate gender, grade, and geographic location ( urban / rural ), and status ( refugee ). Once OOSC return to a structured learning environment, they are monitored in the following cycle. MENFOP is expected to report on: \u2022 number of previously OOSC enrolled who remain in the primary education program from one year to the next; \u2022 number of Annually MENFOP Calculated based on the preceding annual statistical tables Director of Planning and Statistics", + "ner_text": [ + [ + 621, + 625, + "named" + ], + [ + 4, + 14, + "EMIS <> publisher" + ], + [ + 494, + 541, + "EMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 46 of 85 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection PDO # 1: Additional primary school-level OOSC enrolled, disaggregated by gender and vulnerable populations ( girls, refugees, rural students, and special needs students ) MENFOP will monitor the number of newly enrolled, formerly out of school children ( OOSC ). To do this, each student must have a unique identifier which, in the EMIS, includes descriptors to indicate gender, grade, and geographic location ( urban / rural ), and status ( refugee ). Once OOSC return to a structured learning environment, they are monitored in the following cycle.", + "type": "system", + "explanation": "EMIS is indeed a data source as it is used to track and monitor student enrollment and characteristics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of monitoring and collecting data on students.", + "contextual_reason_agent": "EMIS is indeed a data source as it is used to track and monitor student enrollment and characteristics.", + "contextual_signal": "mentioned as a data source for monitoring student enrollment", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "012_BOSIB-9a6accb6-73d1-4bd1-8307-d41a339a51ab", + "page": 14, + "text": "In line with the 2016 Water Act, WSS provision is devolved to the counties who are the owners of WSPs. 16 On average, Kenyan water utilities lose about US $ 90 million annually due to low operational efficiencies. 17 Non - revenue water ( NRW ) has stagnated at a national average of 45 percent over the past decade, while energy costs have remained high, reaching as high as 50 percent of total operating costs for some WSPs. This has had a negative impact on the financial stability and capacity of WSPs to expand their services. Further, COVID-19 imposed significant financial constraints on WSPs. 18 12. Service gaps and inefficiencies are larger in rural areas. Most rural water supply systems fall outside the jurisdiction of licensed WSPs. Such schemes are operated by unregulated community groups, with tariffs that do not reflect actual costs and revenues that are not well monitored. The result is unreliable service provision and frequent non - functionality of these schemes. A more sustainable operation and financing model for rural WSPs is required to improve their efficiency and enable them to increase operating cost coverage, thus expanding services to more rural households. 14 Kenya Population and Housing Census ( 2019 ). 15 The average water volume per capita in Kakuma camp is 12. 75 litres per capita per day, decreasing to only 6 litres per day in Kalobeyei. Compared to refugees, the host communities have even lower access levels. 16 There are about 87 public and three privately owned WSPs that are licensed and regulated by WASREB. 17 WASREB. 2022. A Performance Report of Kenya \u2019 s Water Services Sector - 2020 / 21. 18 For instance, revenues declined by 40 percent between February and April 2020.", + "ner_text": [ + [ + 1198, + 1233, + "named" + ], + [ + 1198, + 1203, + "Kenya Population and Housing Census <> data geography" + ], + [ + 1236, + 1240, + "Kenya Population and Housing Census <> publication year" + ], + [ + 1251, + 1282, + "Kenya Population and Housing Census <> data description" + ], + [ + 1286, + 1297, + "Kenya Population and Housing Census <> data geography" + ], + [ + 1374, + 1383, + "Kenya Population and Housing Census <> data geography" + ] + ], + "validated": true, + "empirical_context": "A more sustainable operation and financing model for rural WSPs is required to improve their efficiency and enable them to increase operating cost coverage, thus expanding services to more rural households. 14 Kenya Population and Housing Census ( 2019 ). 15 The average water volume per capita in Kakuma camp is 12.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly referenced in the context and serves as a source of empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is a recognized census that typically provides structured data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced in the context and serves as a source of empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "The increase in service utilization is translated into deaths averted using published empirical studies; mortality rates for Balochistan were taken from the 2012 and 2017 \u2013 18 PDHS. 70 Effect sizes for the impacts of an additional skilled birth on maternal and neonatal mortality rates come from Graham, Bell, and Bullough ( 2001 ) 71 and Bhutta et al. ( 2014 ), 72 respectively. The effect size used to estimate the impact of measles vaccination on mortality of children under five is based on McGovern and Canning ( 2015 ). 73 Because their effect size is for full child vaccination, an adjustment factor of 0. 89 is applied to the share of children with 69 Pakistan Bureau of Statistics. Population Census 2017. http: / / www. pbs. gov. pk / content / block \u2010 wise \u2010 provisional \u2010 summary \u2010 results \u2010 6th \u2010 population \u2010 housing \u2010 census \u2010 2017 \u2010 january \u2010 03 \u2010 2018; for data from the 1998 and earlier censuses, see http: / / www. pbs. gov. pk / sites / default / files / population_census / Administrative % 20Units. pdf. 70 Maternal mortality rates from PDHS 2012 \u2013 13 are used since no maternal mortality data is available from PDHS 2017 \u2013 18. 71 Graham, W. J., J. S.", + "ner_text": [ + [ + 691, + 708, + "named" + ], + [ + 125, + 136, + "Population Census <> data geography" + ], + [ + 157, + 161, + "Population Census <> publication year" + ], + [ + 166, + 175, + "Population Census <> publication year" + ], + [ + 660, + 689, + "Population Census <> publisher" + ], + [ + 709, + 713, + "Population Census <> publication year" + ], + [ + 888, + 892, + "Population Census <> reference year" + ], + [ + 1064, + 1073, + "Population Census <> reference year" + ] + ], + "validated": true, + "empirical_context": "89 is applied to the share of children with 69 Pakistan Bureau of Statistics. Population Census 2017. http: / / www.", + "type": "census", + "explanation": "In this context, it is confirmed as a dataset since it is explicitly referenced alongside the Pakistan Bureau of Statistics and used to derive a statistic.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Population Census' typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is explicitly referenced alongside the Pakistan Bureau of Statistics and used to derive a statistic.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "170_multi0page", + "page": 11, + "text": "The biggest risk that Sierra Leone ' s poor face is a return to civil conflict, political instability and chaos in public administration that would prevent the government from responding to the population ' s needs for food, shelter and economically productive activity. The project is expected to respond to this risk through investments in rehabilitation, employment, and the reinforcement of basic services. As conditions improve, endogenous resistance to a resurgence of conflict is expected to increase. However there is still a need to understand the profile of risks, identify high risk groups, define the interface between vulnerability mapping and poverty mapping, coordinate public programs to reduce nsks and reinforce the coping capacity of the poor. Initially, a participatory assessment of risks and vulnerability will be commissioned using available and forthcoming data from the living standards measurement survey ( LSMS ) of 2003. Risk and vulnerability concepts have already been introduced into the PRSP preparation process by including appropriate questions in the 2003 LSMS. This should enhance the poverty diagnostic dimensions of the PRSP, and inform the development of strategies to ensure that poverty levels do not increase. Risk and vulnerability concepts would be introduced in the design of individual sub-projects selected by communities. Sub-projects would address the most common risks faced by communities, such as inadequate infrastructure, poor health, low school enrollment, and the resumption of conflict. A - 6 -", + "ner_text": [ + [ + 895, + 930, + "named" + ], + [ + 22, + 34, + "living standards measurement survey <> data geography" + ], + [ + 933, + 937, + "living standards measurement survey <> acronym" + ], + [ + 943, + 947, + "living standards measurement survey <> publication year" + ], + [ + 1086, + 1090, + "living standards measurement survey <> publication year" + ], + [ + 1121, + 1150, + "living standards measurement survey <> data description" + ] + ], + "validated": true, + "empirical_context": "However there is still a need to understand the profile of risks, identify high risk groups, define the interface between vulnerability mapping and poverty mapping, coordinate public programs to reduce nsks and reinforce the coping capacity of the poor. Initially, a participatory assessment of risks and vulnerability will be commissioned using available and forthcoming data from the living standards measurement survey ( LSMS ) of 2003. Risk and vulnerability concepts have already been introduced into the PRSP preparation process by including appropriate questions in the 2003 LSMS.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data used for the participatory assessment of risks and vulnerability.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data used for the participatory assessment of risks and vulnerability.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 55, + "text": "\u0083 Quality, utilization, coverage and satisfaction with HIV services provided to refugees, returnees, IDPs and surrounding populations \u0083 Quality, utilization, coverage and satisfaction with HIV services provided to CBMPs \u0083 Analysis of differential HIV knowledge and behavior in refugee and surrounding communities to guide intervention strategies \u0083 Assessment of the referral system \u0083 Best / good practice HIV service delivery in the region 35. Dissemination of data to facilitate their use is essential. The PFO will prepare standardized information products ( reports ), which IGAD will disseminate through an annual information sharing seminar about the IGAD HIV / AIDS initiative \u2013 bi annual physical progress report, annual mapping assessment and annual IGAD HIV progress report ( also for the IGAD Annual Heads of State meeting ). These information products will be ready before new work plans are developed for the following year \u2013 therefore \u2013 the time frame for the annual report will lag three months behind the planning cycle, to ensure that M & E data are available when decisions are made about implementation of activities.", + "ner_text": [ + [ + 685, + 719, + "named" + ] + ], + "validated": false, + "empirical_context": "Dissemination of data to facilitate their use is essential. The PFO will prepare standardized information products ( reports ), which IGAD will disseminate through an annual information sharing seminar about the IGAD HIV / AIDS initiative \u2013 bi annual physical progress report, annual mapping assessment and annual IGAD HIV progress report ( also for the IGAD Annual Heads of State meeting ). These information products will be ready before new work plans are developed for the following year \u2013 therefore \u2013 the time frame for the annual report will lag three months behind the planning cycle, to ensure that M & E data are available when decisions are made about implementation of activities.", + "type": "report", + "explanation": "However, it is not a dataset as it is described as a report rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'report', which can imply data collection.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a report rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 83, + "text": "70 Table 5. 1: Investment and Length of Major Roads Municipality Road Length ( km ) Investment ( CFAF, billion ) Yaound\u00e9 Yaound\u00e9 5 6. 505 10, 509, 679, 081 Yaound\u00e9 7 5. 631 7, 256, 986, 078 Douala Douala 3 5. 210 7, 357, 746, 238 Douala 5 3. 320 4, 718, 125, 392 Kumba Kumba 2 5. 772 4, 624, 733, 278 Ngaound\u00e9r\u00e9 Ngaound\u00e9r\u00e9 2 6. 342 5, 188, 111, 850 Batouri Batouri 2. 100 1, 435, 421, 012 Total 42. 885 41, 090, 802, 929 8. Total investment cost for option 1 is estimated at CFAF 41. 1 billion ( US $ 68, 484, 672 ) 21. 9. Benefits were estimated based on Vehicle Operational Costs ( VOC ) savings owing to the construction of the road, building on traffic data and operation costs of vehicles using the road work: ( i ) The Average Daily Traffic ( ADT ) and Average Travel Time ( ATT ) were estimated based on field surveys conducted at the different points along the axis of the selected road sections presented earlier ( early 2017 ).", + "ner_text": [ + [ + 649, + 661, + "named" + ], + [ + 121, + 128, + "traffic data <> data geography" + ], + [ + 190, + 196, + "traffic data <> data geography" + ], + [ + 230, + 236, + "traffic data <> data geography" + ], + [ + 301, + 311, + "traffic data <> data geography" + ], + [ + 312, + 322, + "traffic data <> data geography" + ], + [ + 725, + 746, + "traffic data <> data description" + ], + [ + 759, + 778, + "traffic data <> data description" + ], + [ + 811, + 824, + "traffic data <> data type" + ], + [ + 930, + 934, + "traffic data <> publication year" + ] + ], + "validated": true, + "empirical_context": "9. Benefits were estimated based on Vehicle Operational Costs ( VOC ) savings owing to the construction of the road, building on traffic data and operation costs of vehicles using the road work: ( i ) The Average Daily Traffic ( ADT ) and Average Travel Time ( ATT ) were estimated based on field surveys conducted at the different points along the axis of the selected road sections presented earlier ( early 2017 ).", + "type": "data", + "explanation": "In this context, 'traffic data' is indeed used as a data source for estimating benefits based on vehicle operational costs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'traffic data' is a dataset because it refers to information collected from field surveys.", + "contextual_reason_agent": "In this context, 'traffic data' is indeed used as a data source for estimating benefits based on vehicle operational costs.", + "contextual_signal": "follows 'building on traffic data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 48, + "text": "Frequency Quarterly Data source MINEMA, BRD, RTDA & BDF grievance redress mechanisms. Methodology for Data Collection Monitoring of grievances through the GRMs. Responsibility for Data Collection MINEMA, BRD, RTDA & BDF. MINEMA consolidates data for regular reporting. Beneficiaries that feel project investments reflect their needs ( Percentage ) Description Quantitative indicator counting proportion of beneficiaries indicating that project investments reflect their needs. Indictor is a composite of beneficiaries responding to a Likert scale or similar instrument. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey. Responsibility for Data Collection MINEMA. Increase in social cohesion score between refugees and host community members ( Percentage ) Description Composite first-order indicator composed of data from responses to questions gauging changes in horizontal and vertical social chesion in beneficiary communities. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey Responsibility for Data Collection MINEMA.", + "ner_text": [ + [ + 221, + 227, + "named" + ] + ], + "validated": false, + "empirical_context": "Responsibility for Data Collection MINEMA, BRD, RTDA & BDF. MINEMA consolidates data for regular reporting. Beneficiaries that feel project investments reflect their needs ( Percentage ) Description Quantitative indicator counting proportion of beneficiaries indicating that project investments reflect their needs.", + "type": "organization", + "explanation": "However, MINEMA is mentioned as an organization responsible for data collection, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MINEMA is a dataset because it is involved in data collection.", + "contextual_reason_agent": "However, MINEMA is mentioned as an organization responsible for data collection, not as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 12, + "text": "According to the data from 2014 / 15 Multiple Indicator Cluster Survey ( MICS ), Net Enrollment Rate ( NER ) is 69 percent with NER for boys 2 percentage points higher compared to girls ( 70 and 68 percent, respectively ). While girls \u2019 and boys \u2019 Grade 1 enrollment rates in urban areas are similar, male Grade 1 enrollment rates in rural areas are six percentage points higher than those for girls. Grade 8 enrollment rates are in favor of boys, and the gap is especially evident in rural areas. Figure 1: Primary education enrollment rates Access to basic education in Sudan at the beginning and end of the cycle in Sudan by gender, location, and wealth quintile ( 2014 ) Primary education GER in 2016 or the latest available, selected countries Source: Authors \u2019 estimates based on MICS2014 / 15. Source: Authors on UNESCO UIS data. 12. Socioeconomic disparities in basic education are large. While Grade 1 enrollment rates for the wealthiest fifth of households were universal, only 81 percent of children in the poorest fifth of households were enrolled. This socioeconomic gap in primary access widens by the end of the education cycle. Only 34 percent of children from the poorest quintile reach the last grade of primary education compared to 94 percent of children from the wealthiest quintile. The socioeconomic disparities further translate into access to secondary education: only 9 percent of 98 82 92 55 96 77 86 46 Grade 1 Grade 8 Grade 1 Grade 8 Urban Rural Boys Girls 81 34 100 94 Grade 1 Grade 8 Bottom 20 % Top 20 % 73 81 91 99 100 102 102 105 106 119 131 139 Sudan Tanzania Burkina Faso Zimbabwe Uganda Ethiopia Zambia Kenya Mozambique Cameroon Burundi Malawi", + "ner_text": [ + [ + 37, + 70, + "named" + ], + [ + 27, + 36, + "Multiple Indicator Cluster Survey <> reference year" + ], + [ + 73, + 77, + "Multiple Indicator Cluster Survey <> acronym" + ], + [ + 81, + 100, + "Multiple Indicator Cluster Survey <> data description" + ], + [ + 572, + 577, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 619, + 624, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 757, + 764, + "Multiple Indicator Cluster Survey <> author" + ], + [ + 809, + 816, + "Multiple Indicator Cluster Survey <> author" + ], + [ + 820, + 826, + "Multiple Indicator Cluster Survey <> publisher" + ], + [ + 1580, + 1585, + "Multiple Indicator Cluster Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "According to the data from 2014 / 15 Multiple Indicator Cluster Survey ( MICS ), Net Enrollment Rate ( NER ) is 69 percent with NER for boys 2 percentage points higher compared to girls ( 70 and 68 percent, respectively ). While girls \u2019 and boys \u2019 Grade 1 enrollment rates in urban areas are similar, male Grade 1 enrollment rates in rural areas are six percentage points higher than those for girls.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data for the Net Enrollment Rate.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data for the Net Enrollment Rate.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 925, + 930, + "named" + ], + [ + 578, + 581, + "NEMIS <> author" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 67, + "text": "One of the main challenges faced during the implementation of the first phase of RACE is the unavailability of timely and reliable data, structured data collection systems, and systems-capacity to analyze data that could inform policy decisions or programmatic interventions. There are currently multiple data collection mechanisms working in parallel, in addition to long delays in the ability for MEHE or CERD to collect and use school-level data for decision-making. As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders. MEHE will also deploy an EMIS in order to support the timely collection of the data, so that decision-making at both the school and the central MEHE and CERD levels is based on accurate and updated data.", + "ner_text": [ + [ + 711, + 715, + "named" + ] + ], + "validated": false, + "empirical_context": "As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders. MEHE will also deploy an EMIS in order to support the timely collection of the data, so that decision-making at both the school and the central MEHE and CERD levels is based on accurate and updated data.", + "type": "system", + "explanation": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data management and collection.", + "contextual_reason_agent": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 13, + "text": "Despite continued efforts, prevention gaps related to HIV / AIDS, sexually transmitted infections ( STIs ), and reproductive health remain an obstacle to reducing epidemic growth. As noted earlier, although there has been a reduction in prevalence among specific population sub-groups since 2003, overall prevalence rates still remain unacceptably high. With respect to knowledge levels, a 2004 national survey found that although 93 percent o f the respondents had heard o f HIV / AIDS, the proportion o f respondents 15-24 years who both correctly identify ways o f preventing the sexual transmission of HIV and who reject major misconceptions about HIV transmission or prevention increased merely from 36 percent in 2001 to 38 percent in 2004. The Government \u2019 s target for this critical knowledge indicator was 90 percent by 2005. It i s unfortunately clear that this, and several other key prevention targets outlined in the National HIV / AIDS Strategic Framework ( 2003-2009 ), will not be met. 13. With respect to behavioral risk, the BAIS I1 Survey ( 2004 ) indicated that 76 percent o f young people ( 15-24 years ) have had sex with a non-marital, non-cohabiting sexual partner in the last 12 months. Additionally, this assessment indicated an increase in the proportion o f people aged 15-24 years reporting unprotected sex in the past month ( after consuming alcohol ) - from 5 4", + "ner_text": [ + [ + 390, + 410, + "named" + ] + ], + "validated": true, + "empirical_context": "As noted earlier, although there has been a reduction in prevalence among specific population sub-groups since 2003, overall prevalence rates still remain unacceptably high. With respect to knowledge levels, a 2004 national survey found that although 93 percent o f the respondents had heard o f HIV / AIDS, the proportion o f respondents 15-24 years who both correctly identify ways o f preventing the sexual transmission of HIV and who reject major misconceptions about HIV transmission or prevention increased merely from 36 percent in 2001 to 38 percent in 2004. The Government \u2019 s target for this critical knowledge indicator was 90 percent by 2005.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data from a national survey used to analyze knowledge levels about HIV/AIDS.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a national survey that collects data on knowledge levels regarding HIV/AIDS.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data from a national survey used to analyze knowledge levels about HIV/AIDS.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 11, + "text": "Costa Rica reactivated standardized national assessments in 2023; the first results from 2023 were released in March 2024 and indicated that about a third of the students at the end of Primary schooling are at the \u201c basic \u201d level of learning. Internationally comparable Programme for International Student Assessment ( PISA ) data for Costa Rica for 2018 shows that while 28 percent of children from families in the top quintile of the PISA index for economic, social and cultural status were below Level 2 on Reading in PISA ( considered the minimum of adequate performance ), 72 percent of children from families from the lowest quintile of the PISA index were below level 2. Foundational learning ( literacy and numeracy ) in early grades paves the way for future learning, and differences in educational attainment become magnified through youth and adult life in the acquisition of human capital. Without any claims regarding causation, it is useful to look at labor earnings, which are more closely related to human capital than earnings from other assets. OECD figures show that 32 percent of 25 \u2013 64-year-olds in Costa Rica with low levels of educational attainment ( below upper-secondary education ) earned less than half of the median earnings for the country and were thus at risk of poverty, if not already poor. 10 Employability 6.", + "ner_text": [ + [ + 270, + 316, + "named" + ], + [ + 0, + 10, + "Programme for International Student Assessment <> data geography" + ], + [ + 319, + 323, + "Programme for International Student Assessment <> acronym" + ], + [ + 335, + 345, + "Programme for International Student Assessment <> data geography" + ], + [ + 350, + 354, + "Programme for International Student Assessment <> publication year" + ], + [ + 1063, + 1067, + "Programme for International Student Assessment <> publisher" + ], + [ + 1121, + 1131, + "Programme for International Student Assessment <> data geography" + ], + [ + 1361, + 1379, + "Programme for International Student Assessment <> usage context" + ] + ], + "validated": true, + "empirical_context": "Costa Rica reactivated standardized national assessments in 2023; the first results from 2023 were released in March 2024 and indicated that about a third of the students at the end of Primary schooling are at the \u201c basic \u201d level of learning. Internationally comparable Programme for International Student Assessment ( PISA ) data for Costa Rica for 2018 shows that while 28 percent of children from families in the top quintile of the PISA index for economic, social and cultural status were below Level 2 on Reading in PISA ( considered the minimum of adequate performance ), 72 percent of children from families from the lowest quintile of the PISA index were below level 2. Foundational learning ( literacy and numeracy ) in early grades paves the way for future learning, and differences in educational attainment become magnified through youth and adult life in the acquisition of human capital.", + "type": "assessment", + "explanation": "It is indeed a dataset as it provides structured data on student performance across different countries, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to an international assessment that provides data on student performance.", + "contextual_reason_agent": "It is indeed a dataset as it provides structured data on student performance across different countries, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "158_40156", + "page": 52, + "text": "29. Advocacy and communications about this new HIV ME system is essential. For this reason, communications plan will be included as part of the annual costed M & E work plan and will involve the PFO and M & E champions in each IGAD country. 30. Surveys and surveillance data will be used to track PDO-level project results. There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations. The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations.", + "ner_text": [ + [ + 830, + 833, + "named" + ], + [ + 702, + 733, + "BSS <> data type" + ], + [ + 770, + 778, + "BSS <> reference population" + ], + [ + 780, + 789, + "BSS <> reference population" + ], + [ + 791, + 795, + "BSS <> reference population" + ], + [ + 996, + 1014, + "BSS <> usage context" + ] + ], + "validated": true, + "empirical_context": "a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations. The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations.", + "type": "survey", + "explanation": "In the context, 'BSS' is explicitly described as surveys that will be conducted to gather data from specific populations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BSS' is a dataset because it refers to customized behavioral surveillance surveys that collect data.", + "contextual_reason_agent": "In the context, 'BSS' is explicitly described as surveys that will be conducted to gather data from specific populations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 2, + "text": "DRFM Directorate of Financial Resources and Equipment ( Direction des Ressources Financi\u00e8res et Mat\u00e9rielles ) DRH Human Resources Department ( Direction des Ressources Humaines ) DSCE Growth and Employment Strategy Paper ( Document de Strat\u00e9gie pour la Croissance et l \u2019 Emploi ) ECAM Fourth Cameroon Household Survey ( Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ) ECD Early Childhood Development EEP Eligible Expenditure Program EiE EMIS Education in Emergency Education Management Information System ENIEG Teacher Training Institute ( Ecoles Normales d \u2019 Instituteurs de l \u2019 Enseignement G\u00e9n\u00e9ral ) ERSP Education Reform Support Project ESMF Environmental and Social Managemental Framework ESS Education Sector Strategy 2013 ( Document de Strat\u00e9gie du Secteur de l \u2019 Education et de la Formation 2013 ) FCGDO Fiscal Consolidation and Growth Development Policy Operation FM Financial Management GDP Gross Domestic Product GER Gross Enrollment Rate GPE Global Partnership for Education HSPRP Health System Performance Reinforcement Project IDA International Development Association IDB Islamic Development Bank IDF Institutional Development Fund IDP Internally Displaced Person IUFR Interim Unaudited Financial Report INS Institut National de Statistique ( National Institute of Statistics ) IPF Investment Project Financing IPP Indigenous Peoples Plan IPPF Indigenous Peoples Plan Framework ISP Implementation Support Plan IVA Independent Verification Agency LEG Local Education Group MDG Millennium Development Goal MINATD Ministry of Territorial Administration and Decentralization ( Minist\u00e8re de l \u2019 Administration Territoriale et de la D\u00e9centralisation ) MINEDUB Ministry of Basic Education ( Minist\u00e8re de l ' Education de Base ) MINEFOP Ministry of Employment and Professional", + "ner_text": [ + [ + 285, + 317, + "named" + ], + [ + 292, + 300, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 718, + 722, + "Fourth Cameroon Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "DRFM Directorate of Financial Resources and Equipment ( Direction des Ressources Financi\u00e8res et Mat\u00e9rielles ) DRH Human Resources Department ( Direction des Ressources Humaines ) DSCE Growth and Employment Strategy Paper ( Document de Strat\u00e9gie pour la Croissance et l \u2019 Emploi ) ECAM Fourth Cameroon Household Survey ( Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ) ECD Early Childhood Development EEP Eligible Expenditure Program EiE EMIS Education in Emergency Education Management Information System ENIEG Teacher Training Institute ( Ecoles Normales d \u2019 Instituteurs de l \u2019 Enseignement G\u00e9n\u00e9ral ) ERSP Education Reform Support Project ESMF Environmental and Social Managemental Framework ESS Education Sector Strategy 2013 ( Document de Strat\u00e9gie du Secteur de l \u2019 Education et de la Formation 2013 ) FCGDO Fiscal Consolidation and Growth Development Policy Operation FM Financial Management GDP Gross Domestic Product GER Gross Enrollment Rate GPE Global Partnership for Education HSPRP Health System Performance Reinforcement Project IDA International Development Association IDB Islamic Development Bank IDF Institutional Development Fund IDP Internally Displaced Person IUFR Interim Unaudited Financial Report INS Institut National de Statistique ( National Institute of Statistics ) IPF Investment Project Financing IPP Indigenous Peoples Plan IPPF Indigenous Peoples Plan Framework ISP Implementation Support Plan IVA Independent Verification Agency LEG Local Education Group MDG Millennium Development Goal MINATD Ministry of Territorial Administration and Decentralization ( Minist\u00e8re de l \u2019 Administration Territoriale et de la D\u00e9centralisation ) MINEDUB Ministry of Basic Education ( Minist\u00e8re de l ' Education de Base ) MINEFOP Ministry of Employment and Professional", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a household survey, which typically collects structured data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a household survey, which typically collects structured data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "089_UGANDA-PAD-04272018", + "page": 53, + "text": "The largest part of the funding goes to the LG level \u2013 the municipal development grant ( now named DDEG under the IGFTR ) US $ 136 million, and the CB grants ( CBG ) US $ 15 million with the balance going to support results at the MLHUD level to support CB activities as well as Program implementation. The last grant cycle has just been released, based on the results from the annual performance assessments ( APA ). 3. USMID AF will provide support to part of the overall GoU Intergovernmental Fiscal Transfer Reform Program, which is aiming at improving the overall grant system, including size, allocation, modalities and efficiency in the use of transfers. Under this program, the discretionary development equalisation grant ( DDEG ) is supporting multi-sectoral investments at the LG level, and under this is the \u201c urban window \u201d with the targeting of the USMID municipalities being a critical element. The sub-window \u201c USMID municipalities \u201d as mentioned in the MTEF and DDEG guidelines from Office of the Prime Minister, 2017 will be the target for financial support, and the transitional grant window for development grants when it comes to the sub-window for refugees and host communities providing additional funding the DDEG allocations for these host areas. 68 4. Program funds will be provided through disbursement-linked indicators ( DLI ).", + "ner_text": [ + [ + 378, + 408, + "named" + ] + ], + "validated": false, + "empirical_context": "The largest part of the funding goes to the LG level \u2013 the municipal development grant ( now named DDEG under the IGFTR ) US $ 136 million, and the CB grants ( CBG ) US $ 15 million with the balance going to support results at the MLHUD level to support CB activities as well as Program implementation. The last grant cycle has just been released, based on the results from the annual performance assessments ( APA ). 3.", + "type": "assessment", + "explanation": "However, it is not a dataset as it refers to a process of evaluation rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'assessments' which can imply data collection.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a process of evaluation rather than a structured collection of data.", + "contextual_signal": "mentioned only as a process, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 45, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 34 Indicator Name of which citizens in host communities Definition / Description People who benefitted from improved water supply services that have been constructed or rehabilitated under the project. Per UNICEF-WHO Joint Monitoring Program definition, \u201c improved water sources \u201d include piped household connection ( house or yard connections ), public standpipe, boreholes, protected dug well, protected spring and rainwater collection, and do not include unprotected well, unprotected spring, surface water ( river, pond, dam, lake, stream, irrigation channel ), or bottled water. Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / NWSC.", + "ner_text": [ + [ + 698, + 714, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' which can imply data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 18, + "text": "For example, the project may support training of providers on infection prevention and control, use of HMIS to strengthen reporting of cases and use of community interventions for sensitization and behavior change ( for example, self \u2010 distancing, hygiene at home ). The project will also consider financing additional interventions such as transport for health care workers to reach the facilities or telemedicine to address specific service delivery challenges that may emerge as a consequence of COVID \u2010 19 and other outbreaks. 32. This subcomponent will support selected HFs36 to meet a set of agreed minimum delivery standards for RMNCHN services using a cluster \u2010 based service delivery model. One HF will be designated to each cluster offering comprehensive EmONC services that will serve as the \u2018 hub \u2019 and will be linked to a network of HFs that offer basic EmONC services. Selected BHUs will be upgraded to 24 / 7 RHCs to provide critical care, especially EmONC services. This subcomponent will support on \u2010 the \u2010 job training to ensure all 36 In each target district, HFs are selected based on proximity to a refugee village; ongoing or planned investments by other development partners ( DPs ); and distance from the district headquarter hospitals in Pishin, Chagai, and Killa Abdullah and the city center for Quetta to enable the creation of a cluster.", + "ner_text": [ + [ + 103, + 107, + "named" + ] + ], + "validated": false, + "empirical_context": "For example, the project may support training of providers on infection prevention and control, use of HMIS to strengthen reporting of cases and use of community interventions for sensitization and behavior change ( for example, self \u2010 distancing, hygiene at home ). The project will also consider financing additional interventions such as transport for health care workers to reach the facilities or telemedicine to address specific service delivery challenges that may emerge as a consequence of COVID \u2010 19 and other outbreaks.", + "type": "system", + "explanation": "However, HMIS is mentioned as a system used for reporting rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it relates to health management information systems, which often handle data.", + "contextual_reason_agent": "However, HMIS is mentioned as a system used for reporting rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42. Support for implementation of national and international student assessments includes the following activities: ( a ) analysis of PISA 2022 results in student learning and Moldova \u2019 s participation in PISA 2025 and 2028; ( b ) preparation activities conducted by NACE, including the development, translation, and editing of test materials, education, training, and preparatory workshops, and piloting of assessments; ( c ) test implementation, including printing, distribution, and supervision of assessments, data processing and analysis, and dissemination of results; and ( d ) strengthening of the capacity of the national implementing agency responsible for carrying out national and international student assessments, including for refugee children from Ukraine. Years of administration of PISA are indicative based on the frequency of previous cycles ( pending decision about the frequency of future cycles ). 43. This subcomponent will also support TA for: ( a ) revision of the teacher and manager professional standards, ( b ) nationwide education infrastructure investment planning for modern school", + "ner_text": [ + [ + 362, + 366, + "named" + ] + ], + "validated": false, + "empirical_context": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data reporting and integration.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "187_multi-page", + "page": 30, + "text": "Direct Bank involvement and assistance in undertaking and ensuring widespread dissemination of key impact monitoring inteventions will be particularly important in this regard These include the surveys of public officials, as well as the Public Expenditure Tracking Surveys. In addition, intensive and close supervision of the PAR by the Bank will be required to continuously adjust strategy and tactics to rapid and constantly changing conditions and challenges. To this end, the pairing of the PAR with ( i ) related Bank policy operations ( the PESP and SAC ), and ( ii ) complementary ESW ( Public Expenditure Review ) should help to address the Bank ' s capacity to continually reinforce counterpart commitment. - 27 -", + "ner_text": [ + [ + 194, + 221, + "named" + ], + [ + 7, + 11, + "surveys of public officials <> publisher" + ], + [ + 338, + 342, + "surveys of public officials <> publisher" + ], + [ + 519, + 523, + "surveys of public officials <> publisher" + ], + [ + 650, + 654, + "surveys of public officials <> publisher" + ], + [ + 739, + 757, + "surveys of public officials <> usage context" + ] + ], + "validated": true, + "empirical_context": "Direct Bank involvement and assistance in undertaking and ensuring widespread dissemination of key impact monitoring inteventions will be particularly important in this regard These include the surveys of public officials, as well as the Public Expenditure Tracking Surveys. In addition, intensive and close supervision of the PAR by the Bank will be required to continuously adjust strategy and tactics to rapid and constantly changing conditions and challenges.", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned in the context of key impact monitoring interventions and is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'surveys' which typically collect structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of key impact monitoring interventions and is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 67, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 56 systems. Groundwater development is also being considered for small-scale irrigation and other uses as an adaptation measure to climate change and in situations where surface water sources are not available at a reasonable cost. Despite Uganda \u2019 s growing dependency on groundwater, concerns remain over its sustainability. The problems associated with excessive groundwater development are very localized. Effective planning and management strategies to regulate and control groundwater activities are key to ensuring that the groundwater is utilized sustainably to address unmet water demands. The availability of groundwater and its vulnerability to human and climatic impacts needs to be further assessed given that many towns and RGCS are using groundwater for solar-powered water schemes. 32. This component will support a comprehensive groundwater assessment and the development of a toolkit to inform further development of groundwater. The toolkit will include guidelines on aquifer utilization as well as reports and maps showing the aquifers \u2019 characteristics, distribution, and responses to pumping. This work will be closely coordinated with the implementation of Component 1 to ensure that the WSS investments are adequately guided on the potential for and sustainability of groundwater development, the spacing of production boreholes as well as the pumping regimes. The information generated will be fed into the WIS for use in future groundwater development projects. Water Information System 33.", + "ner_text": [ + [ + 1567, + 1591, + "named" + ] + ], + "validated": false, + "empirical_context": "The information generated will be fed into the WIS for use in future groundwater development projects. Water Information System 33.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 80, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 68. Verification Protocol Table: Disbursement Linked Indicators DLI_TBL_VERIFICATION DLI 1 Girls \u2019 promotion rate from grade 11 to 12, nationwide Description The improvement in grils enrollment is measured by the proportion of girls enrolled in grade 11 at a given school year who study in the grade 12 in the following school year. The indicator is calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year t. Data source / Agency EMIS Verification Entity ESS Procedure Data collected by MOE through the annual EMIS process and verified by ESS using a representative sample. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 2 Children 0-59 months with diarrhea treated with ORS and Zinc in low performing regions Description The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc Data source / Agency DHIS2 Verification Entity ESS Procedure Data collected by MOH through the annual DHIS2 and verified by ESS using a representative sample.", + "ner_text": [ + [ + 680, + 684, + "named" + ] + ], + "validated": false, + "empirical_context": "The indicator is calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year t. Data source / Agency EMIS Verification Entity ESS Procedure Data collected by MOE through the annual EMIS process and verified by ESS using a representative sample. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review.", + "type": "system", + "explanation": "However, EMIS is described as a system for collecting data, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data collection processes.", + "contextual_reason_agent": "However, EMIS is described as a system for collecting data, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 15, + "text": "On the supply side, despite being well capitalized and profitable, the microfinance institutions ( MFIs ) and finance institutions ( a ) focus on collateral-based lending instead of cashflow-based financing, ( b ) face regulatory constraints in financing micro and small enterprises, and ( c ) incur high costs of lending and monitoring relative to the size of loans. To unlock the flow of finance required by women entrepreneurs to scale their enterprises, the systemic barriers that MFIs and financial institutions face need to be addressed. It is an imperative that GROW, in partnership with the Private Sector Foundation Uganda ( PFSU ), works with a limited pool of motivated local financial intermediaries that that see women entrepreneurs as core to their future client base. 17. Lack of access to infrastructure \u2014 particularly infrastructure that accommodates women \u2019 s needs \u2014 is one of the biggest obstacles preventing women-owned MSMEs from growing their businesses. The Uganda Enterprise Survey for 2014 found that infrastructure was the most important constraint, with 33. 4 percent of firms ranking it as the biggest obstacle. 26 This includes poor road and transport infrastructure, poorly maintained markets and trading centers, and unreliable electricity and internet services, among others.", + "ner_text": [ + [ + 982, + 1006, + "named" + ], + [ + 410, + 429, + "Uganda Enterprise Survey <> reference population" + ], + [ + 982, + 988, + "Uganda Enterprise Survey <> data geography" + ], + [ + 1011, + 1015, + "Uganda Enterprise Survey <> publication year" + ], + [ + 1158, + 1196, + "Uganda Enterprise Survey <> data description" + ], + [ + 1198, + 1243, + "Uganda Enterprise Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Lack of access to infrastructure \u2014 particularly infrastructure that accommodates women \u2019 s needs \u2014 is one of the biggest obstacles preventing women-owned MSMEs from growing their businesses. The Uganda Enterprise Survey for 2014 found that infrastructure was the most important constraint, with 33. 4 percent of firms ranking it as the biggest obstacle.", + "type": "survey", + "explanation": "The Uganda Enterprise Survey is explicitly mentioned as a source of data regarding the constraints faced by firms, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical data on infrastructure constraints.", + "contextual_reason_agent": "The Uganda Enterprise Survey is explicitly mentioned as a source of data regarding the constraints faced by firms, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 57, + "text": "Given the large demand of the national statistical system, the choice is made to focus either on the areas where the World Bank clearly has a comparative advantage among the donors or areas not supported by the other donors, namely, household survey, population census, national account, and archiving and dissemination. At the core of all this are two aspects: data collection and capacity building. The philosophy underlying data collection is to improve the design of the surveys to take into account the most recent methodological approaches. As for capacity building, the preference is given either to train staff locally or to use the learning-by - doing approach. Three subcomponents are distinguished as follows: Subcomponent 4. 1. Improving poverty-related data 39. Objective. The objective of this subcomponent is to improve the poverty related data production and analysis. 40. Current status. INS has implemented living conditions surveys in 1996, 2001, 2007, and 2014. The last three surveys have used very close methodologies and poverty indicators are comparable over", + "ner_text": [ + [ + 925, + 950, + "named" + ], + [ + 233, + 249, + "living conditions surveys <> data type" + ], + [ + 905, + 908, + "living conditions surveys <> author" + ], + [ + 954, + 958, + "living conditions surveys <> reference year" + ], + [ + 960, + 964, + "living conditions surveys <> reference year" + ], + [ + 966, + 970, + "living conditions surveys <> reference year" + ], + [ + 976, + 980, + "living conditions surveys <> publication year" + ], + [ + 1044, + 1062, + "living conditions surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "Current status. INS has implemented living conditions surveys in 1996, 2001, 2007, and 2014. The last three surveys have used very close methodologies and poverty indicators are comparable over", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as having been implemented and used for empirical analysis, confirming their role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'living conditions surveys' implies a structured collection of data collected over multiple years.", + "contextual_reason_agent": "These surveys are explicitly mentioned as having been implemented and used for empirical analysis, confirming their role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 37, + "text": "101. Public Information Center ( PIC ): A PIC will be created at the PMU in Beirut with a branch in an easily accessible location in works area. The PIC will comprise of a Social Specialist responsible for coordinating and responding to citizen queries. Feedback can be submitted via multiple tools and in person. Once hired, the Social Specialist will be trained in Citizen Engagement methodologies to carry out activities prior to and during the expropriation process and to liaise with civil society organizations, community cooperatives, religious institutions, local municipality officials in their role as social intermediaries and project stakeholders. Such activities will contribute to the development of a benefit-sharing program that responds to citizen feedback and promotes new sources of income for people impacted by the project. Special attention will be paid to the inclusion of women, youth and the elderly in order to ensure adequate voice and representation. Additionally, a satisfaction survey will be carried out during project reviews to assess effectiveness of the PIC and GRM, and a final survey will be carried out among beneficiaries upon operation of the dam. 102. Gender: Shared, gender-based differences exist in the water sector. Globally, women are primarily responsible for managing water and hygiene at the household and community levels. Furthermore, women and girls are most impacted by limited access to infrastructure services.", + "ner_text": [ + [ + 995, + 1014, + "named" + ], + [ + 896, + 901, + "satisfaction survey <> reference population" + ], + [ + 1147, + 1160, + "satisfaction survey <> reference population" + ], + [ + 1271, + 1276, + "satisfaction survey <> reference population" + ], + [ + 1481, + 1499, + "satisfaction survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Special attention will be paid to the inclusion of women, youth and the elderly in order to ensure adequate voice and representation. Additionally, a satisfaction survey will be carried out during project reviews to assess effectiveness of the PIC and GRM, and a final survey will be carried out among beneficiaries upon operation of the dam. 102.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned to be carried out to assess effectiveness, indicating it will collect data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satisfaction survey' implies a structured collection of responses.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned to be carried out to assess effectiveness, indicating it will collect data.", + "contextual_signal": "follows 'will be carried out to assess effectiveness'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 44, + "text": "The PIU will include a dedicated Project Director and three technical specialists to cover TVET governance; curriculum, assessment, and quality assurance; and monitoring and evaluation. The PIU will also bring on board additional staff to support fiduciary ( both procurement and financial management ) and safeguards related activities but embed them in existing implementation structures in MENFOP which are in place to support other on-going projects - Expanding Opportunities for Learning ( P166059 ) and Education Emergency Response to COVID-19 ( P174128 ). This will ensure that World Bank projects help strengthen ministerial capacity in a manner that does not add to the MENFOP \u2019 s administrative burden. 85. The implementation arrangements for the Project ensures that the issue of refugee integration will reach the Council for Economic Development ( CDE ), the highest-level policy body in the country. The issue of including refugee candidates / beneficiaries into the training system will be managed and determined by the implementing agency, MENFOP, together with guidance from the CDE. B. Results Monitoring and Evaluation Arrangements 86. Monitoring and evaluation mechanism. A wide variety of mechanisms will be used to measure project outcomes including but not limited to administrative data collected through both the MENFOP and MT, other centrally sourced data in the country, standardized data collected from training providers, and data from periodic surveys, tracer studies, labor market analysis, and studies planned to be carried out annually to support project implementation. This multi-level data gathering system will be supported by the Project and will develop a linked system of information allowing different data to be mapped across sub-systems. This will help address one of the key issues in collecting data from TVET systems \u2013 avoidance of data from a fragmented system. The Project places 55 A draft training plan for select PIU and other staff is included in Annex 3.", + "ner_text": [ + [ + 1398, + 1415, + "named" + ] + ], + "validated": false, + "empirical_context": "Monitoring and evaluation mechanism. A wide variety of mechanisms will be used to measure project outcomes including but not limited to administrative data collected through both the MENFOP and MT, other centrally sourced data in the country, standardized data collected from training providers, and data from periodic surveys, tracer studies, labor market analysis, and studies planned to be carried out annually to support project implementation. This multi-level data gathering system will be supported by the Project and will develop a linked system of information allowing different data to be mapped across sub-systems.", + "type": "data", + "explanation": "'Standardized data' is mentioned as part of a broader data collection effort and not as a standalone dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'standardized data' refers to a dataset due to its structured nature.", + "contextual_reason_agent": "'Standardized data' is mentioned as part of a broader data collection effort and not as a standalone dataset.", + "contextual_signal": "mentioned only as part of a data gathering system", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 83, + "text": "While data on WTP for electricity in Chad are not available, according to the results of the most recent expenditure survey data, households currently use on average 2. 5 light points across rural areas in Chad for around five hours a day for which they spend about US $ 4. 7 per month per household. 60 Even if an inefficient 40 W light point is assumed, a household in Chad would consume only about 15 kWh per month for which it is currently spending about US \u00a2 30 per kWh, which can be considered as a lower bound on WTP for electricity. As WTP per kWh would decrease with greater consumption, and to be on the conservative side, for the analysis, a WTP of US \u00a2 25 per kWh is used. 8. While the analysis does not consider other indirect benefits, it is expected that the project will contribute toward other economic benefits that are more difficult to quantify and monetize. These indirect benefits include improved air quality from reduced consumption of kerosene; reduced poisoning and accidental fires; and wider benefits that can be linked to access to modern electricity solutions such as improved health, improved connectivity, and improved security. Access to modern energy solutions is also expected to increase income-generating opportunities and improve the socioeconomic situation of households and MSMEs, with an expected positive impact on education and overall lifestyle. This means that the results from the economic analysis can be considered as conservative estimates of the overall economic benefits of the project. 9. The project is also expected to bring some benefits from reduced GHG emissions and local pollution. In addition to the quantifiable benefits discussed above, the economic analysis also considers 59 It is noted that this is a regional estimate as for Chad, such data are not available. 60 Results of the household expenditure survey are provided in annex 6.", + "ner_text": [ + [ + 1844, + 1872, + "named" + ], + [ + 37, + 41, + "household expenditure survey <> data geography" + ], + [ + 105, + 128, + "household expenditure survey <> data type" + ], + [ + 130, + 140, + "household expenditure survey <> reference population" + ], + [ + 206, + 210, + "household expenditure survey <> data geography" + ], + [ + 371, + 375, + "household expenditure survey <> data geography" + ], + [ + 1299, + 1309, + "household expenditure survey <> reference population" + ], + [ + 1791, + 1795, + "household expenditure survey <> data geography" + ], + [ + 1950, + 1968, + "household expenditure survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "In addition to the quantifiable benefits discussed above, the economic analysis also considers 59 It is noted that this is a regional estimate as for Chad, such data are not available. 60 Results of the household expenditure survey are provided in annex 6.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned to provide results and is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on household expenditures.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned to provide results and is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 80, + "text": "70 59. The analysis of the Batwa \u2019 s socio-economic situation highlight the following issues: ( i ) marginalization and discrimination of the Batwa by other ethnic communities, ( ii ) lack of access to health especially for children under 5 and pregnant and lactating women, ( iii ) lack of access to education, ( iv ) weak knowledge about nutrition, ( v ) food insecurity, ( vi ) precarious hygiene practices, ( vi ) access to justice issues, ( vii ) low Batwa participation in the instances of decision making resulting in their low representation in the various sectoral committees set up at Community level, ( viii ) lack of access to land and ( ix ) low access to housing. 60. Based on the experiences in addressing the specific vulnerabilities and discrimination facing the Batwa, the Project will ensure that they are included in the first listing ( by conducting a separate listing of Batwa households in the participating collines based on the provincial census ) and included in the beneficiary registry per the results of the targeting survey. The project will also ensure that beneficiary Batwa households can participate in the accompanying measures by partnering with NGOs that are experienced in taking into account their specificity.", + "ner_text": [ + [ + 993, + 1013, + "named" + ], + [ + 27, + 32, + "beneficiary registry <> reference population" + ], + [ + 893, + 909, + "beneficiary registry <> reference population" + ], + [ + 1101, + 1117, + "beneficiary registry <> reference population" + ], + [ + 1282, + 1300, + "beneficiary registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "60. Based on the experiences in addressing the specific vulnerabilities and discrimination facing the Batwa, the Project will ensure that they are included in the first listing ( by conducting a separate listing of Batwa households in the participating collines based on the provincial census ) and included in the beneficiary registry per the results of the targeting survey. The project will also ensure that beneficiary Batwa households can participate in the accompanying measures by partnering with NGOs that are experienced in taking into account their specificity.", + "type": "registry", + "explanation": "This is a dataset as it is explicitly mentioned as a registry that includes data on beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary registry' suggests a structured collection of data about beneficiaries.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a registry that includes data on beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 15, + "text": "The situation is particularly worrisome in rural healthcare facilities, schools, and other public places, which have been assigned a priority by the health authorities. Recent WHO data suggest that 53 percent of healthcare facilities do not have any water supply service and 57 percent report no access to sanitation. 14. Poor WASH conditions have a disproportional impact on women, children, and the elderly. This is largely due to the traditional distribution of household chores in society with women taking primary responsibility for water collection and taking care of the ill and most vulnerable family members. In rural households, where majority of men have migrated for work, these responsibilities limit women \u2019 s time and mobility to engage in educational, income earning, and community activities and pose heightened physical and health risks. 27 Not only do women spend most of their time fetching water and have narrow paths of daily movement within their community, but they are also reluctant to venture from their own communities due to the lack of safe and secure sanitation facilities in public places ( marketplaces and so on ). Inadequate WSS at local health facilities affects the quality of health services for population in general and particularly for women in relation to maternal care and delivery. 15. Climate change will exacerbate WSS-related risks to public health and the economy. In many areas, increased water temperatures will cause eutrophication and excess algal growth, which will reduce 26 Tajikistan JMP data ( 2020 ). https: / / washdata. org / data. 27 Rural Water Supply and Sanitation Project ( RWSSP ) Baseline Survey 2020. 0 500 1000 1500 2000 2500 3000 3500 Norak Vakhsh Dusti J. Balkhi A. Jomi Panj Yovon Kulob Dangara M. S. Hamadoni Temurmalik Sh. Shohin Levakant Khatlon region 0 1 2 3 4 5 6", + "ner_text": [ + [ + 1540, + 1548, + "named" + ], + [ + 176, + 179, + "JMP data <> publisher" + ], + [ + 198, + 270, + "JMP data <> data description" + ], + [ + 1529, + 1539, + "JMP data <> data geography" + ], + [ + 1551, + 1555, + "JMP data <> publication year" + ], + [ + 1663, + 1667, + "JMP data <> publication year" + ] + ], + "validated": true, + "empirical_context": "Climate change will exacerbate WSS-related risks to public health and the economy. In many areas, increased water temperatures will cause eutrophication and excess algal growth, which will reduce 26 Tajikistan JMP data ( 2020 ). https: / / washdata.", + "type": "data", + "explanation": "In the context, 'JMP data' is explicitly mentioned as a source of information used to support claims about water-related risks.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'JMP data' is a dataset because it is referenced in the context of empirical analysis related to water and sanitation.", + "contextual_reason_agent": "In the context, 'JMP data' is explicitly mentioned as a source of information used to support claims about water-related risks.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 20, + "text": "This will be supported through: ( i ) learning standards in line with the competency-based curricula for all grades and levels; ( ii ) a high-quality digital item bank to automatically generate competency-based formative and summative assessments; ( iii ) development of a digital assessment platform whose data will feed into automated information system modules available at the central and regional levels; and ( iv ) training on the use and maintenance of the platform for end users. The digital assessment platform will be used to implement the national standardized assessments. This subcomponent will finance technical assistance ( consultancy firms and individual ) and 26 It is expected that most of the hardware will be leased to MEP, as explained in the next paragraph. Computational services would be provided through modular mobile carts that will be wheeled to classrooms. Modeling with empirical data indicates that 153, 785 computers would be sufficient to cover the needs of PNFT. PNFT has two dimensions: the first comprises computational thinking or computational science, which concerns the implementation of the PNFT curriculum that requires 2 mandatory lessons per week. Dimension 2 comprises the use of computers and digital tools for all subjects in the curriculum, which is expected to grow over time as progress in Dimension 1 generates awareness, interest, and capabilities.", + "ner_text": [ + [ + 273, + 300, + "named" + ] + ], + "validated": false, + "empirical_context": "This will be supported through: ( i ) learning standards in line with the competency-based curricula for all grades and levels; ( ii ) a high-quality digital item bank to automatically generate competency-based formative and summative assessments; ( iii ) development of a digital assessment platform whose data will feed into automated information system modules available at the central and regional levels; and ( iv ) training on the use and maintenance of the platform for end users. The digital assessment platform will be used to implement the national standardized assessments.", + "type": "program", + "explanation": "However, it is described as a platform and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection and assessment.", + "contextual_reason_agent": "However, it is described as a platform and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 65, + "text": "There has been significant progress in Jordan \u2019 s public FM where it has been able to: ( i ) improve its budget process through implementing a results based budget for the year 2008; ( ii ) advance the full implementation of a single treasury account with the Central Bank; and ( iii ) modernize the spending procedures and accounting through adopting a new chart of account and developing the GFMIS for implementation by the MoF, government departments and other spending units. So far, the GFMIS is still a work in progress and most of the ministries ( including MoE and GBD ) still do not have an accounting and reporting system in place that can be used for reporting on the project accounts. In addition, Jordan \u2019 s Corruption Perceived Index ( CPI ) as per Transparency International has improved from 4. 7 in 2007 to 5. 1 in 2008, ranking Jordan 5 / 20 regionally and 41 / 180 internationally. 9. The Project will be implemented through opening budget lines under MoE ( for counterpart funds ) and MoPIC ( for the Bank loan ) and funds will be allocated accordingly. MoE has already opened a budget line under its 2009 budget. 10. Project FM Risk. MoE and GBD \u2019 s FM arrangements were assessed based on the World Bank \u2019 s FM Guidelines, to determine if the FM arrangements for the Project are acceptable to the World Bank. Detailed FM questionnaires were completed for MoE and GBD and are included in the Project \u2019 s files. The risks identified and the mitigating measures addressing theses risks are detailed in the table below: 4 The Institutional Financial Management Assessment ( IFMCA ) for the Education and Social Sectors \u2013 June 2006.", + "ner_text": [ + [ + 394, + 399, + "named" + ] + ], + "validated": false, + "empirical_context": "There has been significant progress in Jordan \u2019 s public FM where it has been able to: ( i ) improve its budget process through implementing a results based budget for the year 2008; ( ii ) advance the full implementation of a single treasury account with the Central Bank; and ( iii ) modernize the spending procedures and accounting through adopting a new chart of account and developing the GFMIS for implementation by the MoF, government departments and other spending units. So far, the GFMIS is still a work in progress and most of the ministries ( including MoE and GBD ) still do not have an accounting and reporting system in place that can be used for reporting on the project accounts.", + "type": "system", + "explanation": "GFMIS is described as a management information system, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is related to accounting and reporting.", + "contextual_reason_agent": "GFMIS is described as a management information system, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "138_781290PAD0JO0R0t0Box377365B00OUO090", + "page": 33, + "text": "In addition, 23 percent of Syrian refugees have chronic diseases or serious medical conditions that require medical follow up. Comparative morbidity data show a different disease profile with increased levels of morbidity for Syrian refugees than Jordanians which may affect the disease burden in the future. According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing 14 percent increase in Jordan \u2019 s total cancer disease burden. Similarly, MOH morbidity data show a rise in selected communicable diseases. For example, TB case notification increased from 5 / 100, 000 among Jordanians in 2009 to 13 / 100, 000 among Syrian refugees in 2013. While no measles cases have been reported in Jordan since 2009, recent MOH data show that 18 Jordanians and 23 Syrians have been diagnosed with the disease in 2013. Polio which had been eradicated since 1999 was also detected in two cases in 2013. With this higher demand for 1414 World Health Organization Statistics, 2013", + "ner_text": [ + [ + 567, + 585, + "named" + ], + [ + 27, + 42, + "MOH morbidity data <> reference population" + ], + [ + 322, + 328, + "MOH morbidity data <> data geography" + ], + [ + 474, + 478, + "MOH morbidity data <> publication year" + ], + [ + 516, + 522, + "MOH morbidity data <> data geography" + ], + [ + 646, + 666, + "MOH morbidity data <> data description" + ], + [ + 715, + 719, + "MOH morbidity data <> reference year" + ], + [ + 762, + 766, + "MOH morbidity data <> reference year" + ], + [ + 813, + 819, + "MOH morbidity data <> data geography" + ], + [ + 927, + 931, + "MOH morbidity data <> reference year" + ], + [ + 1010, + 1014, + "MOH morbidity data <> reference year" + ], + [ + 1087, + 1091, + "MOH morbidity data <> publication year" + ] + ], + "validated": true, + "empirical_context": "According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing 14 percent increase in Jordan \u2019 s total cancer disease burden. Similarly, MOH morbidity data show a rise in selected communicable diseases. For example, TB case notification increased from 5 / 100, 000 among Jordanians in 2009 to 13 / 100, 000 among Syrian refugees in 2013.", + "type": "data", + "explanation": "This is indeed a dataset as it provides structured health data used for empirical analysis of morbidity trends.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific morbidity data collected by the Ministry of Health (MOH).", + "contextual_reason_agent": "This is indeed a dataset as it provides structured health data used for empirical analysis of morbidity trends.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 29, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 24 Box 4. Collaboration with the EU-funded DIZA The project is closely aligned with the Inclusive Development Program in Hosting Areas ( Programme de D\u00e9veloppement Inclusif dans les Zones d ' Accueil, DIZA ) funded by the EU. This EUR 15 million program is being developed jointly by the EU Delegation and ECHO in Chad and will be implemented by two NGO consortia in refugee hosting areas in the South and East of Chad. The overall objective for this three-year program is to improve the living conditions of local populations, refugees and returnees in hosting areas through support for inclusive local development. DIZA subscribes to the same principles of engagement as the Bank project in order to ensure alignment on areas of intervention and their modalities: \u2022 Targeting beneficiaries based on the same harmonized questionnaire \u2022 Including beneficiaries in the same national database ( Unified Social Registry ) \u2022 Using the Government \u2019 s norms and standards in rehabilitating and building basic service infrastructure as well as the provision of services \u2022 Supporting a phased transition from humanitarian interventions to development programs that benefit refugees and host communities \u2022 Aiming to harmonize the level of cash transfer benefits to poor households.", + "ner_text": [ + [ + 978, + 1001, + "named" + ], + [ + 15, + 19, + "Unified Social Registry <> data geography" + ], + [ + 399, + 403, + "Unified Social Registry <> data geography" + ], + [ + 613, + 621, + "Unified Social Registry <> reference population" + ], + [ + 863, + 876, + "Unified Social Registry <> reference population" + ], + [ + 932, + 945, + "Unified Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The overall objective for this three-year program is to improve the living conditions of local populations, refugees and returnees in hosting areas through support for inclusive local development. DIZA subscribes to the same principles of engagement as the Bank project in order to ensure alignment on areas of intervention and their modalities: \u2022 Targeting beneficiaries based on the same harmonized questionnaire \u2022 Including beneficiaries in the same national database ( Unified Social Registry ) \u2022 Using the Government \u2019 s norms and standards in rehabilitating and building basic service infrastructure as well as the provision of services \u2022 Supporting a phased transition from humanitarian interventions to development programs that benefit refugees and host communities \u2022 Aiming to harmonize the level of cash transfer benefits to poor households.", + "type": "registry", + "explanation": "This is a dataset as it is explicitly mentioned as a national database that collects and organizes data on beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a national database that includes beneficiaries.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a national database that collects and organizes data on beneficiaries.", + "contextual_signal": "mentioned as a national database that includes beneficiaries", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 11, + "text": "Gender disparities in Ethiopia, including in access to economic opportunities for women, are profound. The 2022 Global Gender Gap report ranked Ethiopia 74 out of 146 countries and 15 in Africa. The rank drops to 112 for economic participation and opportunities and to 133 for educational attainment. 6 The 2022 Global Findex Survey7 found 1 International Monetary Fund, 2023. Website, accessed November 14th: imf. org / en / Countries / ETH # featured 2 Drought is the most destructive climate-related natural hazard. Through 2100, there is a likely 20 percent increase in extreme high rainfall events. Flash floods and seasonal river floods are becoming more frequent and widespread. World Bank, 2021, Ethiopia Climate Risk Profile. 3 This is particularly due to dependence on key sectors that are highly affected by climate change such as agriculture, water, tourism, and forestry ( World Food Programme. Ethiopia Annual Country Report 2022 ). 4 Ethiopia Country Climate Development Report, 2023, draft, World Bank. 5 World Bank, 2021, Ethiopia Climate Risk Profile. 6 World Economic Forum. 2022. Global Gender Gap Report, published July 2022. URL: https: / / www3. weforum. org / docs / WEF_GGGR_2022. pdf. 7 World Bank. 2022. The Global Findex Database 2021: Financial Inclusion, Digital Payments, and Resilience in the age of COVID-19. https: / / www. worldbank. org / en / publication / globalfindex # sec1.", + "ner_text": [ + [ + 1235, + 1257, + "named" + ], + [ + 22, + 30, + "Global Findex Database <> data geography" + ], + [ + 107, + 111, + "Global Findex Database <> publication year" + ], + [ + 686, + 696, + "Global Findex Database <> publisher" + ], + [ + 698, + 702, + "Global Findex Database <> reference year" + ], + [ + 704, + 712, + "Global Findex Database <> data geography" + ], + [ + 908, + 916, + "Global Findex Database <> data geography" + ], + [ + 939, + 943, + "Global Findex Database <> publication year" + ], + [ + 1007, + 1017, + "Global Findex Database <> publisher" + ], + [ + 1021, + 1031, + "Global Findex Database <> publisher" + ], + [ + 1213, + 1223, + "Global Findex Database <> publisher" + ], + [ + 1258, + 1262, + "Global Findex Database <> publication year" + ], + [ + 1430, + 1448, + "Global Findex Database <> usage context" + ] + ], + "validated": true, + "empirical_context": "2022. The Global Findex Database 2021: Financial Inclusion, Digital Payments, and Resilience in the age of COVID-19. https: / / www.", + "type": "database", + "explanation": "This is indeed a dataset as it is explicitly referred to as a database that provides data on financial inclusion and related topics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Database' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a database that provides data on financial inclusion and related topics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "182_multi0page", + "page": 48, + "text": "On the data production side, the plan calls for institutionalizing the regular collection of household-level information by introducing a multi-topic panel household survey in the Strategic Plan of Statistics of INSTAT. As a result of the Project ' s support and the consensus reached among stakeholders, the next 5-year plan calls for the implementation of a Living Standard Measurement Survey ( LSMS ) in 2002, to be repeated after 3 years. The system also includes annual panel surveys on a sub-sample of LSMS households to monitor trends in areas like poverty, unemployment, migration and social services on a more frequent basis than the LSMS allows for. An important component of the assistance will be to promote activities to foster a closer dialogue between data producers and the various data users from governmental and non-governmental institutions, the donor community, and the civil society at large. This consultation process will be encouraged since the early stages of the Project ' s cycle and is expected to increase the relevance of the information produced, and so augment its role for policy-making purposes. On the data use side, the strategy calls for building analytical capacity within the Government to take full advantage of the statistical information being generated by INSTAT. Towards this end, the Project will provide continuous support both in the analysis and dissemination of policy-relevant information primarily through technical assistance, training and the provision of the necessary equipment and software for the Poverty Unit to be established in MOLSA. Sub-Component 2: Social Services Policy Development: The objectives of the sub-component are to assist the MOLSA to: a ) strengthen its policy formulation capacity, b ) further develop legislative and institutional framework for social services, c ) develop capacity for policy monitoring, evaluation and program improvement, d ) design and implement a national public awareness campaign on social exclusion, including - 45 -", + "ner_text": [ + [ + 138, + 172, + "named" + ], + [ + 407, + 411, + "multi-topic panel household survey <> publication year" + ], + [ + 468, + 488, + "multi-topic panel household survey <> data description" + ], + [ + 508, + 523, + "multi-topic panel household survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "On the data production side, the plan calls for institutionalizing the regular collection of household-level information by introducing a multi-topic panel household survey in the Strategic Plan of Statistics of INSTAT. As a result of the Project ' s support and the consensus reached among stakeholders, the next 5-year plan calls for the implementation of a Living Standard Measurement Survey ( LSMS ) in 2002, to be repeated after 3 years.", + "type": "survey", + "explanation": "This is a dataset as it is described as a survey aimed at collecting regular household-level information for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of household-level information.", + "contextual_reason_agent": "This is a dataset as it is described as a survey aimed at collecting regular household-level information for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 65, + "text": "The results monitoring framework assesses progress towards the PDO through key indicators, focusing on achieving improvement in quantity and quality of social services provided by MOSA, expanded coverage of SSNs ( the NPTP ), and viable income generating sub - projects ( see Annex 1 ). In addition, intermediate indicators will monitor the progress of each component over the life of the project. Monitoring and evaluation ( M & E ) arrangements will be centralized at the level of the PM, which will have a dedicated M & E Specialist, and rely on an M & E system adapted to the needs of each component. A computerized, modular MIS will be the central piece of the M & E system and will include: ( a ) a database on capacity building activities carried out under Component 1 and their beneficiaries ( e. g., date, place, topic, and the names of beneficiaries of a specific training ); ( b ) a module to track the processing of applications under Component 2, input baseline data for approved sub-projects, as well as monitor the physical progress in sub - project implementation and related tranche disbursements; ( c ) a module to register households in the NPTP, record the results of their eligibility assessment ( including their NPTP score ), and follow their utilization of benefits; ( d ) a financial management module for the whole project. 52. The SDCs will be the primary providers of information to be entered into the MIS and will be trained accordingly, as well as equipped with computers and connected to the internet. The MIS will be designed to allow data entry directly from the field, while the PM will perform data checks and produce summary reports. For transparency purposes, part of the data contained in the MIS will be accessible to the public ( on a viewing-only basis ) through the MOSA website. The M & E specialist will randomly visit SDCs, sub-project sites, and beneficiary households to verify the accuracy of the data entered, accompanied by the Internal Auditor as needed. 66", + "ner_text": [ + [ + 1732, + 1735, + "named" + ] + ], + "validated": false, + "empirical_context": "The MIS will be designed to allow data entry directly from the field, while the PM will perform data checks and produce summary reports. For transparency purposes, part of the data contained in the MIS will be accessible to the public ( on a viewing-only basis ) through the MOSA website. The M & E specialist will randomly visit SDCs, sub-project sites, and beneficiary households to verify the accuracy of the data entered, accompanied by the Internal Auditor as needed.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system for managing information related to data.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 20, + "text": "A large gap emerges between the road crash fatalities reported by the Government of Cameroon and the World Health Organization ( WHO ) estimates: whereas the 2016 government-reported road crash fatalities for the country were 1, 879, WHO \u2019 s estimate was 7, 066, almost four times the officially reported figure. With the support of the closed Economic and Monetary Community of Central Africa ( Communaut\u00e9 Economique et Mon\u00e9taire de l \u2019 Afrique Centrale, CEMAC ) Trade and Transit Facilitation Project ( TTFP, P079736 ) and the ongoing Transport Sector Development Project ( TSDP, P150999 ), the government established a road accident database and plans to create a lead agency responsible for the coordination of road safety efforts with the involvement of all relevant stakeholders, such as the ministries in charge of Public Works, Health, Education, the Police or Gendarmerie, and the private sector. 25. Employment segregation appears in the transport and road sectors in Cameroon. According to 2021 data from the International Labour Organization, only 10 percent of the people employed in the transport, storage, and communication sectors are women. 41 This reality is reflected in the public administration agencies such as the Ministry of Public Works ( MINTP ), where the percentage of working women does not exceed 17 percent, and is even less in the northern region.", + "ner_text": [ + [ + 622, + 644, + "named" + ], + [ + 84, + 92, + "road accident database <> data geography" + ], + [ + 158, + 162, + "road accident database <> reference year" + ], + [ + 163, + 204, + "road accident database <> data description" + ] + ], + "validated": true, + "empirical_context": "A large gap emerges between the road crash fatalities reported by the Government of Cameroon and the World Health Organization ( WHO ) estimates: whereas the 2016 government-reported road crash fatalities for the country were 1, 879, WHO \u2019 s estimate was 7, 066, almost four times the officially reported figure. With the support of the closed Economic and Monetary Community of Central Africa ( Communaut\u00e9 Economique et Mon\u00e9taire de l \u2019 Afrique Centrale, CEMAC ) Trade and Transit Facilitation Project ( TTFP, P079736 ) and the ongoing Transport Sector Development Project ( TSDP, P150999 ), the government established a road accident database and plans to create a lead agency responsible for the coordination of road safety efforts with the involvement of all relevant stakeholders, such as the ministries in charge of Public Works, Health, Education, the Police or Gendarmerie, and the private sector. 25.", + "type": "database", + "explanation": "This is indeed a dataset as it is established by the government to collect and coordinate data on road accidents.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'road accident database', which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is established by the government to collect and coordinate data on road accidents.", + "contextual_signal": "mentioned as a database established for collecting road accident data", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 63, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 58 of 130 connections provided under the project with grid, mini - grid, and off-grid solutions. on progress reports. MEMD database, IVA reports, approved loan and grants applications. People provided with access to electricity under the project with grid and mini-grid The indicator will track the number of people benefiting from grid and mini-grid electricity access under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. Quarterly Project implementati on progress reports. Household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports MEMD People provided with access to electricity under the project with grid and mini-grid of which female The indicator will track the number of females benefiting from grid and mini-grid electricity access under the project. Share of female is 51 %, UBOS Women and Men in Uganda, Facts and Figures 2016. Quarterly Project implementati on progress reports. UBOS Women and Men in Uganda, Facts and Figures, 2016. Service Providers customer database, MEMD database, IVA reports MEMD People provided with access to electricity under the project with off-grid The indicator will track the number of people benefiting from access to off-grid Quarterly Project implementati on progress Approved loan and grants applications.", + "ner_text": [ + [ + 648, + 678, + "named" + ], + [ + 491, + 528, + "UBOS National Household Survey <> data description" + ], + [ + 561, + 570, + "UBOS National Household Survey <> publication year" + ], + [ + 1017, + 1023, + "UBOS National Household Survey <> data geography" + ], + [ + 1043, + 1047, + "UBOS National Household Survey <> publication year" + ], + [ + 1123, + 1129, + "UBOS National Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Quarterly Project implementati on progress reports. Household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports MEMD People provided with access to electricity under the project with grid and mini-grid of which female The indicator will track the number of females benefiting from grid and mini-grid electricity access under the project.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of household size data used in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides household size data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of household size data used in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 18, + "text": "The objective is to have in place a highly adaptable system that can be scaled up to respond to urgent situations, such a sudden inflow of refugees, that impacts host communities. 18 17 The 2017-2021 National Development Plan ( NDP ) is the Government of Chad \u2019 s first five-year strategy. It aims at supporting the Government \u2019 s longer-term development strategy, Chad 2030 Vision. 18 As part of the combined efforts to assist the Government in building a shock-responsive social protection system, many WFP, ECHO and UNHCR partners ( NGOs ) are using the harmonized questionnaire during the lean season. The harmonized questionnaire was introduced by Government Decree 038 / PR / PM / MEPD / SE / SG / DGEP / 2017 dated September 23, 2017 and it is the first step toward building a Unified Social Registry ( USR ). Currently the Government, through the Cellule Filets Sociaux, is moving towards finalizing the USR manual and procuring all necessary hardware ( servers, mainframes ) and software to establish the registry. It is envisaged that a USR unit will eventually be created within the Government.", + "ner_text": [ + [ + 784, + 807, + "named" + ], + [ + 255, + 259, + "Unified Social Registry <> data geography" + ], + [ + 557, + 581, + "Unified Social Registry <> data type" + ], + [ + 711, + 715, + "Unified Social Registry <> publication year" + ], + [ + 722, + 740, + "Unified Social Registry <> reference year" + ], + [ + 810, + 813, + "Unified Social Registry <> acronym" + ], + [ + 855, + 877, + "Unified Social Registry <> author" + ] + ], + "validated": true, + "empirical_context": "18 As part of the combined efforts to assist the Government in building a shock-responsive social protection system, many WFP, ECHO and UNHCR partners ( NGOs ) are using the harmonized questionnaire during the lean season. The harmonized questionnaire was introduced by Government Decree 038 / PR / PM / MEPD / SE / SG / DGEP / 2017 dated September 23, 2017 and it is the first step toward building a Unified Social Registry ( USR ). Currently the Government, through the Cellule Filets Sociaux, is moving towards finalizing the USR manual and procuring all necessary hardware ( servers, mainframes ) and software to establish the registry.", + "type": "registry", + "explanation": "The Unified Social Registry is explicitly mentioned as a registry being developed to collect and manage data for social protection purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of data intended for social protection.", + "contextual_reason_agent": "The Unified Social Registry is explicitly mentioned as a registry being developed to collect and manage data for social protection purposes.", + "contextual_signal": "described as a Unified Social Registry that collects data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 11, + "text": "These vulnerabilities are heightened given the economy \u2019 s strong reliance on climate-sensitive sectors like agriculture and tourism. While women are particularly vulnerable to the impacts of climate change, given their reliance on subsistence agriculture, they are also active in safeguarding resources such as water, giving them important roles in protecting the environment. 4. Supporting women \u2019 s entrepreneurship holds a critical place in Uganda \u2019 s efforts to revive its economy. Currently, women are less likely than men to be paid employees ( 13 percent of women compared with 23 percent of men ), and more likely to be self-employed ( 80 percent compared to men \u2019 s 70 percent ). 3 In this context, promoting ways for women to grow and expand their businesses is a good option to promote economic recovery. Micro, small, and medium enterprises ( MSMEs ) created within the past five years now generate over 50 percent of formal jobs, and household enterprises provide employment for another 3. 1 million households. 4 Furthermore, women are particularly vulnerable 1 The Uganda Bureau of Statistics ( UBOS ) has recently announced poverty rates based on the UNHS 2019 / 2020. The data for this survey was collected in two periods with a break during the strictest lockdown period between March \u2013 June 2020. The first data collection period started in September 2019 and ended in February 2020, then it resumed in July 2020 and ended in November 2020. 2 Government of Uganda ( 2020 ), Third National Development Plan ( NDP III ). 3 GoU 2018. National Labour Force Survey. 4 World Bank. 2019. \u201c Uganda Jobs Strategy for Inclusive Growth. \u201d", + "ner_text": [ + [ + 1551, + 1579, + "named" + ], + [ + 445, + 451, + "National Labour Force Survey <> data geography" + ], + [ + 1545, + 1549, + "National Labour Force Survey <> publication year" + ], + [ + 1663, + 1681, + "National Labour Force Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "3 GoU 2018. National Labour Force Survey. 4 World Bank.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly referred to as a National Labour Force Survey, indicating it is a structured collection of data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is a dataset as it is explicitly referred to as a National Labour Force Survey, indicating it is a structured collection of data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 13, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 10 of 89 less than 5, 000 inhabitants. 20 The lack of consolidated and reliable data on distribution of population by water sources; low local capacity for on-site water storage to bridge intermittent water supply; and exposure to prolonged periods of droughts, which are expected to become more frequent and longer because of climate change, make long-term planning of service delivery a challenge, especially in the context of the evident impact of climate change on different water sources. Figure 1. Distribution of Population by Density, 2020 Figure 2. Spatial Use of Improved Water Sources by Regions, Household Survey 2017 Source: TajStat, Population Census Data, 2020 Source: WB, WASH Poverty Diagnostics, 2017 10. Even where WSS infrastructure is available for provision of piped safely managed WSW services, this infrastructure is largely outdated and inefficient. This infrastructure was not designed to ensure resilience to climate change shocks, associated with high operation and maintenance ( O & M ) costs, and the institutions \u2019 capacity to consider alternative WSW collection / treatment options is limited. Maintenance is limited to quick fixes of apparent system breakdowns, affecting operational performance in the long term. The number of incidents in the piped water supply networks is extremely high.", + "ner_text": [ + [ + 698, + 714, + "named" + ], + [ + 4, + 14, + "Household Survey <> publisher" + ], + [ + 15, + 25, + "Household Survey <> data geography" + ], + [ + 633, + 637, + "Household Survey <> publication year" + ], + [ + 648, + 696, + "Household Survey <> data description" + ], + [ + 715, + 719, + "Household Survey <> publication year" + ], + [ + 774, + 776, + "Household Survey <> publisher" + ], + [ + 804, + 808, + "Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Distribution of Population by Density, 2020 Figure 2. Spatial Use of Improved Water Sources by Regions, Household Survey 2017 Source: TajStat, Population Census Data, 2020 Source: WB, WASH Poverty Diagnostics, 2017 10. Even where WSS infrastructure is available for provision of piped safely managed WSW services, this infrastructure is largely outdated and inefficient.", + "type": "survey", + "explanation": "In the context, 'Household Survey 2017' is explicitly mentioned as a source of data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Household Survey' suggests a structured collection of data related to households.", + "contextual_reason_agent": "In the context, 'Household Survey 2017' is explicitly mentioned as a source of data, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 30, + "text": "The PDO-level and intermediate results indicators would be monitored using the following sources and methodologies: ( i ) data collected through MISs supported by the project ( registry, payment systems ); ( ii ) regular administrative data collection processes; ( iii ) beneficiary surveys ( spot checks ) supported by the project and conducted by outsourced external firm ( s ); ( iv ) process evaluations of the Social Registry, the cash transfers and the human development ( social promotion ) interventions supported by the project; ( v ) an additional round of the cash transfer impact evaluation conducted by outsourced external firms; ( vi ) other M & E studies conducted by the Client; and ( vii ) progress reports to be prepared by the project implementation team ( especially the M & E specialist ). 78. The Taazour team would be responsible for gathering the relevant reports and information from CSA representatives and other relevant parties involved in project implementation to monitor the PDO and results, and for communicating with the World Bank according to the frequency of reports to be described in the project \u2019 s Results Framework.", + "ner_text": [ + [ + 271, + 290, + "named" + ], + [ + 1189, + 1207, + "beneficiary surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The PDO-level and intermediate results indicators would be monitored using the following sources and methodologies: ( i ) data collected through MISs supported by the project ( registry, payment systems ); ( ii ) regular administrative data collection processes; ( iii ) beneficiary surveys ( spot checks ) supported by the project and conducted by outsourced external firm ( s ); ( iv ) process evaluations of the Social Registry, the cash transfers and the human development ( social promotion ) interventions supported by the project; ( v ) an additional round of the cash transfer impact evaluation conducted by outsourced external firms; ( vi ) other M & E studies conducted by the Client; and ( vii ) progress reports to be prepared by the project implementation team ( especially the M & E specialist ). 78.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' are explicitly mentioned as a method for data collection, indicating they are used to gather empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' imply a structured collection of data from participants.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' are explicitly mentioned as a method for data collection, indicating they are used to gather empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 13, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 9 of 68 through to the end of the century, although the timing of precipitation is likely to impact both livestock grazing periods tended to by nomadic groups and for the very small part of the country that supports agriculture. The country is highly vulnerable to droughts, floods, heat waves, and earthquakes. Its strategic coastline also increases exposure to risks posed by sea level rise, and thereby potentially affecting the country ' s strategic port assets and its ability to use tourism as means of economic growth. As one of the most water scarce countries in the world, these climate related events would continue to play havoc with human and livestock populations in the coming century. Further studies are needed to better understand some of these anticipated patterns in Djibouti to improve government \u2019 s planning and preparation. For example, recent studies using data from 14 weather stations covering the period 1946-2017, illustrates spatial and temporal variability and specifically identifies two spatially coherent regions - eastern coast and western inland areas of the country, and across January-February ( JF ); March - May ( MAM ); June-September ( JJAS ); and October-December ( OND ) over the year.", + "ner_text": [ + [ + 964, + 993, + "named" + ], + [ + 15, + 23, + "data from 14 weather stations <> data geography" + ], + [ + 1014, + 1023, + "data from 14 weather stations <> reference year" + ] + ], + "validated": true, + "empirical_context": "Further studies are needed to better understand some of these anticipated patterns in Djibouti to improve government \u2019 s planning and preparation. For example, recent studies using data from 14 weather stations covering the period 1946-2017, illustrates spatial and temporal variability and specifically identifies two spatially coherent regions - eastern coast and western inland areas of the country, and across January-February ( JF ); March - May ( MAM ); June-September ( JJAS ); and October-December ( OND ) over the year.", + "type": "dataset", + "explanation": "This is indeed a dataset as it is explicitly used in the research to analyze spatial and temporal variability.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to data collected from multiple weather stations over a specific time period.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly used in the research to analyze spatial and temporal variability.", + "contextual_signal": "follows 'using data from'", + "tags": [] + }, + { + "filename": "172_multi0page", + "page": 47, + "text": "Procedures in place. 2. 2 Strengthening of US $ 1. 5M Quarterly project Information and reporting MEST ' s planning and implementation progress needs clearly defined. management capacity. report for the first year and the Staff trained in collecting and annual EMIS reports from end recording information for 2002. EMIS. Hardware and software purchased and EMIS set up accordingly. 2. 3 Support to education US $ 1. 6M Inspectors duly trained to service delivery. provide pedagogical support and monitor implementation of the education curriculum. Public information campaign to mobilize communities in setting up SMC. SMC members trained - 42 -", + "ner_text": [ + [ + 261, + 265, + "named" + ] + ], + "validated": false, + "empirical_context": "management capacity. report for the first year and the Staff trained in collecting and annual EMIS reports from end recording information for 2002. EMIS.", + "type": "program", + "explanation": "However, EMIS is mentioned in the context of management capacity and training, indicating it is a program rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to information collection and reporting.", + "contextual_reason_agent": "However, EMIS is mentioned in the context of management capacity and training, indicating it is a program rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "069_Pakistan-Strengthening-Institutions-for-Refugee-Administration-Project", + "page": 14, + "text": "Direct benefits to refugees including better application by staff of relevant global norms and standards applicable to refugees. # 6. Data on socio-economic characteristics of refugees and refugee hosting communities published regularly. ( Linked to PDO indicator # 5 ). Work under this DLI will support implementation of a program to strengthen data availability and continuous collection of data on the socio-economic profiles of Afghan refugees. Direct benefits to refugees include improved understanding of the Afghan refugee situation and evidence-based policy making through availability of data. # 7. Improved implementation arrangements for the visa policy ( Linked to PDO indicator # 3 ). Work under this DLI will support implementation of the visa policy. Direct benefit to refugees includes streamlined processes for visa application, better information on how to apply for the visa and information about complaints handling. Component 2: Technical Assistance for improved management of refugees and host communities ( US $ 10. 0 million ) 20. This component will support specific inputs required to achieve the results in the implementation of the Government policy supported by this Project. The Project will support activities to strengthen the CCAR \u2019 s and CARs \u2019 organizational and institutional capacity for management of refugees. This component comprises six sub-components:", + "ner_text": [ + [ + 134, + 216, + "named" + ], + [ + 432, + 447, + "Data on socio-economic characteristics of refugees and refugee hosting communities <> reference population" + ] + ], + "validated": true, + "empirical_context": "# 6. Data on socio-economic characteristics of refugees and refugee hosting communities published regularly. ( Linked to PDO indicator # 5 ).", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a structured collection of socio-economic data published regularly.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'data' on specific characteristics.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of socio-economic data published regularly.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 40, + "text": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 35 of 43 Number of deliveries attended by skilled health personnel among the host community in Garissa and Turkana ( Number ) Description Total number of deliveries attended by skilled health personnel among the host community in Garissa and Turkana. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of deliveries attended by skilled health personnel among refugees in Garissa and Turkana ( Number ) Description Total number of deliveries attended by skilled health personnel among the refugees in Garissa and Turkana Frequency Every six months Data source UNHCR reports Methodology for Data Collection Routine UNHCR data collection Responsibility for Data Collection PMT Project management and evaluation Percentage of complaints in the GRM satisfactorily addressed within 4 weeks of initial complaint being recorded ( Percentage ) Description Numerator: Number of complaints to the GRM satisfactorily addressed within 4 weeks of initial complaint being recorded. Denominator: Total number of recorded complaints to the GRM.", + "ner_text": [ + [ + 429, + 433, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 35 of 43 Number of deliveries attended by skilled health personnel among the host community in Garissa and Turkana ( Number ) Description Total number of deliveries attended by skilled health personnel among the host community in Garissa and Turkana. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of deliveries attended by skilled health personnel among refugees in Garissa and Turkana ( Number ) Description Total number of deliveries attended by skilled health personnel among the refugees in Garissa and Turkana Frequency Every six months Data source UNHCR reports Methodology for Data Collection Routine UNHCR data collection Responsibility for Data Collection PMT Project management and evaluation Percentage of complaints in the GRM satisfactorily addressed within 4 weeks of initial complaint being recorded ( Percentage ) Description Numerator: Number of complaints to the GRM satisfactorily addressed within 4 weeks of initial complaint being recorded. Denominator: Total number of recorded complaints to the GRM.", + "type": "system", + "explanation": "'HMIS' is mentioned as a system for data collection, but it is not explicitly described as a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is associated with data collection methodologies.", + "contextual_reason_agent": "'HMIS' is mentioned as a system for data collection, but it is not explicitly described as a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "182_multi0page", + "page": 31, + "text": "It also calls for increased public awareness about social problems, and suggests that the SSDP focus on certain key g 7oups of vulnerable people, including women at risk of violence, youths at risk, street children, and the abandloned elderly. For the project preparation stage, the VNICA has set up baseline data on the number of people in vulnerable situations and the type and number of supplied services in four Albanian districts ( Tirana, Durres, Skodra and Vlora ). In order to monitor and evaluate progress of projects in the four districts; list of indicators has been selected for periodic follow-up. Monitoring and evaluation will be done at the district level and carried out once a year. In one year, the SSDP project will develop a national monitoring strategy. Preliminary suggestions for the preparation of a national strategy include conducting research throughout Albania using the research instruments used in the VNICA. These include a rapid quantitative assessment of vulnerable groups in the districts, a \" vulnerability census \" ( data collection and survey ), and relative ranking of vulnerability. In addition, the project will support a permnanent household survey that will assist the Government in monitoring trends in living standards and poverty correlates over time.", + "ner_text": [ + [ + 1163, + 1190, + "named" + ], + [ + 437, + 443, + "permnanent household survey <> data geography" + ], + [ + 453, + 459, + "permnanent household survey <> data geography" + ], + [ + 464, + 469, + "permnanent household survey <> data geography" + ], + [ + 882, + 889, + "permnanent household survey <> data geography" + ], + [ + 1029, + 1049, + "permnanent household survey <> data type" + ], + [ + 1212, + 1222, + "permnanent household survey <> author" + ] + ], + "validated": true, + "empirical_context": "These include a rapid quantitative assessment of vulnerable groups in the districts, a \" vulnerability census \" ( data collection and survey ), and relative ranking of vulnerability. In addition, the project will support a permnanent household survey that will assist the Government in monitoring trends in living standards and poverty correlates over time.", + "type": "survey", + "explanation": "This is a dataset as it is described as a survey that collects data to monitor trends in living standards and poverty.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often refers to structured data collection.", + "contextual_reason_agent": "This is a dataset as it is described as a survey that collects data to monitor trends in living standards and poverty.", + "contextual_signal": "mentioned as a data source for monitoring trends", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "121_PAD1190-PAD-P152848-PUBLIC-Box391435B-LB-EESSP-Final-PAD-for-printing", + "page": 2, + "text": "LU Lebanese University M & E Monitoring and Evaluation MEHE Ministry of Education and Higher Education MENA Middle East and North Africa Region MIC Middle Income Country MOF Ministry of Finance NQF National Qualifications Framework NVS New Vision for the School OECD Organization for Economic Cooperation and Development POM Project Operations Manual PD Professional Development PDO Project Development Objective PFS Project Financial Statements PISA Program for International Student Assessment PMU Project Management Unit PPP Purchasing Power Parity RACE Reaching All Children with Education in Lebanon SBD Standard Bidding Documents SIS School Information System TA Technical Assistance TIMSS Trends in International Math and Science Studies TPD Teacher Profession Development TVET / VET Technical and Vocational Education and Training UNHCR United Nations High Commissioner for Refugees UNICEF United Nations Children ' s Fund WB World Bank Regional Vice President: Hafez Ghanem Country Director: Ferid Belhaj Senior Global Practice Director: Claudia Costin Practice Manager: Harry Patrinos Task Team Leader: Noah Yarrow", + "ner_text": [ + [ + 636, + 665, + "named" + ] + ], + "validated": false, + "empirical_context": "LU Lebanese University M & E Monitoring and Evaluation MEHE Ministry of Education and Higher Education MENA Middle East and North Africa Region MIC Middle Income Country MOF Ministry of Finance NQF National Qualifications Framework NVS New Vision for the School OECD Organization for Economic Cooperation and Development POM Project Operations Manual PD Professional Development PDO Project Development Objective PFS Project Financial Statements PISA Program for International Student Assessment PMU Project Management Unit PPP Purchasing Power Parity RACE Reaching All Children with Education in Lebanon SBD Standard Bidding Documents SIS School Information System TA Technical Assistance TIMSS Trends in International Math and Science Studies TPD Teacher Profession Development TVET / VET Technical and Vocational Education and Training UNHCR United Nations High Commissioner for Refugees UNICEF United Nations Children ' s Fund WB World Bank Regional Vice President: Hafez Ghanem Country Director: Ferid Belhaj Senior Global Practice Director: Claudia Costin Practice Manager: Harry Patrinos Task Team Leader: Noah Yarrow", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'School Information System' suggests a collection of data related to schools.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 10, + "text": "Of the few women active in Jordan \u2019 s labor market, a large share is unemployed ( 23 percent of the female labor force is unemployed, compared to 13 percent of the male labor force ), with young women particularly disadvantaged ( 55 percent of female youth are unemployed, compared to 33 percent of male youth ). The government has recently strengthened its commitment to gender equality and female social and economic empowerment through Jordan \u2019 s Renaissance Plan 2019-2020 and through its the Women 1 Department of Statistics, Jordan ( Q3 2019 ) 2 International Labor Organization, ILOSTAT database. Data retrieved in Sept 2019. 3 The gender analysis in this document has been adapted from \u201c Jordan: Improving Women Economic Opportunities - Select Entry Points for Policy Dialogue and Operational Interventions \u201d, World Bank ( June 2019 ). 4 World Development Indicators. Data retrieved in June 2019 Figure 1: The Job Challenge", + "ner_text": [ + [ + 846, + 874, + "named" + ], + [ + 27, + 33, + "World Development Indicators <> data geography" + ], + [ + 439, + 445, + "World Development Indicators <> data geography" + ], + [ + 531, + 537, + "World Development Indicators <> data geography" + ], + [ + 696, + 702, + "World Development Indicators <> data geography" + ], + [ + 818, + 828, + "World Development Indicators <> publisher" + ], + [ + 831, + 840, + "World Development Indicators <> reference year" + ], + [ + 894, + 903, + "World Development Indicators <> reference year" + ] + ], + "validated": true, + "empirical_context": "3 The gender analysis in this document has been adapted from \u201c Jordan: Improving Women Economic Opportunities - Select Entry Points for Policy Dialogue and Operational Interventions \u201d, World Bank ( June 2019 ). 4 World Development Indicators. Data retrieved in June 2019 Figure 1: The Job Challenge", + "type": "index", + "explanation": "In this context, it is indeed a dataset as it is used to provide empirical data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in the context of data retrieval.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is used to provide empirical data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 68, + "text": "Concerns have been raised throughout the region, that economic migrants are too often the victims of abuse by employers. 33 In manufacturing, Jordan has taken important steps with the help of Better Work Jordan to ensure fair labor practices. 34 Concerns remain in other sectors, however, in which the Kafala system is practiced. 35 Although labor inspectors have an important role to play in enforcing worker rights. 28 The unemployment rate was 13 percent in 2015, 13 percent in 2010, 15 percent in 2005 and 14 percent in 2000. There are approximately 210, 000 unemployed Jordanians in 2015. See Employment Unemployment Survey for 2015. Available online at: http: / / www. dos. gov. jo / dos_home_e / main / linked-html / Emp & Un. htm 29 In 2015, unemployment rates were 23 percent among women versus 11 percent among men; 19 percent among those with a bachelor degree or higher versus 11 percent among those with less than secondary education; and 15 percent among 20 \u2013 24 year olds, 26 percent among 25 \u2013 29 years, and 14 percent among 40 \u2013 54 year olds. Employment Unemployment Survey for 2015. Available online at: http: / / www. dos. gov. jo / dos_home_e / main / linked-html / Emp & Un. htm.", + "ner_text": [ + [ + 598, + 628, + "named" + ], + [ + 142, + 148, + "Employment Unemployment Survey <> data geography" + ], + [ + 461, + 465, + "Employment Unemployment Survey <> publication year" + ], + [ + 501, + 505, + "Employment Unemployment Survey <> reference year" + ], + [ + 524, + 528, + "Employment Unemployment Survey <> reference year" + ], + [ + 633, + 637, + "Employment Unemployment Survey <> publication year" + ], + [ + 1095, + 1099, + "Employment Unemployment Survey <> publication year" + ], + [ + 1216, + 1234, + "Employment Unemployment Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "There are approximately 210, 000 unemployed Jordanians in 2015. See Employment Unemployment Survey for 2015. Available online at: http: / / www.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data on unemployment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data on unemployment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1242, + 1246, + "named" + ], + [ + 4, + 14, + "DHIS <> publisher" + ], + [ + 15, + 26, + "DHIS <> data geography" + ], + [ + 481, + 492, + "DHIS <> data geography" + ], + [ + 616, + 627, + "DHIS <> data geography" + ], + [ + 657, + 665, + "DHIS <> reference population" + ], + [ + 690, + 701, + "DHIS <> data geography" + ], + [ + 1069, + 1080, + "DHIS <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "In this context, 'DHIS' is described as a system that integrates health data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'DHIS' is mentioned in the context of data integration and quality improvement.", + "contextual_reason_agent": "In this context, 'DHIS' is described as a system that integrates health data, indicating it functions as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "183_multi0page", + "page": 25, + "text": "Annex 1: Project Design Summary WEST BANK AND GAZA: Education Action Project Key Prformance Hierarchy of ObjtivesE; Indicators Monitoring & Evaluation Critical Assumptions Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) Public Sector management and Improved efficiency and * Budget circular * Economy improves and provides isiuinlcapacity budn rnprnyo ulcopportunities for the educated to institutional capacitybuilding rmanagement, especially * Audit report of budgetary earn incomes. managemnt, espcially: process p. Transparency in budgetary * National and ministerial * Political stability is gained and process and practice. bdesmaintained during p - oject 2. Improved educational pro outcomes as measured by * National and international * Emphasis on good data appropriate indicators ( test assessment data. Data collection is maintai t ed and scores, enrollments, improved collection from statistical govermment participates in teacher performance ) over the department. international assessment long term.", + "ner_text": [ + [ + 844, + 864, + "named" + ] + ], + "validated": true, + "empirical_context": "bdesmaintained during p - oject 2. Improved educational pro outcomes as measured by * National and international * Emphasis on good data appropriate indicators ( test assessment data. Data collection is maintai t ed and scores, enrollments, improved collection from statistical govermment participates in teacher performance ) over the department.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected scores and indicators used for measuring educational outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'test assessment data' suggests a structured collection of scores and evaluations.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected scores and indicators used for measuring educational outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 63, + "text": "In terms of modelling, the effect sizes found elsewhere provide a probability distribution of effects for programs established under this project. These effect sizes are discussed subsequently. For a given effect size, the impact on earnings is modelled based on the lower bound average of the association between cognitive skills and earnings in Patrinos and Psacharopoulos ( 2010 ) of 17 percent per SD of effect. The effect is modelled to increase earnings by this amount across an individual \u2019 s working life. The earnings of an individual with a particular level of education and age ( the age-earnings profile ) are modelled based on the Mincerian earnings function presented in table 2. 1. Individuals are assumed to be able to earn from ages 15 to 65, and the proportion of individuals at each level of education is based on the educational attainment of individuals ages 25 to 19 in the 2020 Labor Force Survey ( LFS ). Table 2. 1.", + "ner_text": [ + [ + 896, + 919, + "named" + ] + ], + "validated": true, + "empirical_context": "1. Individuals are assumed to be able to earn from ages 15 to 65, and the proportion of individuals at each level of education is based on the educational attainment of individuals ages 25 to 19 in the 2020 Labor Force Survey ( LFS ). Table 2.", + "type": "survey", + "explanation": "The 2020 Labor Force Survey is explicitly mentioned as the source of educational attainment data for individuals, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on labor force characteristics.", + "contextual_reason_agent": "The 2020 Labor Force Survey is explicitly mentioned as the source of educational attainment data for individuals, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "133_PAD840P1216480010Box382166B00OUO090", + "page": 32, + "text": "22 Annex 1: Results Framework and Monitoring WEST BANK AND GAZA: Gaza Solid Waste Management Project Project Development Objective ( PDO ): The objective of the project is to improve solid waste management services in the Gaza Strip. PDO Level Results Indicators * Core Unit of Measure Baseline Cumulative Target Values Frequency Data Source / Methodology Responsibility for Data Collection Description ( indicator definition etc. ) YR 1 YR 2 YR3 YR 4 YR5 PDO Indicator One: Percentage of solid waste collected from the targeted population, disposed in a new sanitary landfill developed under the project % 0 % 0 % 0 % 50 % 70 % 80 % Semi-annually Monthly site Operator report Landfill site log Data from member municipalities Site Operator MDLF-PDSU This indicator measures the percentage against volume of solid waste generated by these populations which is disposed in the sanitary landfill PDO Indicator Two: Percentage increase in fees collected annually within the member municipalities towards cost recovery % 15 % 0 % 0 % 20 % 40 % 60 % Annually Annual Operating Budget of JSC - KRM and its member municipalities MDLF PDSU Reporting MDLF PDSU JSC-KRM Percentage Point is the unit of arithmetic difference between two percentages.", + "ner_text": [ + [ + 677, + 699, + "named" + ], + [ + 222, + 232, + "Landfill site log Data <> data geography" + ], + [ + 705, + 726, + "Landfill site log Data <> reference population" + ] + ], + "validated": true, + "empirical_context": "22 Annex 1: Results Framework and Monitoring WEST BANK AND GAZA: Gaza Solid Waste Management Project Project Development Objective ( PDO ): The objective of the project is to improve solid waste management services in the Gaza Strip. PDO Level Results Indicators * Core Unit of Measure Baseline Cumulative Target Values Frequency Data Source / Methodology Responsibility for Data Collection Description ( indicator definition etc. ) YR 1 YR 2 YR3 YR 4 YR5 PDO Indicator One: Percentage of solid waste collected from the targeted population, disposed in a new sanitary landfill developed under the project % 0 % 0 % 0 % 50 % 70 % 80 % Semi-annually Monthly site Operator report Landfill site log Data from member municipalities Site Operator MDLF-PDSU This indicator measures the percentage against volume of solid waste generated by these populations which is disposed in the sanitary landfill PDO Indicator Two: Percentage increase in fees collected annually within the member municipalities towards cost recovery % 15 % 0 % 0 % 20 % 40 % 60 % Annually Annual Operating Budget of JSC - KRM and its member municipalities MDLF PDSU Reporting MDLF PDSU JSC-KRM Percentage Point is the unit of arithmetic difference between two percentages.", + "type": "log", + "explanation": "This is a dataset as it is explicitly mentioned as a source of data collection for the project indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'log', which often refers to a record of data entries.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a source of data collection for the project indicators.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 52, + "text": "The integration of digitally verifiable credentials by DPI ecosystem participants is verified as follows: ( 1 ) digital representations of a collection of data attributes can be securely and reliably verified by relying parties using trusted digital technologies, enabling data to be shared with a relying party from an authoritative data source while keeping the individual data subject at the center of the data sharing transaction. Examples of digitally verifiable credentials include electronic passports, national identity cards, digital education diplomas, and other credentials that can be issued to individuals and be digitally verified by relying parties; and ( 2 ) the credentials are portable, resistant to tampering, enable streamlined verification processes in online and offline scenarios, and have adequate measures in place to ensure that they can be trusted by verifiers. The availability of these trust measures is verified as follows: ( a ) there is a common, agreed-upon set of rules ( a trust framework ) describing the standards and operational requirements that must be followed by issuers and verifiers of credentials ( participating entities ); ( b ) the standards and procedures reflected in the trust framework follow internationally-recognized standards to allow for interoperability between credential issuers as well as", + "ner_text": [ + [ + 488, + 508, + "named" + ] + ], + "validated": false, + "empirical_context": "The integration of digitally verifiable credentials by DPI ecosystem participants is verified as follows: ( 1 ) digital representations of a collection of data attributes can be securely and reliably verified by relying parties using trusted digital technologies, enabling data to be shared with a relying party from an authoritative data source while keeping the individual data subject at the center of the data sharing transaction. Examples of digitally verifiable credentials include electronic passports, national identity cards, digital education diplomas, and other credentials that can be issued to individuals and be digitally verified by relying parties; and ( 2 ) the credentials are portable, resistant to tampering, enable streamlined verification processes in online and offline scenarios, and have adequate measures in place to ensure that they can be trusted by verifiers. The availability of these trust measures is verified as follows: ( a ) there is a common, agreed-upon set of rules ( a trust framework ) describing the standards and operational requirements that must be followed by issuers and verifiers of credentials ( participating entities ); ( b ) the standards and procedures reflected in the trust framework follow internationally-recognized standards to allow for interoperability between credential issuers as well as", + "type": "document", + "explanation": "'Electronic passports' are mentioned as examples of credentials, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'electronic passports' are a dataset because they contain data attributes that can be verified.", + "contextual_reason_agent": "'Electronic passports' are mentioned as examples of credentials, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 55, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 44 Indicator Name National Water information systems established and operationalized under the project Definition / Description National water information systems established to track water data. Operationalized is defined as the systems are functional for data collection, monitoring and reporting. Frequency Bi-annually Data Source MWE / DWRM progress reports, annual Water & environment sector performance report and Water Supply Atlas. Methodology for Data Collection Responsibility for Data Collection MWE and DWRM Indicator Name State of water resources reports produced to inform decision making Definition / Description State of water resources management reports developed to inform decision making. Frequency Every two years Data Source MWE / DWRM Annual reports, annual water and environment sector performance reports. Methodology for Data Collection Responsibility for Data Collection MWE / DWDRM", + "ner_text": [ + [ + 97, + 131, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 44 Indicator Name National Water information systems established and operationalized under the project Definition / Description National water information systems established to track water data. Operationalized is defined as the systems are functional for data collection, monitoring and reporting.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system for tracking data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information systems' which suggests data handling.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system for tracking data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 37, + "text": "criteria covering: ( a ) dedicated ECE classroom and teacher; ( b ) 50 % of Annual School Profiles Census based analysis of model school scorecard SED, PMU", + "ner_text": [ + [ + 76, + 105, + "named" + ] + ], + "validated": true, + "empirical_context": "criteria covering: ( a ) dedicated ECE classroom and teacher; ( b ) 50 % of Annual School Profiles Census based analysis of model school scorecard SED, PMU", + "type": "census", + "explanation": "In this context, it is used in relation to analysis, indicating it serves as a data source for empirical evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Census', which often refers to a structured collection of data.", + "contextual_reason_agent": "In this context, it is used in relation to analysis, indicating it serves as a data source for empirical evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 17, + "text": "Movement restrictions, changes in branch opening hours and declines in loan demand led to a strong decline in loan applications. From March to April 2020, loan applications declined by 91 percent in number and 52 percent in terms of value. In the subsequent quarter loan growth very gradually began to pick up again growing 6. 35 percent in value terms. 23 Even prior to the crisis, formal financial sector lending to MSMEs remained moderate. 14. The low capacity of Small and Medium Enterprises ( SMEs ) is a barrier to further growth. SMEs face challenges with producing at the scale and efficiency required of export markets. According to a survey conducted in 2014, 24 SME capacity is very low; only 28 percent do book-keeping to track revenues and expenses; a mere 10 percent have invested in training or human capital services for employees; and just 36 percent have access to the internet. Female-owned firms appear to be particularly lacking when it comes to the use of standard business practices, with recent microenterprise survey data from Uganda showing a gender gap of 24 percentage points 19 The financial sector in Uganda is divided into four tiers.", + "ner_text": [ + [ + 1019, + 1046, + "named" + ], + [ + 418, + 423, + "microenterprise survey data <> reference population" + ], + [ + 467, + 495, + "microenterprise survey data <> reference population" + ], + [ + 664, + 668, + "microenterprise survey data <> reference year" + ], + [ + 897, + 915, + "microenterprise survey data <> reference population" + ], + [ + 1052, + 1058, + "microenterprise survey data <> data geography" + ], + [ + 1069, + 1103, + "microenterprise survey data <> data description" + ] + ], + "validated": true, + "empirical_context": "According to a survey conducted in 2014, 24 SME capacity is very low; only 28 percent do book-keeping to track revenues and expenses; a mere 10 percent have invested in training or human capital services for employees; and just 36 percent have access to the internet. Female-owned firms appear to be particularly lacking when it comes to the use of standard business practices, with recent microenterprise survey data from Uganda showing a gender gap of 24 percentage points 19 The financial sector in Uganda is divided into four tiers.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to data collected from a specific survey about microenterprises in Uganda.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey data', which typically refers to collected data from a survey.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data collected from a specific survey about microenterprises in Uganda.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 38, + "text": "The TC will be chaired by the designated representative from the MoICT & NG, while the Project Coordinator ( from the PIU ) will serve as the TC \u2019 s secretary. The TC will comprise subject matter technical specialists from NITA-U, the MoICT & NG, MoLG, OPM, NEMA, UCC, PPDA, and other sectoral agencies such as the MoES, MAAIF, JLOS, MoH, MTIC, MTWA, NIRA, UBOS; the Ministry of Gender; and the working group of the CRRF for its role among refugees and RHDs. The TC will meet at least once a quarter to ensure timely and smooth implementation progress. The Project Coordinator will ensure inter-institutional collaboration and coordination among different agencies. Ad hoc project implementation teams ( PITs ) will be established for the purposes of implementing specific activities of the project. The PITs, represented by key stakeholders from partner agencies, will be guided by the decisions of the TC. The summary of the technical leads and partner agencies involved in the implementation of each sub-component is presented in annex 3. B. Results Monitoring and Evaluation Arrangements 65. The project results framework will form the basis of the results M & E arrangements. M & E of the UDAP - GovNet will be embedded in the various components of the project, and TA provided through the project will include support for M & E. The arrangements for results monitoring are detailed in Section VII and will be supported using the Geo-Enabled Monitoring and Supervision ( GEMS ) Initiative. NITA-U will collect, compile, and analyze the results data and prepare M & E reports. Where surveys are required to populate baseline or progress data for specific indicators, the M & E specialist on the PIU will be coordinating the implementation of such surveys and utilizing funds from component 4, Project Management, to procure the needed surveying services. NITA-U will", + "ner_text": [ + [ + 1194, + 1207, + "named" + ] + ], + "validated": false, + "empirical_context": "The project results framework will form the basis of the results M & E arrangements. M & E of the UDAP - GovNet will be embedded in the various components of the project, and TA provided through the project will include support for M & E. The arrangements for results monitoring are detailed in Section VII and will be supported using the Geo-Enabled Monitoring and Supervision ( GEMS ) Initiative.", + "type": "project", + "explanation": "However, it is mentioned only as a project and not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'GovNet', which sounds like a data system.", + "contextual_reason_agent": "However, it is mentioned only as a project and not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 19, + "text": "Cameroon has participated in several rounds of the PASEC and has recently carried out a national Early Grade Reading Assessment. The Government is also keen to carry out an Early Grade Mathematics Assessment in the near future. These different assessments are not anchored to a more systematic national assessment framework. This would allow for tracking student learning on a national scale, providing diagnostics and identifying solutions for improving, the education system, and equipping decision-makers with reliable and timely information to guide policy decisions, adjustments, or reforms. There is currently limited technical capacity to conduct large-scale assessments, disseminate assessment results, and provide evidence-based guidance to those responsible for curriculum reform and teacher training. ( c ) The availability and quality of education data are poor. The Education Management Information System ( EMIS ) in Cameroon is complex, with each of the four ministries of education collecting data for the subsector for which they are responsible in the absence of institutional coordination mechanisms. Despite some progress, there is still considerable variability in the quality ( completeness, timeliness, and reliability ) of data collected by each ministry. Comparability is difficult as methods for data compilation differ from one ministry to another. The EMIS for primary education, managed by MINEDUB, is perhaps the most advanced. The United Nations Educational, Scientific, and Cultural Organization ( UNESCO ) is supporting the Government \u2019 s efforts to develop and link regionally comparable data and the United Nations Children \u2019 s Fund ( UNICEF ) is undertaking a pilot data collection and school mapping exercise in ZEPs to capture information on various school-level indicators. With the World Bank \u2019 s technical support and financing, the Government has prepared school report cards across the country, but these are yet to be mainstreamed. 9 MINEDUB. Statistical Yearbook 2014 data.", + "ner_text": [ + [ + 1380, + 1384, + "named" + ] + ], + "validated": false, + "empirical_context": "Comparability is difficult as methods for data compilation differ from one ministry to another. The EMIS for primary education, managed by MINEDUB, is perhaps the most advanced. The United Nations Educational, Scientific, and Cultural Organization ( UNESCO ) is supporting the Government \u2019 s efforts to develop and link regionally comparable data and the United Nations Children \u2019 s Fund ( UNICEF ) is undertaking a pilot data collection and school mapping exercise in ZEPs to capture information on various school-level indicators.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to data management in education.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 14, + "text": "Of the more than 750, 000 refugees17 registered in Jordan ( 89 percent of whom came from Syria ), an estimated 17 percent live in the Za \u2019 atari and Azraq refugee camps, while the remaining 83 percent are mostly in Jordan \u2019 s urban areas. Throughout the COVID-19 pandemic, food security has been a key concern for refugees in both camps and in host communities mainly due to the loss of income from temporary and informal labor activities. More than 80 percent of labor activities performed by non-Jordanians are estimated to take place in the informal economy versus 40 percent for Jordanian citizens ( MOSD, 2019 ). February 2021 mVAM data showed that 23. 3 percent of refugee households in host communities are food insecure ( over 154, 777 individuals ), while another 63. 7 percent of refugee households ( equivalent to approximately 423, 344 individuals ) are vulnerable to food insecurity. 12. Ensuring food security and social stability are at the core of the urgent need to ensure availability of and access to staple food. Bread is an essential part of the diet in Jordan and represents the main caloric source for the poorest Jordanians and the many refugees in the country.", + "ner_text": [ + [ + 632, + 641, + "named" + ], + [ + 51, + 57, + "mVAM data <> data geography" + ], + [ + 215, + 221, + "mVAM data <> data geography" + ], + [ + 618, + 631, + "mVAM data <> publication year" + ], + [ + 671, + 689, + "mVAM data <> reference population" + ], + [ + 790, + 808, + "mVAM data <> reference population" + ], + [ + 1075, + 1081, + "mVAM data <> data geography" + ] + ], + "validated": true, + "empirical_context": "More than 80 percent of labor activities performed by non-Jordanians are estimated to take place in the informal economy versus 40 percent for Jordanian citizens ( MOSD, 2019 ). February 2021 mVAM data showed that 23. 3 percent of refugee households in host communities are food insecure ( over 154, 777 individuals ), while another 63.", + "type": "data", + "explanation": "In the context, 'mVAM data' is explicitly referenced as a source of information regarding food insecurity among refugee households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'mVAM data' suggests a collection of information related to food insecurity.", + "contextual_reason_agent": "In the context, 'mVAM data' is explicitly referenced as a source of information regarding food insecurity among refugee households.", + "contextual_signal": "follows 'February 2021 mVAM data showed that'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 19, + "text": "The proposed project will also support the Social Registry Directorate to scale-up its communication and outreach, while strengthening data protection. Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9. In addition, knowledge about the objectives, modalities and strengths of the Social Registry is limited, within the government and among partners. These gaps will be addressed by intensifying the Social Registry \u2019 s communication and outreach efforts. User agreements will specify authorized uses of the data and protection of personal data. 30. The project will also encourage the use of the Social Registry as a tool for program coordination. The objective of this activity is to increase the SCAPP Directorate, Taazour and sectorial ministries \u2019 ownership of the Social Registry, by fostering its use for integrated service provision to poor and vulnerable households, and for better planning of investments according to needs. 31. This sub-component will finance the costs associated with: ( a ) the pilot Social Registry card and its evaluation, potential scale-up to 200, 000 households; ( b ) the development of the outreach strategy and related tools and communication campaign; and ( c ) workshops, analytical and coordination capacity building.", + "ner_text": [ + [ + 352, + 367, + "named" + ], + [ + 915, + 945, + "Social Registry <> reference population" + ], + [ + 1345, + 1363, + "Social Registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9. In addition, knowledge about the objectives, modalities and strengths of the Social Registry is limited, within the government and among partners. These gaps will be addressed by intensifying the Social Registry \u2019 s communication and outreach efforts.", + "type": "registry", + "explanation": "The term is indeed a dataset as it is referenced in the context of being used by line ministries and linked to projects, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to social information.", + "contextual_reason_agent": "The term is indeed a dataset as it is referenced in the context of being used by line ministries and linked to projects, indicating its role as a data source.", + "contextual_signal": "mentioned as a data source for line ministries", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 45, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 41 of 47 Education Component CBA 13. The project is estimated to generate a total of 22, 550 additional years of schooling by increasing the number of children enrolled in project schools by 10 percent annual growth rate and the school completion rate, that is, the share of students enrolled in grade 1 of a school level who graduate from it, by 40 percent. In the absence of longitudinal data to estimate current completion rates, the CBA is limited to the benefits of additional years of schooling due to the project \u2019 s impact on school enrollment. The sex and grade distribution of students in each project district from Balochistan \u2019 s EMIS is used to determine the share of girls and boys in each grade among the 18, 000 students currently enrolled. Assuming no impact in the first project year, the number of additional boys and girls enrolled in each grade for each year in FY21 \u2013 FY24 is then estimated using the targeted annual growth rate of enrollment ( table 1. 5 ). Table 1. 5.", + "ner_text": [ + [ + 719, + 723, + "named" + ], + [ + 4, + 14, + "EMIS <> publisher" + ], + [ + 15, + 26, + "EMIS <> data geography" + ], + [ + 634, + 672, + "EMIS <> data description" + ], + [ + 703, + 714, + "EMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "In the absence of longitudinal data to estimate current completion rates, the CBA is limited to the benefits of additional years of schooling due to the project \u2019 s impact on school enrollment. The sex and grade distribution of students in each project district from Balochistan \u2019 s EMIS is used to determine the share of girls and boys in each grade among the 18, 000 students currently enrolled. Assuming no impact in the first project year, the number of additional boys and girls enrolled in each grade for each year in FY21 \u2013 FY24 is then estimated using the targeted annual growth rate of enrollment ( table 1.", + "type": "system", + "explanation": "EMIS is indeed a dataset as it provides structured data on the sex and grade distribution of students used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is referenced in the context of analyzing student distribution data.", + "contextual_reason_agent": "EMIS is indeed a dataset as it provides structured data on the sex and grade distribution of students used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 23, + "text": "This system will consist of one robust database that will monitor the beneficiaries in the reinsertion and follow-up phase of the project, as well as track the implementing partners providing various support. These reports can be accessed electronically by various government partners and donors. In addition, ad hoc assessments will be conducted as well as studies based on identified needs. This will include formal assessments of the project beneficiaries in the reinsertion process as well as regular tracer beneficiary surveys that will combine qualitative and quantitative data. A mid-term review and final implementation report will be conducted in collaboration with donors and government. During the Mid-Term Review, progress towards reaching the project objectives will be evaluated and remedial action will be taken as needed. Beyond traditional M & E requirements, the project will also deliver quarterly internal and annual external audits to track fiduciary management, expectations and next steps. 19 See Annex 6: Donor Roundtable Conclusions.", + "ner_text": [ + [ + 505, + 531, + "named" + ], + [ + 437, + 458, + "tracer beneficiary surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "In addition, ad hoc assessments will be conducted as well as studies based on identified needs. This will include formal assessments of the project beneficiaries in the reinsertion process as well as regular tracer beneficiary surveys that will combine qualitative and quantitative data. A mid-term review and final implementation report will be conducted in collaboration with donors and government.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to surveys designed to collect both qualitative and quantitative data from project beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'tracer beneficiary surveys' implies a structured collection of data gathered from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to surveys designed to collect both qualitative and quantitative data from project beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "166_304360LK", + "page": 40, + "text": "The beneficiary, including the main breadwinner, should be permanently settled in the village. 0 The beneficiary should possess a formal right to the land on which the reconstruction i s The beneficiary should possess only one house which i s fully or partly damaged. Family income o f the beneficiary should be less than Rs. 2, 5OO / m0nth ~ ~. 0 0 Families who meet the above eligibility criteria will then be prioritized based on the prioritization criteria and using a quantified weighted scoring system as outlined in the Table Four below. The weighted scoring system was discussed at consultative stakeholder workshops. 30 Much o f this data i s already available based on a survey by the divisional secretary. Vulnerability maps exist for Trincomalee and Batticaloa, and will be completed for Mannar and Vavuniya shortly. Vulnerability maps will be ready for Jaffna, Kilinochchi and Mullaitivu in January, 2005. 31 Ethnic sensitivity, for instance, might have more weight in the Amparai district while the extent o f housing damage might have greater salience in the neighboring Batticaloa district. 32 Noting problems in the pilot, the housing damage assessment and social verification survey has included questions on visible capital assets so as to cross-check the beneficiary response to t h i s question. 35", + "ner_text": [ + [ + 1144, + 1200, + "named" + ], + [ + 350, + 358, + "housing damage assessment and social verification survey <> reference population" + ], + [ + 695, + 715, + "housing damage assessment and social verification survey <> author" + ], + [ + 746, + 757, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 762, + 772, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 800, + 806, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 811, + 819, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 866, + 872, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 874, + 885, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 890, + 900, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 904, + 917, + "housing damage assessment and social verification survey <> publication year" + ], + [ + 986, + 1002, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 1086, + 1105, + "housing damage assessment and social verification survey <> data geography" + ], + [ + 1214, + 1249, + "housing damage assessment and social verification survey <> data description" + ], + [ + 1335, + 1353, + "housing damage assessment and social verification survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "31 Ethnic sensitivity, for instance, might have more weight in the Amparai district while the extent o f housing damage might have greater salience in the neighboring Batticaloa district. 32 Noting problems in the pilot, the housing damage assessment and social verification survey has included questions on visible capital assets so as to cross-check the beneficiary response to t h i s question. 35", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that collects data for assessment purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often refers to structured data collection.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that collects data for assessment purposes.", + "contextual_signal": "described as a survey that collects data for assessment", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 48, + "text": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 43 of 54 continuous availability and adequate functionality of adequate waste - management processes as per established standards will be maintained throughout project implementation. Number of COVID-19 vaccine doses acquired through project financing This indicator will measure the number of COVID-19 vaccines that have been procured by the GOI through World Bank financing support. 3 months MOHE records Administrative data PMU / MOHE ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Percentage of administered doses which are captured in the national vaccination digital registry The indicator will track the percentage of administered COVID-19 vaccines which are captured in the national vaccination digital registry. 3 months Digital vaccination registry, vaccine logistics management information system Administrative data PMU / MOHE Percentage of vaccination sites which publicized detailed performance data on a regular basis in the last quarter Percentage of vaccination sites which publicize detailed performance data 3 months National vaccination dashboard Administrative data PMU / MOHE", + "ner_text": [ + [ + 772, + 809, + "named" + ], + [ + 15, + 19, + "national vaccination digital registry <> data geography" + ], + [ + 460, + 464, + "national vaccination digital registry <> publisher" + ], + [ + 713, + 745, + "national vaccination digital registry <> data description" + ] + ], + "validated": true, + "empirical_context": "Number of COVID-19 vaccine doses acquired through project financing This indicator will measure the number of COVID-19 vaccines that have been procured by the GOI through World Bank financing support. 3 months MOHE records Administrative data PMU / MOHE ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Percentage of administered doses which are captured in the national vaccination digital registry The indicator will track the percentage of administered COVID-19 vaccines which are captured in the national vaccination digital registry. 3 months Digital vaccination registry, vaccine logistics management information system Administrative data PMU / MOHE Percentage of vaccination sites which publicized detailed performance data on a regular basis in the last quarter Percentage of vaccination sites which publicize detailed performance data 3 months National vaccination dashboard Administrative data PMU / MOHE", + "type": "registry", + "explanation": "This is a dataset as it is explicitly mentioned as a source for tracking administered COVID-19 vaccines.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'registry', which often refers to a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a source for tracking administered COVID-19 vaccines.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 67, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 56 systems. Groundwater development is also being considered for small-scale irrigation and other uses as an adaptation measure to climate change and in situations where surface water sources are not available at a reasonable cost. Despite Uganda \u2019 s growing dependency on groundwater, concerns remain over its sustainability. The problems associated with excessive groundwater development are very localized. Effective planning and management strategies to regulate and control groundwater activities are key to ensuring that the groundwater is utilized sustainably to address unmet water demands. The availability of groundwater and its vulnerability to human and climatic impacts needs to be further assessed given that many towns and RGCS are using groundwater for solar-powered water schemes. 32. This component will support a comprehensive groundwater assessment and the development of a toolkit to inform further development of groundwater. The toolkit will include guidelines on aquifer utilization as well as reports and maps showing the aquifers \u2019 characteristics, distribution, and responses to pumping. This work will be closely coordinated with the implementation of Component 1 to ensure that the WSS investments are adequately guided on the potential for and sustainability of groundwater development, the spacing of production boreholes as well as the pumping regimes. The information generated will be fed into the WIS for use in future groundwater development projects. Water Information System 33.", + "ner_text": [ + [ + 1511, + 1514, + "named" + ] + ], + "validated": false, + "empirical_context": "This work will be closely coordinated with the implementation of Component 1 to ensure that the WSS investments are adequately guided on the potential for and sustainability of groundwater development, the spacing of production boreholes as well as the pumping regimes. The information generated will be fed into the WIS for use in future groundwater development projects. Water Information System 33.", + "type": "system", + "explanation": "However, 'WIS' is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'WIS' is a dataset because it is referred to as a system that uses information.", + "contextual_reason_agent": "However, 'WIS' is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 12, + "text": "The World Bank Burundi Integrated Community Development Project ( P169315 ) Page 7 of 86 I. STRATEGIC CONTEXT A. Country Context 1. Burundi is facing considerable development challenges. According to the Poverty Assessment of 2016 and the most recent national household survey, around 73 percent of the Burundian population is classified as poor. 1 This is nearly double the average for Sub-Saharan Africa ( SSA ) and for low-income countries. In 2018, Burundi ranked 138 out of 157 countries on the Human Capital Index ( HCI ) and 185 out of 189 countries on the Human Development Index. Burundi suffers from the highest stunting rate in the world ( 56 percent ) 2 and dietary diversity is well below minimum acceptable levels. 3 Despite having a formal legal system that ensures gender equality, women and girls face significant obstacles, including lower education and health outcomes. Women are also disadvantaged in the labor market where, especially in rural areas, they tend to be confined to lower paid employment. 2. The economy is slowly rebounding since the 2015 political crisis, but the recovery remains fragile. After two years of decline in 2015 and 2016, GDP returned to growth in 2017 and 2018. World Bank projections suggest a positive growth outlook, with moderate growth predicted for the 2019-2021 period.", + "ner_text": [ + [ + 251, + 276, + "named" + ], + [ + 4, + 14, + "national household survey <> publisher" + ], + [ + 15, + 22, + "national household survey <> data geography" + ], + [ + 132, + 139, + "national household survey <> data geography" + ], + [ + 226, + 230, + "national household survey <> publication year" + ], + [ + 303, + 323, + "national household survey <> reference population" + ], + [ + 447, + 451, + "national household survey <> publication year" + ], + [ + 453, + 460, + "national household survey <> data geography" + ], + [ + 1206, + 1210, + "national household survey <> publication year" + ], + [ + 1212, + 1222, + "national household survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Burundi is facing considerable development challenges. According to the Poverty Assessment of 2016 and the most recent national household survey, around 73 percent of the Burundian population is classified as poor. 1 This is nearly double the average for Sub-Saharan Africa ( SSA ) and for low-income countries.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context as a source of empirical data regarding poverty levels.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'national household survey' implies a structured collection of data about households.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context as a source of empirical data regarding poverty levels.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least M & E Specialist", + "ner_text": [ + [ + 115, + 131, + "named" + ] + ], + "validated": true, + "empirical_context": "For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least M & E Specialist", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of data gathered from participants in the follow-up phase.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'follow-up survey' implies a structured collection of data collected from individuals.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data gathered from participants in the follow-up phase.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 3, + "validated": 1, + "not_validated": 2 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 107, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 95 32. In addition to the IBEX data, the assessment looked at KPI data that were submitted annually as part of DLI deliverables under ESPES. The main data include the total procurement expenditure, list of procurement activities, share of the open bidding procedure, and the bid process time. The data received indicates no large value contract exists at regional level that reaches the Output and Performance-based Road Contracts ( OPRC ) threshold for exclusion of contracts under PforR operations. The maximum contract amount identified is ETB 250 million ( around US $ 7 million ) which is much below the threshold for goods at US $ 30 million. However, it is noted that the total amount of contracts reported under the KPI does not match the data received from IBEX. This indicates that regions are not registering all the contracts for each sector. To address this quality issue, the HCO includes a DLI that requires alignment between the KPI procurement report and budget allocation and expenditure data. 33. While the KPI data have quality issues particularly related to the comprehensiveness of the data captured, the practice is encouraging. It is understood that building systems is a process that passes through many obstacles and the result cannot be achieved in one go. The effort requires continuous engagement and resources.", + "ner_text": [ + [ + 86, + 95, + "named" + ], + [ + 15, + 23, + "IBEX data <> data geography" + ], + [ + 122, + 130, + "IBEX data <> data type" + ], + [ + 227, + 256, + "IBEX data <> data description" + ], + [ + 258, + 288, + "IBEX data <> data description" + ], + [ + 290, + 325, + "IBEX data <> data description" + ], + [ + 335, + 351, + "IBEX data <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 95 32. In addition to the IBEX data, the assessment looked at KPI data that were submitted annually as part of DLI deliverables under ESPES. The main data include the total procurement expenditure, list of procurement activities, share of the open bidding procedure, and the bid process time.", + "type": "data", + "explanation": "In the context, 'IBEX data' is explicitly referenced as part of the assessment, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'IBEX data' is a dataset because it is mentioned in the context of an assessment that looks at various data points.", + "contextual_reason_agent": "In the context, 'IBEX data' is explicitly referenced as part of the assessment, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "004_BOSIB-87c444de-4797-4bf9-b654-4932a7fb0112", + "page": 41, + "text": "The World Bank Integrated Community Resilience Project ( P506969 ) PROJECT APPRAISAL DOCUMENT Page 32 Description This indicator measures the number of people, including women, refugee and host communities in target areas who gain improved access to socio-economic infrastructure and services under the project. Investments may include infrastructure such as water, sanitation, electricity, roads, health, and education facilities, as well as productive units supporting livelihoods. Frequency Every 6 months. Data source Progress reports Methodology for Data Collection Survey Responsibility for Data Collection ADDS Total ( Number ) Description This indicator measures the number of people, including women, refugee and host communities in target areas who gain improved access to socio-economic infrastructure and services under the project. Investments may include infrastructure such as water, sanitation, electricity, roads, health, and education facilities, as well as productive units supporting livelihoods. Frequency Every 6 months. Data source Progress reports Methodology for Data Collection Survey Responsibility for Data Collection ADDS Households registered in the social registry Of which women ( Number ) Description Indicator will measure the number of individuals, both Djiboutians and refugees, that have been identified, registered and enrolled in the social registry as well as provided with evidence of enrolment. The individuals will be classified by household in the registry. Frequency Every six months.", + "ner_text": [ + [ + 1180, + 1195, + "named" + ], + [ + 1305, + 1313, + "social registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "Frequency Every 6 months. Data source Progress reports Methodology for Data Collection Survey Responsibility for Data Collection ADDS Households registered in the social registry Of which women ( Number ) Description Indicator will measure the number of individuals, both Djiboutians and refugees, that have been identified, registered and enrolled in the social registry as well as provided with evidence of enrolment. The individuals will be classified by household in the registry.", + "type": "registry", + "explanation": "The social registry is explicitly mentioned as a source for data collection and is used to classify individuals by household.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of registered individuals.", + "contextual_reason_agent": "The social registry is explicitly mentioned as a source for data collection and is used to classify individuals by household.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 28, + "text": "The World Bank Formal Employment Creation Project ( P171766 ) Page 24 of 86 57. PFIs will be selected by the TKYB based on their financial health. PFIs will onlend to SMEs under the loan component, thereby increasing the reach of the TKYB to areas and sectors where it currently has no presence. The TKYB will take the credit risk of PFIs and therefore has a strong incentive to carefully assess their financial health and operational capabilities. The PFI selection is also subject to a \u2018 no objection \u2019 process by the World Bank, while Subsidiary Finance Agreement covenants between the TKYB and PFIs require compliance with standard prudential regulations thereby ensuring the financial health of PFIs. 58. SGK will provide the official employment records to monitor compliance with job creation and retention. To assess the compliance of grant-recipient firms with the formal employment creation and retention targets specified in the business plan at the moment of application, the PIU at the TKYB will receive regular employment and wage records for beneficiary firms from SGK at grant allocation, every six months thereafter, and periodically following requests for disbursements and claims of conditionality compliance by beneficiary firms. 59. \u0130\u015eKUR will participate as public provider of skills and skills-building capacity for prospective employees of beneficiary firms. The TKYB staff will guide beneficiary firms in the identification and recruitment of job seekers.", + "ner_text": [ + [ + 731, + 758, + "named" + ], + [ + 710, + 713, + "official employment records <> publisher" + ], + [ + 842, + 863, + "official employment records <> reference population" + ], + [ + 1024, + 1051, + "official employment records <> data description" + ], + [ + 1056, + 1073, + "official employment records <> reference population" + ], + [ + 1079, + 1082, + "official employment records <> publisher" + ], + [ + 1338, + 1380, + "official employment records <> reference population" + ] + ], + "validated": true, + "empirical_context": "58. SGK will provide the official employment records to monitor compliance with job creation and retention. To assess the compliance of grant-recipient firms with the formal employment creation and retention targets specified in the business plan at the moment of application, the PIU at the TKYB will receive regular employment and wage records for beneficiary firms from SGK at grant allocation, every six months thereafter, and periodically following requests for disbursements and claims of conditionality compliance by beneficiary firms.", + "type": "records", + "explanation": "These records are explicitly mentioned as being provided to monitor compliance, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'official employment records' suggests a structured collection of data related to employment.", + "contextual_reason_agent": "These records are explicitly mentioned as being provided to monitor compliance, indicating they are used as a data source.", + "contextual_signal": "mentioned as a source of information for monitoring compliance", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 5, + "validated": 3, + "not_validated": 2 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 52, + "text": "The integration of digitally verifiable credentials by DPI ecosystem participants is verified as follows: ( 1 ) digital representations of a collection of data attributes can be securely and reliably verified by relying parties using trusted digital technologies, enabling data to be shared with a relying party from an authoritative data source while keeping the individual data subject at the center of the data sharing transaction. Examples of digitally verifiable credentials include electronic passports, national identity cards, digital education diplomas, and other credentials that can be issued to individuals and be digitally verified by relying parties; and ( 2 ) the credentials are portable, resistant to tampering, enable streamlined verification processes in online and offline scenarios, and have adequate measures in place to ensure that they can be trusted by verifiers. The availability of these trust measures is verified as follows: ( a ) there is a common, agreed-upon set of rules ( a trust framework ) describing the standards and operational requirements that must be followed by issuers and verifiers of credentials ( participating entities ); ( b ) the standards and procedures reflected in the trust framework follow internationally-recognized standards to allow for interoperability between credential issuers as well as", + "ner_text": [ + [ + 535, + 561, + "named" + ] + ], + "validated": false, + "empirical_context": "The integration of digitally verifiable credentials by DPI ecosystem participants is verified as follows: ( 1 ) digital representations of a collection of data attributes can be securely and reliably verified by relying parties using trusted digital technologies, enabling data to be shared with a relying party from an authoritative data source while keeping the individual data subject at the center of the data sharing transaction. Examples of digitally verifiable credentials include electronic passports, national identity cards, digital education diplomas, and other credentials that can be issued to individuals and be digitally verified by relying parties; and ( 2 ) the credentials are portable, resistant to tampering, enable streamlined verification processes in online and offline scenarios, and have adequate measures in place to ensure that they can be trusted by verifiers. The availability of these trust measures is verified as follows: ( a ) there is a common, agreed-upon set of rules ( a trust framework ) describing the standards and operational requirements that must be followed by issuers and verifiers of credentials ( participating entities ); ( b ) the standards and procedures reflected in the trust framework follow internationally-recognized standards to allow for interoperability between credential issuers as well as", + "type": "document", + "explanation": "'Digital education diplomas' are mentioned as examples of credentials rather than as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'digital education diplomas' is a dataset because it refers to a type of credential that contains data attributes.", + "contextual_reason_agent": "'Digital education diplomas' are mentioned as examples of credentials rather than as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a credential, not as a data source", + "tags": [] + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 27, + "text": "These will be further defined in coordination with other donors intervening in these areas ( particularly AFD ) and based on the findings of the RPBA ( expected to be available in Fall 2017 ). B. Results Monitoring and Evaluation 51. Continuous M & E will be an integral part of project implementation under the overall responsibility of the PCU. Each TLU will be responsible for day-to-day monitoring of project activities at the city level and providing regular updates to the PCU on the implementation of the City Contract. Project monitoring will be based on biannually progress reports, including updates on the results framework included in the Project Appraisal Document. At mid-term review ( MTR ) and before project closing, a beneficiary assessment will be undertaken. In addition to the Results Framework indicators, the project M & E system will also capture sector data defined by the GoC ( MINEPAT, MINHDU, and so on ) to inform policy and progress in implementation of national strategies. The M & E manual will provide further details on the results framework, social / gender - specific indicators, assessment of citizen engagement, as well as the methodology, tools, and institutional arrangements for data collection.", + "ner_text": [ + [ + 871, + 882, + "named" + ] + ], + "validated": false, + "empirical_context": "At mid-term review ( MTR ) and before project closing, a beneficiary assessment will be undertaken. In addition to the Results Framework indicators, the project M & E system will also capture sector data defined by the GoC ( MINEPAT, MINHDU, and so on ) to inform policy and progress in implementation of national strategies. The M & E manual will provide further details on the results framework, social / gender - specific indicators, assessment of citizen engagement, as well as the methodology, tools, and institutional arrangements for data collection.", + "type": "data", + "explanation": "'Sector data' is mentioned in a general context without specifying it as a structured collection or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'sector data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Sector data' is mentioned in a general context without specifying it as a structured collection or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 89, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 84 of 85 Indicator: DLR 4. 3: Increase in gender parity index for gross enrollment in lower secondary education Allocation Formula: For every 0. 02 increase from the 0. 82 baseline, US $ 250, 000 will be disbursed up to a maximum of US $ 1 million, with a minimum threshold of US $ 250, 000. Theory of change QUALITY ( GPE Variable Part Allocation of US $ 1 million ) 181. Challenge: Current national assessment systems do not provide adequate detail on levels of student learning, and so cannot help decision makers at the school and central level develop specific strategies to improve learning. 182. This concern is reflected in the PAE. The PAE acknowledges the importance of OTI results to better analyze the quality of learning, while also underscoring the limitations of the statistical yearbook to provide detailed information on OTI results ( the information remains rather general ). The PAE explains what has been started to correct this but also, more importantly, what remains to be done. 183. General objective targeted: Improve assessment systems to provide useful data on learning that can serve teachers and policy makers in improving learning. 184.", + "ner_text": [ + [ + 469, + 496, + "named" + ] + ], + "validated": false, + "empirical_context": "Theory of change QUALITY ( GPE Variable Part Allocation of US $ 1 million ) 181. Challenge: Current national assessment systems do not provide adequate detail on levels of student learning, and so cannot help decision makers at the school and central level develop specific strategies to improve learning. 182.", + "type": "system", + "explanation": "However, it is not functioning as a data source in this context, as it is described as a challenge rather than a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'national assessment systems' could imply a structured collection of data on student learning.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it is described as a challenge rather than a dataset.", + "contextual_signal": "mentioned only as a challenge, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "116_PAD1510-PAD-P152821-IDA-R2016-0078-1-Box394886B-OUO-9", + "page": 18, + "text": "This approach is especially important because the resettlement areas of Meheba and Mayukwayukwa in Zambia are located in the more geographically isolated, rural and poorer areas of the countries where access to key socio-economic facilities is low and poverty is high. In Zambia, only 46 percent of the rural population compared to 80 percent of urban dwellers are within 1 km of a school, while 28 percent of the rural population compared to 74 percent of urban dwellers are within 1 km of a health clinics. 6 Mayukwayukwa, for example, is located in Kaoma District, where the poverty rate of 82 percent is significantly higher than the national average of 62 percent. 7 Most people in both targeted Provinces live in rural areas, where access to key socio-economic facilities is low. 22. In this context, the refugee settlements with long involvement and dedicated investment by MoHA and UNHCR have actually achieved a higher degree of service provision in some sectors than many of the surrounding districts. Forty percent of the local population in Solwezi live more than 6 km away from a health post and 20 percent live more than 6 km away from a primary school, whereas none of the residents of the Meheba refugee settlement live more than 5 km away from such facilities. Investment in socio-economic and livelihood priorities for the surrounding communities therefore becomes important not only to ensure integration, but also ensure equity across this existing disparity and to address local development challenges. 23. Area Based Planning. Administration of the refugee settlements as designated areas by the MoHA has resulted in a serious disadvantage: the settlements are not included in wider area based development strategies or planning tools at the District or Provincial level. This has practical consequences. The initial design of the resettlement areas for former refugees was done in absence of consideration of the spatial economy of the wider Districts and Province or 6 Central Statistics Office, 2011, Living Conditions Monitoring Survey 2006 and 2010, p. 249 7 World Bank, 2015, Mapping Subnational Poverty in Zambia, p. 27 - 29", + "ner_text": [ + [ + 2026, + 2061, + "named" + ], + [ + 83, + 95, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 99, + 105, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 272, + 278, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 303, + 319, + "Living Conditions Monitoring Survey <> reference population" + ], + [ + 552, + 566, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 1993, + 2018, + "Living Conditions Monitoring Survey <> author" + ], + [ + 2020, + 2024, + "Living Conditions Monitoring Survey <> publication year" + ], + [ + 2062, + 2075, + "Living Conditions Monitoring Survey <> reference year" + ], + [ + 2086, + 2096, + "Living Conditions Monitoring Survey <> publisher" + ], + [ + 2098, + 2102, + "Living Conditions Monitoring Survey <> publication year" + ], + [ + 2135, + 2141, + "Living Conditions Monitoring Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "This has practical consequences. The initial design of the resettlement areas for former refugees was done in absence of consideration of the spatial economy of the wider Districts and Province or 6 Central Statistics Office, 2011, Living Conditions Monitoring Survey 2006 and 2010, p. 249 7 World Bank, 2015, Mapping Subnational Poverty in Zambia, p.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context as a source of data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context as a source of data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 35, + "text": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 31 of 104 Gender Gap Gender Action Gender Indicator62 restoration practices by specific groups of women ( that is, rural women, women farmers, abandoned wives of labor migrants, and so on ). Build on skills and interests of women in terms of design of activities, for example, crop choices, processing, and so on. Objective 4. Enhancing Women \u2019 s Voice and Engaging Men and Boys ( child marriage, gender-based violence, engaging men and boys, women \u2019 s participation and decision-making in service-delivery in governance structures ) Inventory data gathered by forestry agency or PRT is currently not disaggregated by gender Ensure that all data gathered as part of the project, including forest or pasture inventories, will be disaggregated by gender, where appropriate. All data gathered as part of the project are gender disaggregated, as applicable Gender gaps in voice and decision - making, especially over natural resources and the new organizations such as PUUs that have mandates for resource management Encourage women \u2019 s membership of resource management organizations, and in management or decision - making roles on boards / councils of groups / unions to be formed as part of the project, for example, through incentives for their inclusion, setting quotas for women members to access financing.", + "ner_text": [ + [ + 640, + 654, + "named" + ], + [ + 38, + 48, + "Inventory data <> data geography" + ], + [ + 221, + 232, + "Inventory data <> reference population" + ], + [ + 234, + 247, + "Inventory data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Objective 4. Enhancing Women \u2019 s Voice and Engaging Men and Boys ( child marriage, gender-based violence, engaging men and boys, women \u2019 s participation and decision-making in service-delivery in governance structures ) Inventory data gathered by forestry agency or PRT is currently not disaggregated by gender Ensure that all data gathered as part of the project, including forest or pasture inventories, will be disaggregated by gender, where appropriate. All data gathered as part of the project are gender disaggregated, as applicable Gender gaps in voice and decision - making, especially over natural resources and the new organizations such as PUUs that have mandates for resource management Encourage women \u2019 s membership of resource management organizations, and in management or decision - making roles on boards / councils of groups / unions to be formed as part of the project, for example, through incentives for their inclusion, setting quotas for women members to access financing.", + "type": "inventory", + "explanation": "In this context, 'inventory data' is explicitly mentioned as part of the project data that will be disaggregated by gender, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'inventory data' is a dataset because it refers to collected information that can be analyzed.", + "contextual_reason_agent": "In this context, 'inventory data' is explicitly mentioned as part of the project data that will be disaggregated by gender, indicating it is used as a data source.", + "contextual_signal": "mentioned as part of the project data to be disaggregated", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 139, + "text": "Using STEP, comprehensive information of all contracts for goods, non-consultancy services and consultants \u2019 services awarded under the subcomponent, for all contracts subject to the World Bank \u2019 s prior-review as well as post-review, will be available automatically, including but not limited to: a brief description of the contract, estimated cost, procurement method, timelines of the bidding process, the number of participated bidders, names and reasons of rejected bidders, the date of contract award, the name of awarded supplier, contractor or consultant, final contract value; and the contractual implementation period. 66. Selection methods. Table 8. 2 describes the various procurement methods and thresholds to be applied for procurement activities. The selection methods and World Bank review thresholds will be determined in the PPSD and procurement plans in STEP. The World Bank review thresholds will be determined based on individual activity risks while the prior review thresholds in the table are indicative of high-risk activities.", + "ner_text": [ + [ + 6, + 10, + "named" + ] + ], + "validated": false, + "empirical_context": "Using STEP, comprehensive information of all contracts for goods, non-consultancy services and consultants \u2019 services awarded under the subcomponent, for all contracts subject to the World Bank \u2019 s prior-review as well as post-review, will be available automatically, including but not limited to: a brief description of the contract, estimated cost, procurement method, timelines of the bidding process, the number of participated bidders, names and reasons of rejected bidders, the date of contract award, the name of awarded supplier, contractor or consultant, final contract value; and the contractual implementation period. 66.", + "type": "program", + "explanation": "'STEP' is mentioned as a program that provides information but is not described as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'STEP' is a dataset because it is associated with comprehensive information regarding contracts.", + "contextual_reason_agent": "'STEP' is mentioned as a program that provides information but is not described as a data source itself.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 70, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 66 of 82 The MINEMA-SPIU, BRD, and Districts are the implementing agencies of SEIRHCP. Districts hosting refugee camps have prior experience in implementing projects of a similar nature funded by World Bank and other development partners. The District staff are familiar with the World Bank \u2019 s procurement guidelines but new to the procurement regulations. BRD also has experience in implementing two World-Bank funded projects. MINEMA is not familiar with World Bank procurement guidelines. Thus, tailored training will be offered to MINEMA, BRD and district staff by the World Bank. Based on the assessment the project risk is rated \u2018 Substantial. \u2019 3. STEP. The proposed project will use STEP, a planning and tracking system that provides data on procurement activities, establishes benchmarks, monitors delays, and measures procurement performance. There are ongoing initiatives to harmonize STEP with the government \u2019 s e-procurement system, but until this process is complete, both will be used in parallel. 4. Procurement risk assessment.", + "ner_text": [ + [ + 798, + 802, + "named" + ] + ], + "validated": false, + "empirical_context": "STEP. The proposed project will use STEP, a planning and tracking system that provides data on procurement activities, establishes benchmarks, monitors delays, and measures procurement performance. There are ongoing initiatives to harmonize STEP with the government \u2019 s e-procurement system, but until this process is complete, both will be used in parallel.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it provides data on procurement activities.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 843, + 867, + "named" + ], + [ + 145, + 152, + "population register data <> data geography" + ] + ], + "validated": true, + "empirical_context": "However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "type": "data", + "explanation": "In the context, it is used as a source of information that could be linked with other data, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data related to population information.", + "contextual_reason_agent": "In the context, it is used as a source of information that could be linked with other data, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 72, + "text": "for drafting bidding documents; ( 10 ) Coordination and integration of the Program will be done by a central agency, such as MOPIC, for Results Areas involving multiple agencies. Planning and Budgeting 4. The assessment confirms that Jordan \u2019 s budget systems are adequate for the Program. Implementing agencies use the GFMIS for budget preparation and execution. The GFMIS is fully utilized for budget execution, but not for budget preparation, given that manual interventions with various IT tools are being used. The GFMIS offers limited functionalities include: ( a ) the presentation of an initial policy-focused budget submission; ( b ) the entering of multiple quarters and multi - year commitments; and ( c ) capturing and populating the outstanding commitments and outstanding invoices for arrears monitoring. As a result, arrears have continued to be a problem. Jordan \u2019 s budgetary central government budget classification meets Government Financial Statistics / Classification of the Functions of Government standards. 31 These classifications are included in the current chart of accounts, allowing for all transactions to be reported in accordance with the appropriate standards. The budget is published on the GBD \u2019 s website ( www. gbd. gov. jo ). The final accounts and the monthly General Government Finance Bulletin, which include budgetary government finance statistics aggregated according to the economic and functional classifications ), are also published on the Ministry of Finance \u2019 s website. 31 Jordan: Public Expenditure and Financial Accountability ( PEFA ) Assessment ( 2022 ).", + "ner_text": [ + [ + 320, + 325, + "named" + ] + ], + "validated": false, + "empirical_context": "The assessment confirms that Jordan \u2019 s budget systems are adequate for the Program. Implementing agencies use the GFMIS for budget preparation and execution. The GFMIS is fully utilized for budget execution, but not for budget preparation, given that manual interventions with various IT tools are being used.", + "type": "system", + "explanation": "GFMIS is mentioned as a system used for budget execution, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is related to budget systems and data management.", + "contextual_reason_agent": "GFMIS is mentioned as a system used for budget execution, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31. Subcomponent 1. 2: Boma Health Initiative ( implemented by UNICEF; US $ 12. 41 million: US $ 3. 41 million equivalent IDA [ WHR IDA ] and US $ 9. 0 million Trust Funds [ US $ 0. 95 million SDTF and US $ 8. 05 million MDTF ] ).", + "ner_text": [ + [ + 262, + 267, + "named" + ], + [ + 178, + 187, + "DHIS2 <> data type" + ], + [ + 202, + 211, + "DHIS2 <> data type" + ], + [ + 322, + 328, + "DHIS2 <> publisher" + ], + [ + 800, + 806, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31.", + "type": "system", + "explanation": "DHIS2 is indeed a data system used for health information management, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data entry and use.", + "contextual_reason_agent": "DHIS2 is indeed a data system used for health information management, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 31, + "text": "23 recording and tracking system is manual, resulting in delays in updating the current database and inaccuracy. In the absence of a systemic data management system, the EIC is yet to establish an effective data and statistics reporting mechanism as a management tool. Through the support of World Bank Group projects, the EIC is currently in the process of setting up CRM and data management systems to automate its function and set up a robust investor tracking and aftercare system. Once the system is set up, the EIC needs to ensure that data is captured and maintained on a regular basis and that there is adequate institutional capacity to use the system effectively. 44. ARRA faces challenges with data collection, collation, and reporting. The agency significantly lacks resources to collect and manage refugee data. At present, ARRA uses a manual refugee registration system which is met with frequent delays in updating the database and inaccuracy of data. The ongoing refugee survey reconfirmed inaccuracy and multiple issues associated with ARRA \u2019 s refugee database. Previously, ARRA and UNHCR had parallel systems to record refugee data. Recently both agencies have signed a data sharing protocol agreement and are in discussions to have a unified data collection mechanism. Along with an effective data management system, ARRA \u2019 s capacity to manage and use the system needs to be enhanced.", + "ner_text": [ + [ + 979, + 993, + "named" + ], + [ + 849, + 883, + "refugee survey <> data type" + ] + ], + "validated": true, + "empirical_context": "At present, ARRA uses a manual refugee registration system which is met with frequent delays in updating the database and inaccuracy of data. The ongoing refugee survey reconfirmed inaccuracy and multiple issues associated with ARRA \u2019 s refugee database. Previously, ARRA and UNHCR had parallel systems to record refugee data.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it mentions an ongoing refugee survey that reconfirms inaccuracies in the existing database.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'refugee survey' implies a structured collection of data gathered from refugees.", + "contextual_reason_agent": "The context confirms it is a dataset as it mentions an ongoing refugee survey that reconfirms inaccuracies in the existing database.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "203_multi-page", + "page": 6, + "text": "HUDC, as administrator of Part A, will be responsible for planning, design, procurement. supervision of implementation and commissioning of completed works for transfer to concerned municipalities, public utilities or sectoral ministries responsible for operation and maintenance of such works. Part B The general infrastructure needs of about 300 poor municipalities and villages were identified during appraisal. In the absence of reliable poverty indicators, data from the National Aid Fund ( NAF ) on the number of families receiving NAF aid were used as a proxy poverty indicator. Unemployment data, which is generally a good indicator of poverty, was not reliable and could not be used. However, poverty surveys are included in the project to help improve identification mechanisms to be used in future phases of the CIP. The priority needs in these eligible areas were based on a list of needs identified by the municipalities and village councils available at CVDB. This list was not all-inclusive and did not directly take into account the views of the beneficiary population. During appraisal 12 communities and their basic infrastructure needs were reviewed and a program of eligible investments were identified for implementation in the first year of the CIP. However, since among the needs identified by the local councils there were many investments not considered of priority, it was agreed that CVDB will undertake in the first year of the CIP detailed surveys and", + "ner_text": [ + [ + 586, + 603, + "named" + ] + ], + "validated": false, + "empirical_context": "In the absence of reliable poverty indicators, data from the National Aid Fund ( NAF ) on the number of families receiving NAF aid were used as a proxy poverty indicator. Unemployment data, which is generally a good indicator of poverty, was not reliable and could not be used. However, poverty surveys are included in the project to help improve identification mechanisms to be used in future phases of the CIP.", + "type": "data", + "explanation": "However, it is mentioned as unreliable and not used as a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a type of data commonly used in analysis.", + "contextual_reason_agent": "However, it is mentioned as unreliable and not used as a data source in this context.", + "contextual_signal": "mentioned only as unreliable data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "185_multi-page", + "page": 41, + "text": "42 Increase in percent of households with a chronically ill adult ( 15-49 years ) who have received external help in caring for a patient or replacing lost income in the past year Increase in percentage of households caring for an orphan that has received assistance from outside the family Increase in percent of adults with access to quality STI / TB / O [ case management Percent decrease in reported STI / TB / OI prevalence Percentage of people expressing nondiscriminatory attitudes towards people with HIV / AIDS 3 ) Strengthened capacity of Increase in the number of Project data institutions and communities to institutions providing effective respond to the epidemic in a coordination at nat ' l, multisectoral and sustained provincial, and district levels manner for the planning and implementation of HIV / AIDS interventions Proper mechanisms in place to Survey data transfer funds for prevention, care, and support at district and community levels Percent increase in number of Survey data organizations capable of designing, implementing, and evaluating HIV / AIDS / STI activities", + "ner_text": [ + [ + 575, + 587, + "named" + ] + ], + "validated": false, + "empirical_context": "42 Increase in percent of households with a chronically ill adult ( 15-49 years ) who have received external help in caring for a patient or replacing lost income in the past year Increase in percentage of households caring for an orphan that has received assistance from outside the family Increase in percent of adults with access to quality STI / TB / O [ case management Percent decrease in reported STI / TB / OI prevalence Percentage of people expressing nondiscriminatory attitudes towards people with HIV / AIDS 3 ) Strengthened capacity of Increase in the number of Project data institutions and communities to institutions providing effective respond to the epidemic in a coordination at nat ' l, multisectoral and sustained provincial, and district levels manner for the planning and implementation of HIV / AIDS interventions Proper mechanisms in place to Survey data transfer funds for prevention, care, and support at district and community levels Percent increase in number of Survey data organizations capable of designing, implementing, and evaluating HIV / AIDS / STI activities", + "type": "project", + "explanation": "However, 'Project data' is mentioned in a context that does not indicate it is a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Project data' refers to a dataset due to the term 'data' being present.", + "contextual_reason_agent": "However, 'Project data' is mentioned in a context that does not indicate it is a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 42, + "text": "30 terms of nutrition as any deficiencies not resolved by the end of this period is very likely to be irreversible. To also encourage older children to receive adequate nutrition and benefit from routine health check-ups, the targeting range includes all households with children under the age of 12 years old including pregnant women. In rural Chad, almost all poor households have children under the age of 12 years old. 17. The program will reach an estimated 6, 200 poor households, in both the southern Sudanian and Sahel regions. The analytical work preceding the preparation of the project indicates that chronic poverty and vulnerability is present in the southern Sudanian region. For example, comparisons between 2003 and 2011 household surveys ( ECOSIT 2 and 3 ) the food poverty rate increased substantially in the three southern regions ( Guera and Salamat from 35 percent to 42 percent and Logone Occidental from 38. 6 percent to 46. 4 percent ). However, the southern Sudanian region has few existing SP programs and interventions by donor partners are very limited, while the Government has virtually no assistance programs. In the Sahel area, there is a noticeable presence of development partners, responding to cyclical food insecurity.", + "ner_text": [ + [ + 757, + 763, + "named" + ], + [ + 255, + 309, + "ECOSIT <> reference population" + ], + [ + 320, + 334, + "ECOSIT <> reference population" + ], + [ + 345, + 349, + "ECOSIT <> data geography" + ], + [ + 362, + 377, + "ECOSIT <> reference population" + ], + [ + 521, + 534, + "ECOSIT <> data geography" + ], + [ + 664, + 688, + "ECOSIT <> data geography" + ], + [ + 723, + 727, + "ECOSIT <> reference year" + ], + [ + 732, + 736, + "ECOSIT <> publication year" + ], + [ + 737, + 754, + "ECOSIT <> data type" + ], + [ + 778, + 795, + "ECOSIT <> data description" + ], + [ + 852, + 857, + "ECOSIT <> data geography" + ], + [ + 862, + 869, + "ECOSIT <> data geography" + ], + [ + 904, + 921, + "ECOSIT <> data geography" + ] + ], + "validated": true, + "empirical_context": "The analytical work preceding the preparation of the project indicates that chronic poverty and vulnerability is present in the southern Sudanian region. For example, comparisons between 2003 and 2011 household surveys ( ECOSIT 2 and 3 ) the food poverty rate increased substantially in the three southern regions ( Guera and Salamat from 35 percent to 42 percent and Logone Occidental from 38. 6 percent to 46.", + "type": "survey", + "explanation": "In the context, 'ECOSIT' is explicitly mentioned as part of household surveys, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'ECOSIT' is a dataset because it is referenced in the context of household surveys comparing data over time.", + "contextual_reason_agent": "In the context, 'ECOSIT' is explicitly mentioned as part of household surveys, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 71, + "text": "The World Bank Uganda Climate Smart Agricultural Transformation Project ( P173296 ) Page 66 of 81 Risk Description Risk Rating Risk Mitigating Measures Incorporated into Project Design Risk Rating after Mitigation Project level - This is a complex project implemented by MAAIF, in coordination with other agencies, local governments, and communities. H This will be mitigated by agreed accountability procedures issued by MAAIF to participating agencies and districts, spelling out duties and responsibilities together with staff specifically assigned to the project. MAAIF PCU will ensure proper coordination of the project. S Although all World Bank - funded projects are captured under the annual national budget, some IAs may not have access to IFMS accounting system. H MAAIF will use the project \u2019 s module within the Integrated Financial Management System ( IFMS ) to be used for project accounting. However, the financial reports will be prepared manually using Microsoft Excel spreadsheet since they cannot be generated from the accounting system. Manual systems are prone to errors of omission or commission. S Internal control - Inability to follow up reported internal control weaknesses. S MAAIF and participating agencies have qualified and experienced internal auditors who will include the project within their workplans to ensure the internal audit unit carries out its role within the project according to their Internal Audit Charter. This will also be spelled out in the project manual.", + "ner_text": [ + [ + 749, + 753, + "named" + ] + ], + "validated": false, + "empirical_context": "MAAIF PCU will ensure proper coordination of the project. S Although all World Bank - funded projects are captured under the annual national budget, some IAs may not have access to IFMS accounting system. H MAAIF will use the project \u2019 s module within the Integrated Financial Management System ( IFMS ) to be used for project accounting.", + "type": "system", + "explanation": "However, IFMS is described as a system for project accounting, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMS is a dataset because it is related to financial management and accounting.", + "contextual_reason_agent": "However, IFMS is described as a system for project accounting, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "182_multi0page", + "page": 15, + "text": "Project Cost by Component Component 1: Policy Development Institutional 1. 60 10. 7 0. 70 7. 0 Development Component 2: Monitoring and Institutional 1. 30 8. 7 0. 40 4. 0 Administration of Social Services Development Component 3: Community-based Social Social Funds 10. 60 70. 7 7. 50 75. 0 Services Component 4: Project Management, Institutional 1. 50 10. 0 1. 40 14. 0 Information System and Monitoring Development 0. 0 0. 00 0. 0 0. 0 0. 00 0. 0 Total Project Costs 15. 00 100. 0 10. 00 100. 0 Front-end fee 0. 00 0. 0 0. 00 0. 0 Total Financing Required 15. 00 100. 0 10. 00 100. 0 2. Key policy and institutional reforms supported by the project: The project will address the following key policy issues: ( i ) Poverty monitoring and evaluation by supporting the development of a permanent data collection sys. tem at the household level based on periodic LSMS and panel surveys and by building Government capacity to analyze the data and monitor poverty trends. - 12 -", + "ner_text": [ + [ + 861, + 865, + "named" + ], + [ + 870, + 883, + "LSMS <> data type" + ] + ], + "validated": true, + "empirical_context": "Key policy and institutional reforms supported by the project: The project will address the following key policy issues: ( i ) Poverty monitoring and evaluation by supporting the development of a permanent data collection sys. tem at the household level based on periodic LSMS and panel surveys and by building Government capacity to analyze the data and monitor poverty trends. - 12 -", + "type": "survey", + "explanation": "LSMS is indeed a dataset as it is mentioned in relation to periodic surveys that collect data for poverty monitoring.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed LSMS is a dataset because it is referenced in the context of data collection and analysis.", + "contextual_reason_agent": "LSMS is indeed a dataset as it is mentioned in relation to periodic surveys that collect data for poverty monitoring.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 41, + "text": "34 Arrangements for Results Monitoring 128. The M & E Division of the DG Planning would be responsible for providing overall monitoring and evaluation of TEIP activities. Project outputs and outcomes will be tracked through a set of strategic monitoring indicators that are part of the M & E system that will be used to monitor the EDSP. During the first two years, monitoring reports will mainly focus on reporting about completion of processes and activities. 129. Coordination of data collection on teacher qualification and class teacher engagement in mentoring student teachers will be the responsibility of the DG for Qualification and Supervision and NIET, and will be done through the existing network of school supervisors. 130. The coordination of the evaluation of the experimental programs will be responsibility of the Assessment and Evaluation Center which is expected to contract out this activity to a qualified research and evaluation institution under terms of reference agreed with the World Bank. 131. The Assessment and Evaluation Center will also be responsible for coordinating the design, development and implementation of the readiness to teach indicator.", + "ner_text": [ + [ + 233, + 264, + "named" + ] + ], + "validated": false, + "empirical_context": "The M & E Division of the DG Planning would be responsible for providing overall monitoring and evaluation of TEIP activities. Project outputs and outcomes will be tracked through a set of strategic monitoring indicators that are part of the M & E system that will be used to monitor the EDSP. During the first two years, monitoring reports will mainly focus on reporting about completion of processes and activities.", + "type": "indicator", + "explanation": "However, these indicators are not a dataset but rather metrics used for evaluation and monitoring purposes.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'strategic monitoring indicators' suggests a structured collection of metrics.", + "contextual_reason_agent": "However, these indicators are not a dataset but rather metrics used for evaluation and monitoring purposes.", + "contextual_signal": "mentioned only as indicators, not as a data source", + "tags": [] + }, + { + "filename": "182_multi0page", + "page": 20, + "text": "Sub-project level monitoring will be carried during the sub-project cycle and be more closely defined once the operations manual has been developed. To monitor the indicators that have been developed in the project LogFrame ( see Annex 1 ), each of the indicators will be associated with baseline and target values, frequency of collection, responsibility and methods of measurement. The indicators in the LogFrame will be further developed to include input, output, and outcome. In some cases those would be associated with relevant risk and sustainability indicators. The monitoring system through the use of pre-formatted reports will require the recipient of the report to provide comments and feedback on the information submitted. This will insure the presence of a feedback mechanism. In addition, the annual work plan will be required to have monitoring activities as part of component activities to insure that such activities are not set aside and that they become an integral part of project implementation. Using the established baselines from the VNICA study and other studies to be carried out during the project lifetime the monitoring will be carried out starting with project effectiveness. Monitoring reports will be required on a periodic basis in addition to various reports from special studies and research activities. As part of the Bank monitoring, semi-annual supervision missions and a mid-term review will be undertaken. D. Project Rationale 1. Project alternatives considered and reasons for rejection: I. Project alternatives considered and reasons for rejection: * The project initially considered up-front financing of a large package of community services. However, it was determined that systemic reform would best be accomplished through a two-stage process, with the first stage of the project supporting a smaller package of services and intensive policy and institutional strengthening, and the second stage financing a larger package of services once a set of policy triggers had been achieved. * The possibility of adding a component for social services delivery in the Albania Development Fund - 17 -", + "ner_text": [ + [ + 215, + 223, + "named" + ] + ], + "validated": false, + "empirical_context": "Sub-project level monitoring will be carried during the sub-project cycle and be more closely defined once the operations manual has been developed. To monitor the indicators that have been developed in the project LogFrame ( see Annex 1 ), each of the indicators will be associated with baseline and target values, frequency of collection, responsibility and methods of measurement. The indicators in the LogFrame will be further developed to include input, output, and outcome.", + "type": "framework", + "explanation": "However, the LogFrame is described as a framework for monitoring and does not function as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is associated with indicators and values that could imply data collection.", + "contextual_reason_agent": "However, the LogFrame is described as a framework for monitoring and does not function as a structured collection of data itself.", + "contextual_signal": "mentioned only as a project framework, not as a data source", + "tags": [] + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 63, + "text": "Figures from the 2014 Uganda National Population and Housing Survey indicate that 32 percent of women were not involved in any economic activities, compared to only 26 percent of men ( National Housing and Population Census 2014 ). \u2022 Sub-component 3. 2 includes Digital Access program that will support various access affordability initiatives to increase direct access to internet, particularly for women. \u2022 The program will integrate feedback by women beneficiaries in the design and target households in refugees and host districts that are among the most vulnerable and left behind in terms of access to mobile devices. \u2022 Public Internet access points ( Wi-Fi hotspots ) and community Internet access schemes ( telecenters ) with women-friendly opening hours and in women-friendly locations such as markets, informal women \u2019 s group meeting locations, water collection points, and public food distribution centers ( Sub-components 1. 2, 3. 1, and 3. 2 ). Low level of digital skills and high incidence of online violence especially within refugee and host communities \u2022 While digital skills in Uganda are generally low, the \u2018 digital \u2022 Design digital skills / literacy training under sub - component 3. 2 to promote women \u2019 s participation and \u2022 Percentage of women assessed as digitally literate post the 64 GSMA Mobile Gender Gap Report 2020. 65 After Access Surveys 2019. 66 GSMA ( Global System for Mobile Communications Association ). The Mobile Gender Gap Report. 2015. 67 Freedom on the Net 2018, Freedom House.", + "ner_text": [ + [ + 185, + 223, + "named" + ], + [ + 17, + 21, + "National Housing and Population Census <> publication year" + ], + [ + 22, + 28, + "National Housing and Population Census <> data geography" + ], + [ + 96, + 101, + "National Housing and Population Census <> reference population" + ] + ], + "validated": true, + "empirical_context": "Figures from the 2014 Uganda National Population and Housing Survey indicate that 32 percent of women were not involved in any economic activities, compared to only 26 percent of men ( National Housing and Population Census 2014 ). \u2022 Sub-component 3.", + "type": "census", + "explanation": "This is a dataset as it is explicitly mentioned in relation to the figures derived from the census data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in the context of figures and statistics related to economic activities.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned in relation to the figures derived from the census data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 71, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 66 of 130 access under the project, disaggregated by refugees and host communities beneficiaries. applications and installation reports. Number of grid and mini-grid connections provided under the project, of which refugees The indicator will track beneficiaries of grid, mini - grid, and off-grid electricity in districts hosting refugees access under the project, disaggregated by refugees and host communities beneficiaries Quarterly Project implementati on progress reports.. Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports. UECCC, MEMD MV / LV network constructed under the project in refugee-hosting districts The indicator will track the kilometers of Medium and Low voltage network constructed under the project in Districts hosting refugees Quarterly Project implementati on progress report MEMD construction reports MEMD Commercial enterprises and institutions electrified under the project with grid and mini-grid connections in refugee-hosting districts The indicator will track the number of commercial enterprises and institutions electrified under the project with grid and mini-grid connections, disaggregated by refugees and host communities beneficiaries. Quarterly Implementati on progress report.", + "ner_text": [ + [ + 609, + 620, + "named" + ] + ], + "validated": false, + "empirical_context": ". Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports. UECCC, MEMD MV / LV network constructed under the project in refugee-hosting districts The indicator will track the kilometers of Medium and Low voltage network constructed under the project in Districts hosting refugees Quarterly Project implementati on progress report MEMD construction reports MEMD Commercial enterprises and institutions electrified under the project with grid and mini-grid connections in refugee-hosting districts The indicator will track the number of commercial enterprises and institutions electrified under the project with grid and mini-grid connections, disaggregated by refugees and host communities beneficiaries.", + "type": "report", + "explanation": "However, IVA reports are mentioned as documents and not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'reports' can sometimes contain structured data.", + "contextual_reason_agent": "However, IVA reports are mentioned as documents and not as a data source in the context.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 137, + 142, + "named" + ], + [ + 4, + 14, + "NEMIS <> publisher" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 942, + 945, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for various educational management purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in relation to data utilization for managing primary education.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for various educational management purposes.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 13, + "text": "Cameroon has ranked consistently in the lower quintile of world governance indicators for the last two decades ( Figures 5-1 and 5-2, Annex 5 ). Governance, Government effectiveness, and corruption are consistently acknowledged as key issues by Cameroonians, 3 the private sector, 4 and the GoC. 5 3 Transparency International ( 2013 ), Global Corruption Barometer, Afrobarometer ( 2011 \u2013 2013 and 2014 \u2013 2015 ). 4 World Bank ( 2009 ) Cameroon Enterprise Surveys, INS ( 2009 ) firm census, GoC ( 2011 ) Business Climate Survey; WEF ( World Economic Forum ). 2015. Global Competitiveness Report 2014 \u2013 2015. 5 DSCE 2010 \u2013 2020; President Paul Biya \u2019 s New Year Speeches in December 2003, 2005, 2006, 2007, 2014, 2015 or official communication to Cabinet in December 2004, September 2006, September 2007, March 2008, July 2009, and October 2015.", + "ner_text": [ + [ + 435, + 462, + "named" + ], + [ + 0, + 8, + "Cameroon Enterprise Surveys <> data geography" + ], + [ + 245, + 257, + "Cameroon Enterprise Surveys <> reference population" + ], + [ + 329, + 333, + "Cameroon Enterprise Surveys <> publication year" + ], + [ + 415, + 425, + "Cameroon Enterprise Surveys <> publisher" + ], + [ + 428, + 432, + "Cameroon Enterprise Surveys <> publication year" + ], + [ + 435, + 443, + "Cameroon Enterprise Surveys <> data geography" + ], + [ + 470, + 474, + "Cameroon Enterprise Surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "5 3 Transparency International ( 2013 ), Global Corruption Barometer, Afrobarometer ( 2011 \u2013 2013 and 2014 \u2013 2015 ). 4 World Bank ( 2009 ) Cameroon Enterprise Surveys, INS ( 2009 ) firm census, GoC ( 2011 ) Business Climate Survey; WEF ( World Economic Forum ). 2015.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned alongside other surveys that provide empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is listed among other surveys and data sources.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned alongside other surveys that provide empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "191_multi-page", + "page": 30, + "text": "To that end, the surveys will provide the information on the conditions of roads, water supply, health and education services, and local institutions that is necessary for planning reconstruction programs. The relevant indicators will be selected through a survey of statistics users in the Government, donor, and NGO sectors. Project Component 2 - US $ 1. 25 million Administration. The project will be executed by Twitezimbere, which is a non-governmental organization that executes the BURSAP I under an agreement ( Convention ) with the Government. A board that represents a cross-section of Burundian society governs Twitezimbere: NGOs, individual members, and the Ministries of Finance, Plan, and Agriculture.", + "ner_text": [ + [ + 257, + 283, + "named" + ] + ], + "validated": false, + "empirical_context": "To that end, the surveys will provide the information on the conditions of roads, water supply, health and education services, and local institutions that is necessary for planning reconstruction programs. The relevant indicators will be selected through a survey of statistics users in the Government, donor, and NGO sectors. Project Component 2 - US $ 1.", + "type": "survey", + "explanation": "However, it is not a dataset itself but rather a method of gathering information from users.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often relates to data collection.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a method of gathering information from users.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 23, + "text": "Securing land tenure and enabling efficient and transparent land markets are indispensable conditions to support good governance, promote rural development ( including agriculture and forest protection ), and improve urban development and planning. Project activities related to the land sector will directly support objective 3. 3 in the CPF ( Support to Decentralization ) by strengthening SFR, CFV and CCFV. The mining interventions are designed to support governance and greater economic inclusion \u2014 that is, shared prosperity \u2014 of the mining sector, particularly but not solely to communities in the vicinity of mines and for artisanal miners. They will also support improved provision of goods and services to mining operations and miners; better regulations and a more inclusive management of ASM and support an improved implementation / use of the MFLD to finance sustainable socio - economic investments for rural mining areas. The exclusion of classified forests from mining licensing, better management of ASM and improved approaches to mine closure will reduce loss of trees and deforestation with corresponding impacts on climate change. Digitally securing land rights and providing spatial planning tools including harmonized geospatial datasets and a property valuation system will also support the country in achieving its climate change adaptation and mitigation objectives. There will also be a special emphasis on gender on all aspects of mining management. 26. The project is aligned with the World Bank Group ( WBG ) COVID-19 Crisis Response Approach Paper, 33 in particular Pillar 3 ( Ensuring sustainable Business Growth and Job Creation ) and Pillar 4 ( Strengthening Policies, 33 World Bank. 2020. \u201c Saving Lives, Scaling-up Impact and Getting Back on Track: WBG COVID-19 Crisis Response Approach Paper. \u201d", + "ner_text": [ + [ + 1266, + 1291, + "named" + ] + ], + "validated": false, + "empirical_context": "The exclusion of classified forests from mining licensing, better management of ASM and improved approaches to mine closure will reduce loss of trees and deforestation with corresponding impacts on climate change. Digitally securing land rights and providing spatial planning tools including harmonized geospatial datasets and a property valuation system will also support the country in achieving its climate change adaptation and mitigation objectives. There will also be a special emphasis on gender on all aspects of mining management.", + "type": "system", + "explanation": "However, the context indicates it is a system rather than a structured collection of data, as it focuses on valuation processes rather than data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data handling.", + "contextual_reason_agent": "However, the context indicates it is a system rather than a structured collection of data, as it focuses on valuation processes rather than data itself.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 26, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 24 of 34 VII. RESULTS FRAMEWORK AND MONITORING Results Framework COUNTRY: Azerbaijan SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan Project Development Objectives Enhance civic engagement, technical skills and opportunities for income generation for vulnerable IDP households in Azerbaijan. Project Development Objective Indicators Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Name: Percentage of participants self - employed or employed by firms Percentag e 0. 00 80. 00 Once, starting three months after trainees complete their courses. Post-Training Completion Survey conducted at least three months after training completion. M & E Specialist with support from supplementary data collectors, as needed. Description: The total number of individual participants completing their training programs divided by the number of individual participants completing their training programs that have either registered a business or where an employer has verified employment. Name: Increase in income of households with individuals participating Percentag e 0. 00 30. 00 Twice, once at baseline and once after training completion.", + "ner_text": [ + [ + 757, + 788, + "named" + ], + [ + 4, + 14, + "Post-Training Completion Survey <> publisher" + ], + [ + 77, + 87, + "Post-Training Completion Survey <> data geography" + ], + [ + 179, + 189, + "Post-Training Completion Survey <> data geography" + ], + [ + 942, + 1100, + "Post-Training Completion Survey <> data description" + ], + [ + 1192, + 1255, + "Post-Training Completion Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "00 Once, starting three months after trainees complete their courses. Post-Training Completion Survey conducted at least three months after training completion. M & E Specialist with support from supplementary data collectors, as needed.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey conducted to gather data after training completion.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey conducted to gather data after training completion.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 86, + "text": "Funding from the RSW will be allocated to this results area, and an emphasis will be placed on prioritizing eligible schools in refugee - affected areas. This result will be monitored by including a disaggregated intermediate results indicator related to the availability of the minimum number of state-paid teachers per school. The release of IDA funds under this results area will be linked to the following DLI: Improved distribution of teachers recruited by the state in public primary schools ( DLI 1 ). 22. The Directorate of Human Resources ( MINEDUB ), which has vast experience with teacher recruitment and deployment, will be directly responsible for implementation under this results area, with support from Inspector-General of Education, and the Directorate of Pre-primary and Primary Education ( Direction de l \u2019 Enseignement Maternel et Primaire, DEMP ) and relevant ministries and agencies, including regional agencies and schools. The project will aim to capitalize on synergies with the recently approved IDA-financed Strengthening Public Sector Effectiveness and Statistical Capacity Project, including support to line ministries in public investment management ( including human resources management ), program budgeting, and an in-depth analysis of teacher recruitment. 23. Targets associated with teacher recruitment and deployment will be as follows: ( a ) development and implementation of a comprehensive national plan for teacher recruitment and deployment; ( b ) recruitment of a minimum of 12, 000 teachers over a five-year period to be deployed to the regions premised on need; and ( c ) an increase from 43 to 95 percent in the share of public schools with at least three state-paid teachers. Activities associated with DLI 1 will disproportionately benefit rural schools because 95 percent of understaffed schools are in rural areas, as highlighted in table 2. 2 below.", + "ner_text": [ + [ + 199, + 243, + "named" + ] + ], + "validated": false, + "empirical_context": "Funding from the RSW will be allocated to this results area, and an emphasis will be placed on prioritizing eligible schools in refugee - affected areas. This result will be monitored by including a disaggregated intermediate results indicator related to the availability of the minimum number of state-paid teachers per school. The release of IDA funds under this results area will be linked to the following DLI: Improved distribution of teachers recruited by the state in public primary schools ( DLI 1 ).", + "type": "indicator", + "explanation": "However, it is not a dataset but rather an indicator used to measure results, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'disaggregated' which often relates to data analysis.", + "contextual_reason_agent": "However, it is not a dataset but rather an indicator used to measure results, not a structured collection of data.", + "contextual_signal": "mentioned only as an indicator, not as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 58, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 53 of 117 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 refugee / host communities ( Number ) Strengthen Capacity for Implementing Initiated Reforms Capacity building of target Teacher Training Colleges for implementation of competency-based teacher education curriculum. ( Yes / No ) No Yes Yes Establishment of standards and tools for quality assurance mechanisms for Pre-primary education, including teacher appraisal tools, assessment and classroom observation tools. ( Yes / No ) No Yes Yes New classrooms constructed in existing schools as per the needs-based school infrastructure investment plan ( Number ) 0. 00 6, 500. 00 8, 000. 00 New classrooms constructed in refugee host communities existing schools as per the needs - based school infrastructure investment plan ( Number ) 0. 00 50. 00 50. 00 New classrooms constructed in existing non - refugee / host communities primary schools as per the needs-based school infrastructure investment plan ( Number ) 0. 00 6, 000. 00 7, 950. 00 Evidence of utilization of NEMIS data for better management of primary education ( Yes / No ) No Yes Yes Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS ( Yes / No ) No Yes Yes Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of", + "ner_text": [ + [ + 1322, + 1327, + "named" + ], + [ + 1215, + 1231, + "NEMIS <> reference population" + ], + [ + 1302, + 1318, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 7, 950. 00 Evidence of utilization of NEMIS data for better management of primary education ( Yes / No ) No Yes Yes Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS ( Yes / No ) No Yes Yes Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is used for registering refugee learners and managing primary education data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization and management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is used for registering refugee learners and managing primary education data.", + "contextual_signal": "mentioned as a data source for registration of refugee learners", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 19, + "text": "The inefficiencies of teacher management are symptomatic of a lack of high managerial and technical capacity to provide a clear strategic and operational direction for the sector and limited capacity for personnel planning, management, and supervision. ( b ) There is no systematic approach to assess student learning. Because Niger does not have a national system with adequate structure, mechanisms, and skilled staff to carry out large-scale standardized assessments of student outcomes, the Ministries of Education resort to international comparative assessments such as PASEC or SDI and occasionally administer numeracy, literacy, or subject tests on a sample or an ad hoc basis. In the face of limited assessment data, the Government is unable to act upon evidence and align system elements to address learning gaps. ( c ) There is limited capacity to produce, manage, and analyze education data for planning purposes at all levels ( national, regional, and school ). This constrains the Government \u2019 s ability to pilot approaches in the education system and introduce accountability and transparency mechanisms. Data are generally collected using cumbersome paper-based questionnaires at the school level and processed at the central level.", + "ner_text": [ + [ + 887, + 901, + "named" + ] + ], + "validated": false, + "empirical_context": "In the face of limited assessment data, the Government is unable to act upon evidence and align system elements to address learning gaps. ( c ) There is limited capacity to produce, manage, and analyze education data for planning purposes at all levels ( national, regional, and school ). This constrains the Government \u2019 s ability to pilot approaches in the education system and introduce accountability and transparency mechanisms.", + "type": "data", + "explanation": "However, 'education data' is described in a general sense and not as a specific structured collection or dataset used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'education data' refers to a dataset due to its mention in the context of assessment and analysis.", + "contextual_reason_agent": "However, 'education data' is described in a general sense and not as a specific structured collection or dataset used for empirical analysis.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "168_252640updated0version", + "page": 12, + "text": "The government recently created an Interministerial Committee against HIV / AIDS, malaria and TB ( IC ), as required in its National HIV / AIDS Strategic Plan, which will have a policy role ( see section C4 on institutional arrangements for more details ), and a Technical Interministerial Committee ( TIC ), which will be the technical arm o f the IC, to manage the response to HIV / AIDS. These two entities are to be assisted by an Executive Secretariat ( ES ). The Ministry o f Health hired a consulting firm to assist with the preparation o f the National HIV / AIDS Strategic Plan. The prevalence surveys carried out during project preparation provide baseline data for the general population, STI patients, military personnel, and CSWs. The prevalence surveys made it possible to identify the Priority Vulnerable Groups ( PVG ). Also during project preparation, KABP studies were carried out among the general population, school children, military personnel, and dockers in addition to the survey being carried out by Save the Children mentioned above. UNICEF i s implementing a Mother-to-Child Transmission ( MTCT ) pilot program which started at the end o f March 2003. The UNAIDS Thematic Group has also been reinvigorated.", + "ner_text": [ + [ + 592, + 610, + "named" + ], + [ + 680, + 698, + "prevalence surveys <> reference population" + ], + [ + 700, + 712, + "prevalence surveys <> reference population" + ], + [ + 714, + 732, + "prevalence surveys <> reference population" + ], + [ + 738, + 742, + "prevalence surveys <> reference population" + ], + [ + 800, + 826, + "prevalence surveys <> data description" + ], + [ + 909, + 927, + "prevalence surveys <> reference population" + ], + [ + 929, + 944, + "prevalence surveys <> reference population" + ], + [ + 946, + 964, + "prevalence surveys <> reference population" + ], + [ + 970, + 977, + "prevalence surveys <> reference population" + ], + [ + 1025, + 1042, + "prevalence surveys <> author" + ], + [ + 1173, + 1177, + "prevalence surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "The Ministry o f Health hired a consulting firm to assist with the preparation o f the National HIV / AIDS Strategic Plan. The prevalence surveys carried out during project preparation provide baseline data for the general population, STI patients, military personnel, and CSWs. The prevalence surveys made it possible to identify the Priority Vulnerable Groups ( PVG ).", + "type": "survey", + "explanation": "In this context, 'prevalence surveys' are indeed used as a data source for baseline data on various groups.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'prevalence surveys' are datasets because they collect and provide data on specific populations.", + "contextual_reason_agent": "In this context, 'prevalence surveys' are indeed used as a data source for baseline data on various groups.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 23, + "text": "The project will promote boosting shared prosperity, by increasing focus on the least well-off in Burundi. The cash transfer component will target the lowest quintiles of the Burundi population to increase their incomes. A targeting system will be developed to accurately identify the poorest households in the country and cash transfers will be complemented by facilitating access to productive and social inclusion initiatives to achieve a sustainable alleviation of poverty. II. PROJECT DESCRIPTION A. Project Development Objective PDO Statement 43. The development objective of the project is to strengthen management capacity, scale up safety net programs, and promote productive inclusion and access to jobs. PDO Level Indicators \uf0b7 Households in targeted areas included in the National Social Registry ( Number ) o Of which refugees o Of which host communities \uf0b7 Proportion of households enrolled in the beneficiary database living below the extreme poverty line ( Percentage ) \uf0b7 Beneficiaries of social safety net programs ( Number ) o Of which female o Of which refugees o Of which host communities o Of which beneficiaries of unconditional cash transfers o Of which beneficiaries of emergency cash transfers \uf0b7 Beneficiaries of job-focused interventions ( Number )", + "ner_text": [ + [ + 783, + 807, + "named" + ], + [ + 98, + 105, + "National Social Registry <> data geography" + ], + [ + 175, + 182, + "National Social Registry <> data geography" + ], + [ + 738, + 748, + "National Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The development objective of the project is to strengthen management capacity, scale up safety net programs, and promote productive inclusion and access to jobs. PDO Level Indicators \uf0b7 Households in targeted areas included in the National Social Registry ( Number ) o Of which refugees o Of which host communities \uf0b7 Proportion of households enrolled in the beneficiary database living below the extreme poverty line ( Percentage ) \uf0b7 Beneficiaries of social safety net programs ( Number ) o Of which female o Of which refugees o Of which host communities o Of which beneficiaries of unconditional cash transfers o Of which beneficiaries of emergency cash transfers \uf0b7 Beneficiaries of job-focused interventions ( Number )", + "type": "registry", + "explanation": "The National Social Registry is explicitly mentioned as a source of data for the project indicators, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that includes households for analysis.", + "contextual_reason_agent": "The National Social Registry is explicitly mentioned as a source of data for the project indicators, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 42, + "text": "information systems completed Integrated EMIS is under development Integrated EMIS is operational Annual report produced using integrated EMIS with Second annual report produced using integrated EMIS with Integrated EMIS is operational and reports, produced", + "ner_text": [ + [ + 67, + 82, + "named" + ] + ], + "validated": false, + "empirical_context": "information systems completed Integrated EMIS is under development Integrated EMIS is operational Annual report produced using integrated EMIS with Second annual report produced using integrated EMIS with Integrated EMIS is operational and reports, produced", + "type": "system", + "explanation": "However, 'Integrated EMIS' is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMIS' suggests it could be related to data management.", + "contextual_reason_agent": "However, 'Integrated EMIS' is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 45, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 42 of 89 89. The project will rely on involvement of WASH committees in behavior change campaigns and development of self-reporting beneficiary feedback mechanisms, beneficiary satisfaction surveys, and formal GRM mechanisms for utilities and project separately. Every citizen engagement channel will be monitored with the appropriate frequency as follows: ( a ) Community mobilization processes for access to information will be implemented before, during, and after construction, and hence should be monitored on a semiannual basis for all the project zones. ( b ) involvement of WASH committees should be closely monitored for implementation of the planned behavior change activities at the jamoat, schools, and mahalla levels on a quarterly basis. WASH committees will be part of the self-reporting mechanisms to be included in the project. ( c ) Beneficiary satisfaction surveys will be implemented in every project zone using the simplified paper-based and digital customer satisfaction tool, at least on an annual basis starting from the second year of the project. ( d ) Large-scale sample-based surveys of beneficiaries will be conducted at the baseline, midterm, and end line of the project. 90.", + "ner_text": [ + [ + 255, + 287, + "named" + ], + [ + 4, + 14, + "beneficiary satisfaction surveys <> publisher" + ], + [ + 1026, + 1087, + "beneficiary satisfaction surveys <> data description" + ], + [ + 1205, + 1218, + "beneficiary satisfaction surveys <> reference population" + ], + [ + 1311, + 1329, + "beneficiary satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 42 of 89 89. The project will rely on involvement of WASH committees in behavior change campaigns and development of self-reporting beneficiary feedback mechanisms, beneficiary satisfaction surveys, and formal GRM mechanisms for utilities and project separately. Every citizen engagement channel will be monitored with the appropriate frequency as follows: ( a ) Community mobilization processes for access to information will be implemented before, during, and after construction, and hence should be monitored on a semiannual basis for all the project zones.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to surveys specifically designed to collect data on beneficiary satisfaction, which is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary satisfaction surveys' implies a structured collection of data gathered from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to surveys specifically designed to collect data on beneficiary satisfaction, which is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 52, + "text": "UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. b ) Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and surrounding population. Initially, only those sites supported by this project will be added. Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. c ) Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. d ) Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed at the start of year 1.", + "ner_text": [ + [ + 193, + 196, + "named" + ] + ], + "validated": false, + "empirical_context": "UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. b ) Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and surrounding population.", + "type": "program", + "explanation": "However, BSS is described as a program or initiative rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed BSS is a dataset because it is mentioned in the context of information collection for camps.", + "contextual_reason_agent": "However, BSS is described as a program or initiative rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 27, + "text": "This system would store and update the teacher profile, including qualifications, experience, language proficiency, preferred service areas, recruitment process documentation, and performance assessment. This tool would be essential for organizing and tracking teacher activities, such as deployment and training, and enhancing accountability in the process. 64. The capacity development plan activities will be tailored to the needs of each entity. For example, they could include ( i ) training and knowledge sharing in areas such as fiduciary management, strategic planning, communication, teacher management, teacher professional development, school inspection, vulnerability management, and diversity and inclusion; ( ii ) reviewing roles and responsibilities and optimizing work processes to enhance the management of education service delivery, mainly the functions related to teachers \u2019 management and professional development; ( iii ) developing a performance and reporting system, M & E system, and strengthening and decentralizing the existing data management systems ( explained under subcomponent 4. 2 ); ( iv ) preparing policy frameworks ( i. e., the teachers ' recruitment and retention strategy ); and ( v ) provision of equipment and rehabilitation of the physical and IT infrastructure that could be needed to facilitate and sustain the implementation of the introduced capacity change. The detailed activities will be clearly listed upon concluding the capacity development plans. Subcomponent 4. 1 will finance the implementation of the priority activities identified in the capacity development plans at the national and sub-national levels.", + "ner_text": [ + [ + 991, + 1003, + "named" + ] + ], + "validated": false, + "empirical_context": "The capacity development plan activities will be tailored to the needs of each entity. For example, they could include ( i ) training and knowledge sharing in areas such as fiduciary management, strategic planning, communication, teacher management, teacher professional development, school inspection, vulnerability management, and diversity and inclusion; ( ii ) reviewing roles and responsibilities and optimizing work processes to enhance the management of education service delivery, mainly the functions related to teachers \u2019 management and professional development; ( iii ) developing a performance and reporting system, M & E system, and strengthening and decentralizing the existing data management systems ( explained under subcomponent 4. 2 ); ( iv ) preparing policy frameworks ( i.", + "type": "system", + "explanation": "However, it is mentioned as a system rather than a data source, indicating it does not function as a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'M & E system' suggests a structured approach to monitoring and evaluation.", + "contextual_reason_agent": "However, it is mentioned as a system rather than a data source, indicating it does not function as a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 96, + "text": "ERfKE II allocates approximately US $ 25 million to such efforts, which is clearly likely to be worthwhile and pay for itself through lower cost for major rehabilitation in the future, not to mention the potential ( if immeasurable ) positive effects on learning from fostering a positive, safe, and comfortable learning environment. 14 13 Conceivably these could be used at least to add an hour each of instruction in the three PISA subjects \u2014 Reading, Math, and Science \u2014 especially given the MoE \u2019 s goal to improve teacher utilization and increase teacher work loads. 14 Similarly, the JD3. 65 Million to replace or repair unsafe buildings does not require sophisticated cost-benefit analysis to know that it is a worthwhile investment. Of course this is a small amount of funds but it is over 1 percent of the total program.", + "ner_text": [ + [ + 429, + 433, + "named" + ] + ], + "validated": false, + "empirical_context": "ERfKE II allocates approximately US $ 25 million to such efforts, which is clearly likely to be worthwhile and pay for itself through lower cost for major rehabilitation in the future, not to mention the potential ( if immeasurable ) positive effects on learning from fostering a positive, safe, and comfortable learning environment. 14 13 Conceivably these could be used at least to add an hour each of instruction in the three PISA subjects \u2014 Reading, Math, and Science \u2014 especially given the MoE \u2019 s goal to improve teacher utilization and increase teacher work loads. 14 Similarly, the JD3.", + "type": "program", + "explanation": "'PISA' is mentioned in the context of educational subjects but is not described as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is often associated with educational assessments and data collection.", + "contextual_reason_agent": "'PISA' is mentioned in the context of educational subjects but is not described as a data source or dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 47, + "text": "The objective of the cash transfer program is to support the consumption of extreme poor and vulnerable households and promote investments in the foundations of human capital ( nutrition, health, early childhood development ). Ultimately, a national scale program would aim to reach all the poorest households across Burundi ( estimated at about 300, 000 households, with approximately two million members17 ). The beneficiaries are the households, which receive the cash transfers and benefit from an increase in consumption. Within the household, the focus is on women \u2013 as cash transfer recipients and targets of some of the behavior change activities \u2013 and children \u2013 as targets of the behavior change activities and 16 The main symptom of vulnerability in Burundi is food insecurity. At the country-level, it translates into most of the country classified as \u201c stressed \u201d and an increasing area in the North and East classified in \u201c crisis \u201d in the Integrated Food Security Phase classification ( FAO, 2016 ). At the household-level, it translates in increasing numbers of individuals in chronic or acute food insecurity ( estimated to 4. 6 million in October 2016 ( OCHA ) ) 17 These estimations are based on the poverty analysis of the 2013 / 14 household survey ( Enqu\u00eate sur les Conditions de Vie des M\u00e9nages Burundais, ECVMB )", + "ner_text": [ + [ + 1243, + 1269, + "named" + ] + ], + "validated": true, + "empirical_context": "At the household-level, it translates in increasing numbers of individuals in chronic or acute food insecurity ( estimated to 4. 6 million in October 2016 ( OCHA ) ) 17 These estimations are based on the poverty analysis of the 2013 / 14 household survey ( Enqu\u00eate sur les Conditions de Vie des M\u00e9nages Burundais, ECVMB )", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as the source of estimations related to food insecurity.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific household survey that provides data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as the source of estimations related to food insecurity.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 76, + "text": "Creation of a New Sector Specific Data Management Systems ( UW3. 2 million ) 23. In the absence o f an integrated system for managing forest resource information, it i s difficult for MECNT to make informed decisions, elaborate policies, and manage the forest sector. To remedy this situation, MECNT will develop an integrated system for the management o f forest resources,: ( i ) a geospatial information management system ( SyGIS ), including an interactive forestry atlas updated regularly to allow monitoring, especially o f forest title limits and logging roads; and ( ii ) a forest management information system ( SIGEF ) based on a sub-system o f forest accounting, which collects financial data linked to the forest sector, and a sub-system o f physical log tracking, which physically validates data collected on the ground as well as the enforcement of regulations. 24. To achieve this, the project will support: ( i ) a team o f international and national experts specialized in conceiving, experimenting, and deploying the two systems on the ground; ( ii ) information seminars and workshops; ( iii ) provision o f office equipment, furniture, and logistics; and ( iv ) operational 64", + "ner_text": [ + [ + 582, + 618, + "named" + ] + ], + "validated": false, + "empirical_context": "In the absence o f an integrated system for managing forest resource information, it i s difficult for MECNT to make informed decisions, elaborate policies, and manage the forest sector. To remedy this situation, MECNT will develop an integrated system for the management o f forest resources,: ( i ) a geospatial information management system ( SyGIS ), including an interactive forestry atlas updated regularly to allow monitoring, especially o f forest title limits and logging roads; and ( ii ) a forest management information system ( SIGEF ) based on a sub-system o f forest accounting, which collects financial data linked to the forest sector, and a sub-system o f physical log tracking, which physically validates data collected on the ground as well as the enforcement of regulations. 24.", + "type": "system", + "explanation": "However, it is described as a system for managing information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system for managing information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 41, + "text": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 37 of 44 program beneficiaries will also be oriented to the program by social and health workers in the targeted regions who will receive communication pamphlets. \uf0b7 Support to Community identification committees. The project will support the organization of the community identification committees that will develop the pre-lists of potential beneficiaries. Support to community identification committees will include development of communication material and guidance notes for adopting a community targeting approach. The project will also support additional technical assistance to households and community identification committees to further guide them through the community targeting process. \uf0b7 Benefit Calculation and Payment Mechanism. The project will support the analysis of payment levels using the latest household survey data to ensure that benefits paid to households take into consideration number of household members and consumption poverty level. The project will also support the modernization of the payment mechanism over the lifetime of the project. Specifically, ensuring that a mechanism is adopted to ( i ) robustly identify beneficiaries at point of payment; ( ii ) reconcile payments; ( iii ) ensuring payments are made regularly and on-time as described in the project manual. \uf0b7 Grievance and Redress. The program will refine its GRM as part of the program structure.", + "ner_text": [ + [ + 900, + 921, + "named" + ], + [ + 668, + 678, + "household survey data <> reference population" + ], + [ + 954, + 964, + "household survey data <> reference population" + ], + [ + 989, + 1046, + "household survey data <> data description" + ] + ], + "validated": true, + "empirical_context": "\uf0b7 Benefit Calculation and Payment Mechanism. The project will support the analysis of payment levels using the latest household survey data to ensure that benefits paid to households take into consideration number of household members and consumption poverty level. The project will also support the modernization of the payment mechanism over the lifetime of the project.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned to be used for analysis in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' implies a structured collection of data collected from households.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned to be used for analysis in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 14, + "text": "2 5. Chad \u2019 s high rate of monetary poverty is accompanied by very low human development indicators. Chad ranked at 185 out of 188 countries in the 2015 Human Development Index. The adult literacy rate was 47 percent, the literacy rate for men being 53. 8 percent and that for women, 44. 0 percent. In 2013, the primary school completion rate stood at 38 percent, and 56 percent of 6 - to 24-year olds were not enrolled in school. In the last decade, there have been noticeable improvements in health indicators, but challenges remain, including with child, infant, and maternal mortality ratios. The Multiple Indicator Cluster Survey 2014 \u2013 2015 reports the child, infant, and maternal mortality ratios as 65 per 1, 000, 72 per 1, 000, and 860 per 100, 000 respectively. Despite some recent improvements, the access to basic social services remains low and the number of poor and vulnerable will increase with the decline of Government spending because of the fall in oil prices and the fragile security environment. Moreover, these and other human development indicators confirm that women are more disadvantaged than men, especially when it comes to access to education and health services. 6. Poverty is strongly correlated to malnutrition, which is affecting all regions of Chad.", + "ner_text": [ + [ + 601, + 634, + "named" + ], + [ + 5, + 9, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 101, + 105, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 148, + 152, + "Multiple Indicator Cluster Survey <> publication year" + ], + [ + 635, + 646, + "Multiple Indicator Cluster Survey <> publication year" + ], + [ + 1279, + 1283, + "Multiple Indicator Cluster Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "In the last decade, there have been noticeable improvements in health indicators, but challenges remain, including with child, infant, and maternal mortality ratios. The Multiple Indicator Cluster Survey 2014 \u2013 2015 reports the child, infant, and maternal mortality ratios as 65 per 1, 000, 72 per 1, 000, and 860 per 100, 000 respectively. Despite some recent improvements, the access to basic social services remains low and the number of poor and vulnerable will increase with the decline of Government spending because of the fall in oil prices and the fragile security environment.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it reports specific health indicators derived from the survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "The context confirms it is a dataset as it reports specific health indicators derived from the survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 12, + "text": "The Turkish Red Crescent ( TRC ) Livelihoods Survey ( 2018 ) found that around 86 percent of the refugee population works in informal jobs and the EU estimates that up to 950, 000 SuTPs currently work in the informal sector. 5 9. A combination of demand and supply factors contribute to the problems that refugees face in accessing formal employment. On the supply side, low levels of education and skills and informational and language barriers pose challenges in access to the formal labor market. Available evidence suggests that the education and skills levels of refugees are low, for example, with around 15 percent having at most a high school degree among Syrian ESSN beneficiaries, and the majority of these degrees are from Syria. 6 Additionally, among ESSN beneficiaries, 14 percent have no formal education and 39 percent have only attended primary school. 7 The eligibility criteria for the ESSN also disincentivize refugees from pursuing and accepting formal employment, as they would risk losing their social assistance support. On the demand side, employers may find it relatively more difficult to verify the skill levels, and therefore expected productivity, of refugees. 8 This is especially true in the case of SuTPs who could not bring their degrees or certifications with them to Turkey.", + "ner_text": [ + [ + 4, + 51, + "named" + ] + ], + "validated": true, + "empirical_context": "The Turkish Red Crescent ( TRC ) Livelihoods Survey ( 2018 ) found that around 86 percent of the refugee population works in informal jobs and the EU estimates that up to 950, 000 SuTPs currently work in the informal sector. 5 9.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context of findings related to the refugee population's employment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context of findings related to the refugee population's employment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 14, + "text": "Potential partnerships with Civil Society Organizations ( CSOs ) to develop awareness tools and messages, encourage participation of women and gather community feedback will promote the Fund transparency and increase demand from beneficiaries for improved services. Ongoing communication to inform decision makers, beneficiaries and other stakeholders concerning the SWF reforms and beneficiary recertification is essential. B. Rationale for Bank involvement 12. The World Bank has been a lead donor in supporting Yemen \u2019 s Poverty Reduction Strategy and Social Protection Agenda. In addition to direct support to two key social protection instruments in Yemen ( the Social Fund for Development and the Public Works Project ) for more than ten years, the Bank supported the review of the Yemen Social Protection program and provided technical assistance for the development of the Social Protection Strategy and Action Plan. More recently, the Bank, among other donors, is supporting the development of Yemen \u2019 s Food Security Strategy. \u2019 SWF 2008 Survey: over 1. 6 million households were covered in this survey, 1 million of which are current SWF beneficiary HHs, and 0. 6 million new applicants. 3", + "ner_text": [ + [ + 1039, + 1054, + "named" + ] + ], + "validated": true, + "empirical_context": "More recently, the Bank, among other donors, is supporting the development of Yemen \u2019 s Food Security Strategy. \u2019 SWF 2008 Survey: over 1. 6 million households were covered in this survey, 1 million of which are current SWF beneficiary HHs, and 0.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on households covered in the survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that covered a large number of households.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on households covered in the survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "182_multi0page", + "page": 41, + "text": "Annex 1: Project Design Summary ALBANIA: Social Services Delivery 141 of s MQniorl & 4hiIe 3, m_ & Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) Improve standards of living I. Increased # of beneficiaries 1. Vulnerability Needs and I. Government su Yports and promote social cohesion gained access to the social Institutional Capabilities multiple sector reforms through community-based services baseline study 2. Political stabili! y is social services targeted to 2. Raised awareness of social 2. Regional needs assessment maintained poor and vulnerable inclusion issues of reports 3. Government mntintains its population groups vulnerable population 3. Periodic beneficiary impact commitment to operate the groups assessment community-bast: d services on 3. Increased community 4. MOLSA / GASS records, participatory w.. y participation in decision other participants making information and databases. 5. Public opinion surveys 6. Household budget survey Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: The project development 1. Increased efficiency and 1. MOLSAIGASS reports 1. Governments and social objectives are to assist the reduced cost per 2.", + "ner_text": [ + [ + 962, + 984, + "named" + ], + [ + 32, + 39, + "Public opinion surveys <> data geography" + ], + [ + 685, + 706, + "Public opinion surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "5. Public opinion surveys 6. Household budget survey Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: The project development 1.", + "type": "survey", + "explanation": "In this context, public opinion surveys are explicitly mentioned as a type of survey, indicating they are used as a data source for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because public opinion surveys are structured collections of data used to gauge public sentiment.", + "contextual_reason_agent": "In this context, public opinion surveys are explicitly mentioned as a type of survey, indicating they are used as a data source for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 40, + "text": "The World Bank Agricultural Employment Support for Refugees and Turkish Citizens through Enhanced Market Linkages ( P171543 ) Page 37 of 85 Program on Elimination of Child Labor ( 2017 \u2013 2023 ) in Turkey and has introduced strict measures to prevent involvement of children in worst forms of child labor in different sectors, including agriculture. However, with the high informality in the sector, child labor remains an important problem. According to Turkstat \u2019 s 2012 Child Labor Force Survey which contains the most recent national data regarding child labor in Turkey, 893, 000 children between the ages of 6 and 17 are working and 44. 7 percent of all working children ( 399, 000 ) are employed in agricultural production. Other data resources, such as the US Department of Labor, state in the child labor report on Turkey ( 2018 ) that out of 320, 254 children of ages between 6 and 14 years who are working, 57 percent worked in agriculture. 100. The project design and ESF instruments aim to minimize the risks of child labor and informal work which are associated with the agriculture sector in Turkey. The project will support formal employment for persons above the age of 18 years. The formality of employment will be verified by the Turkish Social Security Agency and the project will establish age verification procedure.", + "ner_text": [ + [ + 472, + 496, + "named" + ], + [ + 197, + 203, + "Child Labor Force Survey <> data geography" + ], + [ + 454, + 462, + "Child Labor Force Survey <> publisher" + ], + [ + 467, + 471, + "Child Labor Force Survey <> publication year" + ], + [ + 528, + 541, + "Child Labor Force Survey <> data type" + ], + [ + 567, + 573, + "Child Labor Force Survey <> data geography" + ], + [ + 575, + 633, + "Child Labor Force Survey <> data description" + ], + [ + 823, + 829, + "Child Labor Force Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "However, with the high informality in the sector, child labor remains an important problem. According to Turkstat \u2019 s 2012 Child Labor Force Survey which contains the most recent national data regarding child labor in Turkey, 893, 000 children between the ages of 6 and 17 are working and 44. 7 percent of all working children ( 399, 000 ) are employed in agricultural production.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned to contain national data regarding child labor in Turkey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides national data on child labor.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned to contain national data regarding child labor in Turkey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 55, + "text": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "ner_text": [ + [ + 145, + 167, + "named" + ] + ], + "validated": true, + "empirical_context": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ).", + "type": "registry", + "explanation": "In the context, 'terminology registries' are described as providing a query-able source for health information exchange, indicating they function as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'terminology registries' is a dataset because it refers to a structured collection of standardized classifications for health information exchange.", + "contextual_reason_agent": "In the context, 'terminology registries' are described as providing a query-able source for health information exchange, indicating they function as a data source.", + "contextual_signal": "described as a query-able source for health information exchange", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "At the district level, GROW will provide technical assistance on specific areas ( Stakeholder engagement, grievance management, management of workers ) to government officers based in the districts and relevant government entities ( MGLSD, NEMA - Social unit, CDOs, Labour, Gender, Occupational Health and Safety ( OHS ), and all other government departments that manage social risk in the country ), and sub-county staff. Key focus will also be given to building capacity of MGLSD, PSFU and other relevant government institutions, including academia on social risk mitigation. 62. Subcomponent 4B: Policy innovation and evidence generation. This subcomponent will finance data collection efforts beyond the information gathered through the MIS and digital delivery platforms ), analysis and publication of data from project and non-project datasets on female entrepreneurship, climate resilience and WEE, establishment of a data portal, and research workshops and policy forums on female entrepreneurship and WEE. In addition, the learning agenda could also include the design and implementation of innovative pilot activities within the project to test what works to address the key constraints faced by beneficiaries, including refugees and women living in RHDs. This work will be designed and conducted in collaboration with the World Bank \u2019 s Africa Gender Innovation Lab as well as other local research institutes and development partners.", + "ner_text": [ + [ + 741, + 744, + "named" + ] + ], + "validated": false, + "empirical_context": "Subcomponent 4B: Policy innovation and evidence generation. This subcomponent will finance data collection efforts beyond the information gathered through the MIS and digital delivery platforms ), analysis and publication of data from project and non-project datasets on female entrepreneurship, climate resilience and WEE, establishment of a data portal, and research workshops and policy forums on female entrepreneurship and WEE. In addition, the learning agenda could also include the design and implementation of innovative pilot activities within the project to test what works to address the key constraints faced by beneficiaries, including refugees and women living in RHDs.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a system and not explicitly as a data source or dataset in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured collection of information.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a system and not explicitly as a data source or dataset in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 60, + "text": "48 second cycle of activities. The Mid Term Review will take into account the results of the evaluations and readjust the project activities accordingly. 38. The process evaluation will use several sources of data. The MIS and regular monitoring reports will produce information on whether the key elements of the safety nets system ( targeting mechanism, payment, MIS ) have been adequately developed. The process evaluation will include interviews with local implementing partners, such as payment agencies and NGOs in charge of the accompanying measures, to identify bottlenecks and recommend solutions. A small qualitative beneficiary assessment might be carried out to complete the process evaluation to assess the satisfaction of direct beneficiaries with payment procedures, accompanying measures and with the benefits they will have received. 39. Targeting assessment. The project targeting approach will be refined as the project starts implementation and on the basis of specific studies supported by the ASP MDTF. The result of the targeting assessment will support the preparation of the manuals. The proposed approach is as follows: Beneficiary households will be selected combining community, geographical, and poverty approaches; and will include a census of all village households in select areas, categorical targeting ( households with children under the age of 12 or with pregnant women ) and a simple PMT exercise.", + "ner_text": [ + [ + 1264, + 1312, + "named" + ] + ], + "validated": true, + "empirical_context": "The result of the targeting assessment will support the preparation of the manuals. The proposed approach is as follows: Beneficiary households will be selected combining community, geographical, and poverty approaches; and will include a census of all village households in select areas, categorical targeting ( households with children under the age of 12 or with pregnant women ) and a simple PMT exercise.", + "type": "census", + "explanation": "This is indeed a dataset as it involves a systematic collection of data on village households for the targeting assessment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of household data gathered through a census.", + "contextual_reason_agent": "This is indeed a dataset as it involves a systematic collection of data on village households for the targeting assessment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 120, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 115 of 117 within the provisions of PPADA, 2015 and Regulations 2020. Training on auditing in the NEMIS Platform and monitor progressively how many of the audits are carried out on this platform. Fiduciary Systems MoE Recurrent Yearly Audits conducted on the NEMIS Platform. Preparation for Kenya to participate in the 2025 Programme for International Student Assessment \u2019 s for Development ( PISA-D ). Technical DLI 6 MoE and KNEC Due Date 31-Dec - 2025 a ) admission / signing up; b ) instrument translation and adaptation of instrument; ( c ) pre-test; ( d ) main survey; and ( e ) data analysis, report writing and dissemination Strengthened supply chain for sanitary towels and accountability Technical DLI 5 MoE Due Date 29-Dec - 2023 A module in NEMIS showing distribution of sanitary towels by school and number of girls benefiting. Review and analyze NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Environmental and Social Systems DLI 6 MoE KISE Due Date 30-Jun-2023 Reports generated from NEMIS with granular data on special needs children by category of disability, age and gender. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies.", + "ner_text": [ + [ + 830, + 835, + "named" + ] + ], + "validated": false, + "empirical_context": "Preparation for Kenya to participate in the 2025 Programme for International Student Assessment \u2019 s for Development ( PISA-D ). Technical DLI 6 MoE and KNEC Due Date 31-Dec - 2025 a ) admission / signing up; b ) instrument translation and adaptation of instrument; ( c ) pre-test; ( d ) main survey; and ( e ) data analysis, report writing and dissemination Strengthened supply chain for sanitary towels and accountability Technical DLI 5 MoE Due Date 29-Dec - 2023 A module in NEMIS showing distribution of sanitary towels by school and number of girls benefiting. Review and analyze NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps.", + "type": "system", + "explanation": "However, NEMIS is described as a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "However, NEMIS is described as a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 63, + "text": "A Governing Board is considered functional when ( 1 ) the government agency, under which the TVET sector Annual starting year 2 Notes of board establishmen t, minutes of meetings Administrative data ( project files ) PIU M & E Specialist", + "ner_text": [ + [ + 179, + 198, + "named" + ] + ], + "validated": true, + "empirical_context": "A Governing Board is considered functional when ( 1 ) the government agency, under which the TVET sector Annual starting year 2 Notes of board establishmen t, minutes of meetings Administrative data ( project files ) PIU M & E Specialist", + "type": "data", + "explanation": "In this context, 'Administrative data' is used as a source of information related to the functioning of the Governing Board.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Administrative data' is a dataset because it suggests a structured collection of information used for governance.", + "contextual_reason_agent": "In this context, 'Administrative data' is used as a source of information related to the functioning of the Governing Board.", + "contextual_signal": "mentioned as a source of information for governance", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 65, + "text": "53 Annex 5: Economic Analysis Chad: Safety Nets Project 1. This economic analysis provides an ex ante estimate of the program \u2019 s potential impact on poverty and consumption for different benefit scenarios. 2. The main source of data for this analysis is the ECOSIT national household survey that was carried out by the National Statistical Office in 2011. This survey included all 20 regions of the country. In total, 9, 259 households were surveyed, covering 49, 985 individuals. The sample was stratified into 20 clusters per region, of which 12 were urban and 8 were rural, apart from the capital, N ' Djamena, where 100 clusters were surveyed and all were classified as urban. Using the weighting methodology provided by the National Statistics Office, this survey corresponds to representing a population of 10, 015, 591. The survey data was collected between June and July 2011. For estimates of the costs in the United States of different benefit scenarios, the exchange rate of XAF 585 per U. S. dollar is used. 3. The ECOSIT 2011 data is the most recent nationally representative consumption data. However, it is five years old and it is likely that the country has experienced changes in consumption at the household level, the average household composition and the distribution of the population across the country.", + "ner_text": [ + [ + 259, + 291, + "named" + ], + [ + 320, + 347, + "ECOSIT national household survey <> author" + ], + [ + 351, + 355, + "ECOSIT national household survey <> publication year" + ], + [ + 419, + 436, + "ECOSIT national household survey <> reference population" + ], + [ + 461, + 480, + "ECOSIT national household survey <> reference population" + ], + [ + 880, + 884, + "ECOSIT national household survey <> publication year" + ], + [ + 1035, + 1039, + "ECOSIT national household survey <> publication year" + ], + [ + 1064, + 1106, + "ECOSIT national household survey <> data type" + ], + [ + 1343, + 1358, + "ECOSIT national household survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "2. The main source of data for this analysis is the ECOSIT national household survey that was carried out by the National Statistical Office in 2011. This survey included all 20 regions of the country.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as the main source of data for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a national household survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as the main source of data for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 89, + "text": "The monitoring component of the M & E approach will require data collection across different dimensions of the Project: ( 1 ) Performance Tracking data ( e. g. sales, employment, wages, transactions, etc ); ( 2 ) Activity Tracking data reflecting the Theory of Change ( e. g. as reflected by the number of loans serviced on the project \u2019 s web platform, the number of receivables purchased on the factoring platform, the number of refugees receiving business training, etc. ); ( 3 ) Key Results data ( e. g. value of private investment in manufacturing firms, formal employment in manufacturing firms, etc ); and ( 4 ) Key Risks tracking ( e. g. project implementation performance, NPL ratio of banks and PAR of MFIs, etc ). The evaluation component will build on the data collected under the monitoring component, but additionally focus on implementing a structured impact evaluation to measure the impact and attribution of the different policies under the project i. e. incubators, industrial parks, etc.,", + "ner_text": [ + [ + 126, + 151, + "named" + ], + [ + 358, + 415, + "Performance Tracking data <> data description" + ] + ], + "validated": true, + "empirical_context": "The monitoring component of the M & E approach will require data collection across different dimensions of the Project: ( 1 ) Performance Tracking data ( e. g.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a structured collection of data used for performance tracking in the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected for monitoring purposes.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data used for performance tracking in the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 49, + "text": "School records of assessments Third Party Check assessments records for a sample of schools DLR # 7. 3 First phase of Tawjihi exam reform completed and action plan for reform rollout is produced The first phase of the Tawjihi reform will consist of conducting consultations on the reform with the various stakeholders, the design of the new examination instruments and the piloting of those instruments. After the first phase, MOE will analyze the examination data, in addition to perception and satisfaction data collected during the process and will accordingly identify lessons learnt and develop an action plan for the rollout of the reform. First phase of Tawjihi reform report and action plan Third Party The verification agency reviews first phase completion report which includes results of stakeholder consultations, instruments design methodology, pilot results, recommendations, and action plan for reform roll \u2010 out. DLR # 7. 4 Legal framework for the Tawjihi exam has been adopted so that its secondary graduation and certification function is separated from its function as a screening mechanism for university entrance Legislations and / or decrees reforming the requirements for secondary school graduation are issued and standard operating procedures are adopted.", + "ner_text": [ + [ + 448, + 464, + "named" + ], + [ + 427, + 430, + "examination data <> publisher" + ], + [ + 481, + 513, + "examination data <> data description" + ] + ], + "validated": true, + "empirical_context": "3 First phase of Tawjihi exam reform completed and action plan for reform rollout is produced The first phase of the Tawjihi reform will consist of conducting consultations on the reform with the various stakeholders, the design of the new examination instruments and the piloting of those instruments. After the first phase, MOE will analyze the examination data, in addition to perception and satisfaction data collected during the process and will accordingly identify lessons learnt and develop an action plan for the rollout of the reform. First phase of Tawjihi reform report and action plan Third Party The verification agency reviews first phase completion report which includes results of stakeholder consultations, instruments design methodology, pilot results, recommendations, and action plan for reform roll \u2010 out.", + "type": "data", + "explanation": "In the context, 'examination data' is explicitly mentioned as something that will be analyzed, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'examination data' is a dataset because it refers to data collected from the examination process.", + "contextual_reason_agent": "In the context, 'examination data' is explicitly mentioned as something that will be analyzed, indicating it is used as a data source.", + "contextual_signal": "follows 'MOE will analyze the examination data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 21, + "text": "The World Bank Public Administration Modernization Project ( P162904 ) Page 18 of 69 \u2022 New Tax Administration Information System deployed; \u2022 Upgrade of the Customs Administration Information System; \u2022 Number of systems connected to the Government \u2019 s interoperable platform; \u2022 Percentage increase of transactions initiated per CSC per year; \u2022 Citizens involved in the design and delivery of e-services ( disaggregated by gender ); and \u2022 Percentage of CSC users satisfied with provided e-services ( disaggregated by gender ). 30. Other process and output indicators will be monitored over the life cycle of the Project. III. PROJECT DESCRIPTION A. Project Components Component 1: Strengthening e-government Foundation and Institutional Building ( US $ 9. 5 million ) 31. The expected outcomes of this component are to establish a strong foundation and enabling environment for e-government and ensure that people, including vulnerable groups \u2014 people with disabilities, the poor, women, the elderly, ethnic minorities, and the rural population \u2014 have increased access to modernized services. This component consists of three interrelated subcomponents, including ( a ) the development of a digital platform and e-services, ( b ) the establishment of a CSC pilot, and ( c ) institution building. A gradual and phased approach will be pursued in the development and provision of e-services.", + "ner_text": [ + [ + 156, + 197, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Public Administration Modernization Project ( P162904 ) Page 18 of 69 \u2022 New Tax Administration Information System deployed; \u2022 Upgrade of the Customs Administration Information System; \u2022 Number of systems connected to the Government \u2019 s interoperable platform; \u2022 Percentage increase of transactions initiated per CSC per year; \u2022 Citizens involved in the design and delivery of e-services ( disaggregated by gender ); and \u2022 Percentage of CSC users satisfied with provided e-services ( disaggregated by gender ). 30.", + "type": "system", + "explanation": "However, it is mentioned as a system and not as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 76, + "text": "Table 1: Refugee and Host Population in Uganda62 Population Refugee % of total Number of firms63 Refu gee Ugandan hosts Refuge e Host North West Refugee-Hosting Districts Yumbe, Adjumani, Madi Okollo, Terego Lamwo, Koboko, Obongi 873, 844 2, 169, 200 29 % 1, 987 13, 505 South West Refugee-Hosting Districts Isingiro, Kyegegwa, Kamwenge, Kiryandongo, Kikuube 576, 922 2, 266, 800 20 % 2, 526 15, 095 Total non-Kampala RHDs 1, 450, 766 4, 436, 000 25 % 4, 513 28, 601 Total Kampala 98, 415 1, 709, 000 5 % 5, 028 104, 972 2. The economic activity slow down caused by COVID-19 has affected Uganda \u2019 s ability to generate jobs for those living in vulnerable situations, including refugees and host communities. Despite the concerted efforts to integrate refugees within the ecosystems of their host communities, refugee - hosting districts ( RHDs ) remain less developed areas. Low levels of disposable incomes have resulted in low demand and limited access to labor markets, leaving those residents with some access to land with no alternative but to live off subsistence agriculture and humanitarian aid. These areas were less developed even before the inflow of refugees and remain decoupled from resilient and viable supply chains in the economy. For example, the average value of assets among all households ( both refugee and host ) in the district of Arua64 is 560, 000 Ugandan shillings ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 62 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "ner_text": [ + [ + 1752, + 1795, + "named" + ], + [ + 473, + 480, + "Refugee and Host Community Household Survey <> data geography" + ], + [ + 588, + 594, + "Refugee and Host Community Household Survey <> data geography" + ], + [ + 677, + 685, + "Refugee and Host Community Household Survey <> reference population" + ], + [ + 809, + 836, + "Refugee and Host Community Household Survey <> reference population" + ], + [ + 1355, + 1361, + "Refugee and Host Community Household Survey <> data geography" + ], + [ + 1484, + 1490, + "Refugee and Host Community Household Survey <> data geography" + ], + [ + 1594, + 1598, + "Refugee and Host Community Household Survey <> publication year" + ], + [ + 1623, + 1647, + "Refugee and Host Community Household Survey <> data type" + ], + [ + 1905, + 1923, + "Refugee and Host Community Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as a source of household data in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a source of household data in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 12, + "text": "To date, the PHC Network includes 204 contracted PHCCs ( out of 1, 085 PHC centers and dispensaries in the country ), of which 67 percent are affiliated with NGOs, 20 percent with local municipalities, 11 percent with MoPH and 2 percent with the Ministry of Social Affairs ( MOSA ). 15. For the past decade, the PHC Network has been successful in providing quality primary healthcare to poor and low income Lebanese. MoPH data show that use of contracted PHCCs by low income groups increased by 73 percent between 2002 and 2012, from 32, 6184 visits to 121, 2000, respectively. The data further show that the PHC Network played a major role in the delivery of free essential drugs provided by the MoPH while becoming the main provider of prenatal care with the number of pregnancy visits increasing from 5, 124 in 2002 to 26, 666 in 2012, which constituted 36 percent of total pregnancy visits in the country. As all the PHC network facilities are participating in the MoPH PHC accreditation program, quality of care is closely monitored in these facilities. Impact of the Syrian Crisis on the Health Sector 16. As a result of the influx of Syrian refugees, Lebanon \u2019 s health sector is regressing in several ways.", + "ner_text": [ + [ + 417, + 426, + "named" + ], + [ + 218, + 222, + "MoPH data <> publisher" + ], + [ + 417, + 421, + "MoPH data <> publisher" + ], + [ + 514, + 518, + "MoPH data <> reference year" + ], + [ + 523, + 527, + "MoPH data <> publication year" + ], + [ + 697, + 701, + "MoPH data <> publisher" + ], + [ + 833, + 837, + "MoPH data <> reference year" + ], + [ + 969, + 973, + "MoPH data <> publisher" + ], + [ + 1158, + 1165, + "MoPH data <> data geography" + ] + ], + "validated": true, + "empirical_context": "For the past decade, the PHC Network has been successful in providing quality primary healthcare to poor and low income Lebanese. MoPH data show that use of contracted PHCCs by low income groups increased by 73 percent between 2002 and 2012, from 32, 6184 visits to 121, 2000, respectively. The data further show that the PHC Network played a major role in the delivery of free essential drugs provided by the MoPH while becoming the main provider of prenatal care with the number of pregnancy visits increasing from 5, 124 in 2002 to 26, 666 in 2012, which constituted 36 percent of total pregnancy visits in the country.", + "type": "data", + "explanation": "In the context, 'MoPH data' is used to present statistical information about healthcare visits, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MoPH data' is a dataset because it refers to specific numerical information regarding healthcare usage.", + "contextual_reason_agent": "In the context, 'MoPH data' is used to present statistical information about healthcare visits, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "032_IBArchive-e8d67f4f-bc76-49af-9b6c-6099c748075b", + "page": 54, + "text": "Key GBV risk mitigation measures for the project will include ( a ) participatory identification of GBV risks at the subproject level with women and girls through a safety planning tool; ( b ) comprehensive rollout of safeguards from SEA and coercion at labor sites, including a public works code of conduct; and ( c ) routine safety audits to monitor GBV risks. Risk mitigation measures are supported by a GRM that builds on specific survivor-centered protocols, including appeals committees. Attention will be paid to upholding confidentiality and nondiscrimination to ensure that the risk to survivors of raising grievances ( for example, retaliation, further violence, and even death ) is minimized. GBV service mapping and linking to referrals for GBV in project locations will be conducted. Referral pathway training and skills building on how to support a survivor of GBV using psychological first aid will be provided to all project personnel, thereby ensuring access to referral services in all project areas. To monitor and track GBV risks, recurring gender / GBV audits of identified risks will be conducted to identify trends to inform action. The project will engage gender and GBV specialist ( s ) / specialized agency to train and provide technical assistance. A GBV Action Plan will be developed for the scope of the project. 123. Other risks - refugee protection risk is \u2018 Moderate \u2019. The World Bank, in close consultation with UNHCR, has confirmed the adequacy of South Sudan \u2019 s refugee protection framework. The country has", + "ner_text": [ + [ + 165, + 185, + "named" + ] + ], + "validated": false, + "empirical_context": "Key GBV risk mitigation measures for the project will include ( a ) participatory identification of GBV risks at the subproject level with women and girls through a safety planning tool; ( b ) comprehensive rollout of safeguards from SEA and coercion at labor sites, including a public works code of conduct; and ( c ) routine safety audits to monitor GBV risks. Risk mitigation measures are supported by a GRM that builds on specific survivor-centered protocols, including appeals committees.", + "type": "tool", + "explanation": "However, the context indicates that it is a tool for participatory identification of risks, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'tool' which could imply a structured approach to data collection.", + "contextual_reason_agent": "However, the context indicates that it is a tool for participatory identification of risks, not a structured collection of data.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 112, + "text": "Project Proposal. \u201d 61 MWE. 2009. \u201c Strategic Sector Investment Plan for the Water and Sanitation Sector in Uganda. \u201d The costs were adjusted to 2017 using the inflation rate during the period ( 55 percent ). 62 UBOS. National Household Survey 2016 \u2013 2017. Page 106. Average monthly income per household in the West Nile region. 63 UBOS. NSDS 2016. 56 minutes corresponds to time spent in rural areas and 33 minutes corresponds to urban areas. 64 GIZ. 2016. \u201c Refugees and Water Resources Management in Northern Uganda. \u201d 65 The same cost is obtained assuming the cost of the truck calculation was based on the price of 10, 000 L water truck was US $ 30, 000, 7-year life span, 5 percent maintenance cost, plus repairs, gas, driver cost, and water from the NWSC. The resulting value is half as much what the humanitarian aid and civil protection reports in its assessment of solar water schemes.", + "ner_text": [ + [ + 218, + 243, + "named" + ], + [ + 145, + 149, + "National Household Survey <> publication year" + ], + [ + 212, + 216, + "National Household Survey <> author" + ], + [ + 244, + 255, + "National Household Survey <> reference year" + ], + [ + 267, + 303, + "National Household Survey <> data description" + ], + [ + 311, + 327, + "National Household Survey <> data geography" + ], + [ + 332, + 336, + "National Household Survey <> author" + ], + [ + 343, + 347, + "National Household Survey <> publication year" + ], + [ + 911, + 929, + "National Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "62 UBOS. National Household Survey 2016 \u2013 2017. Page 106.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a National Household Survey, indicating it is a structured collection of data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a National Household Survey, indicating it is a structured collection of data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 44, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 33 Indicator Name of which refugees Definition / Description People who benefitted from improved water supply services that have been constructed or rehabilitated under the project. Per UNICEF-WHO Joint Monitoring Program definition, \u201c improved water sources \u201d include piped household connection ( house or yard connections ), public standpipe, boreholes, protected dug well, protected spring and rainwater collection, and do not include unprotected well, unprotected spring, surface water ( river, pond, dam, lake, stream, irrigation channel ), or bottled water. Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MEW / NWSC.", + "ner_text": [ + [ + 678, + 694, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' which can imply data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 80, + "text": "This review will validate the proposed reinsertion process or modify it if necessary. A final implementation report will be prepared within six months after the end of the project and include the contribution of the Government and donors. 81. Furthermore, M & E will include a MIS combining several robust databases to: ( i ) monitor each demobilized ex-combatant from demobilization through reinsertion, ( ii ) follow the implementing partners providing reinsertion support, and ( iii ) monitor the financial management. The various regional offices and the cantonment camps will be connected to the MIS via a wireless connection and data will be transmitted on a regular basis. The data will be centralized in Bamako and synchronized at each office to minimize any possible duplication.", + "ner_text": [ + [ + 601, + 604, + "named" + ] + ], + "validated": false, + "empirical_context": "Furthermore, M & E will include a MIS combining several robust databases to: ( i ) monitor each demobilized ex-combatant from demobilization through reinsertion, ( ii ) follow the implementing partners providing reinsertion support, and ( iii ) monitor the financial management. The various regional offices and the cantonment camps will be connected to the MIS via a wireless connection and data will be transmitted on a regular basis. The data will be centralized in Bamako and synchronized at each office to minimize any possible duplication.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages data.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 55, + "text": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "ner_text": [ + [ + 393, + 420, + "named" + ], + [ + 437, + 454, + "universal facility registry <> data description" + ] + ], + "validated": true, + "empirical_context": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ).", + "type": "registry", + "explanation": "This is a dataset as it functions as a registry providing unique identifiers for health service locations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that supports unique identification.", + "contextual_reason_agent": "This is a dataset as it functions as a registry providing unique identifiers for health service locations.", + "contextual_signal": "mentioned as a registry that supports unique identification", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 81, + "text": "It revealed several weaknesses in the PFM system attributed to years of civil war, including budget formulation and execution, financial reporting and oversight systems as well as weak linkages between agreed policies for budgeting planning and execution. However since then, significant progress has been made in all areas. 2. The introduction and now fully operational of an interim Financial Management Information System ( FMIS ) which generates standard quarterly budget execution reports and reports on poverty-reducing expenditure and / or HIPC expenditure execution, 3. The adoption and implementation of a new unified functional and economic budget classification system and a double-entry accounting system has served to improve budget monitoring while also addressing weakened treasury controls. As a result, the closure of the Government \u2019 s extra-budgetary accounts is successfully on track. The Audit Court ( Cour des Comptes ) established in 2004 has been an important step towards the strengthening of jurisdictional control over public finance management", + "ner_text": [ + [ + 385, + 424, + "named" + ] + ], + "validated": false, + "empirical_context": "2. The introduction and now fully operational of an interim Financial Management Information System ( FMIS ) which generates standard quarterly budget execution reports and reports on poverty-reducing expenditure and / or HIPC expenditure execution, 3. The adoption and implementation of a new unified functional and economic budget classification system and a double-entry accounting system has served to improve budget monitoring while also addressing weakened treasury controls.", + "type": "system", + "explanation": "However, it is described as a system that generates reports, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system that generates reports, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 65, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 53. Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "ner_text": [ + [ + 1085, + 1090, + "named" + ], + [ + 1062, + 1065, + "DHIS2 <> author" + ], + [ + 1145, + 1148, + "DHIS2 <> author" + ] + ], + "validated": true, + "empirical_context": "Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "type": "database", + "explanation": "DHIS2 is explicitly referenced as a data source for collecting health-related data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because DHIS2 is mentioned as a source for data collection in the context of health indicators.", + "contextual_reason_agent": "DHIS2 is explicitly referenced as a data source for collecting health-related data, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 47, + "text": "Annual Progress reports Impact evaluation, progress reports drawing on data from selected schools MoER PMT Annual education statistics reports produced and publicly disseminated based on data generated by the integrated EMIS and used for education sector management including refugee response Reports providing education statistics are produced and publicly disseminated on an annual basis thanks to the integrated EMIS ( to identify determinants of student and school performance for decision making on teacher policies / training, tutoring program, and other investments in the Annual Integrated EMIS database; annual education reports progress and monitoring reports; annual education reports MoER, PMT", + "ner_text": [ + [ + 107, + 142, + "named" + ] + ], + "validated": false, + "empirical_context": "Annual Progress reports Impact evaluation, progress reports drawing on data from selected schools MoER PMT Annual education statistics reports produced and publicly disseminated based on data generated by the integrated EMIS and used for education sector management including refugee response Reports providing education statistics are produced and publicly disseminated on an annual basis thanks to the integrated EMIS ( to identify determinants of student and school performance for decision making on teacher policies / training, tutoring program, and other investments in the Annual Integrated EMIS database; annual education reports progress and monitoring reports; annual education reports MoER, PMT", + "type": "report", + "explanation": "However, it is described as a report that disseminates statistics rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'statistics' which often relates to data.", + "contextual_reason_agent": "However, it is described as a report that disseminates statistics rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 27, + "text": "It was agreed that the second phase of this reform should be shaped around a principle of school and district focus, so that reforms at the central level will be premised on the changing role that the central ministry will play in a system that is focused on school and district level delivery of learning programs to build the kind of participatory, outcome-based and student-centered learning that is required. 11. Quality: Jordan has now participated in three rounds of the Trends in International Mathematics and Science Study ( TIMSS ) and one round of the Programme for International Student Assessment ( PISA ). In both cases, Jordan performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages. As part of ERfKE I, MoE introduced a national assessment program using a PISA - like approach but focused specifically on the knowledge economy aspects of the national curriculum. It also conducted a series of more \u201c traditional \u201d national assessments of learning achievement in key subjects. Establishing and implementing these systems is a major stride forward in its own right and a significant achievement of the ERfKE I reform. However, these assessments have revealed that the majority of students still perform below desired achievement levels and that a significant number of students drop out of the system. The overall completion rate of secondary education is about 70 percent, with almost 30 percent of students dropping out after the 10th grade. The monitoring and evaluation processes established under ERfKE I are now beginning to relate this performance to a range of different education and non education variables to determine what mix of inputs and processes has yielded the greatest improvements in terms of quality learning outcomes. Quality remains a significant challenge for Jordan, but some mechanisms have been put in place to permit more systematic and rigorous evaluation of which investments are most likely to yield impact on learning outcomes.", + "ner_text": [ + [ + 562, + 608, + "named" + ] + ], + "validated": false, + "empirical_context": "11. Quality: Jordan has now participated in three rounds of the Trends in International Mathematics and Science Study ( TIMSS ) and one round of the Programme for International Student Assessment ( PISA ). In both cases, Jordan performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages.", + "type": "program", + "explanation": "However, it is mentioned only as a program and not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to an assessment program that collects data on student performance.", + "contextual_reason_agent": "However, it is mentioned only as a program and not as a data source in the context.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 57, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 53 of 74 within SNSOP project locations that are satisfied or very satisfied with assets created through LIPW divided by the total number of beneficiaries and non beneficiaries surveyed in SNSOP project locations. SNSOP project locations refer to bomas or quarter councils where the SNSOP project is active a quarterly basis during missions and ISRs System and satisfaction surveys carried out by the SNSOP M & E team. In addition, satisfaction will be monitored by the Third Party Monitor ( TPM ) Beneficiary households receiving economic opportunities Number of total beneficiary households of Component 1 that are also receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, and have received at least 1 installment of the livelihood grant. This indicator will be measured at a minimum on a quarterly basis. SNSOP Management Information System ( MIS ) Data on participation in Component 2 will be collected at registration where based on the targeting and registration process outlined in the Project Operations Manual, eligible beneficiaries will be allocated to Component 2.", + "ner_text": [ + [ + 564, + 583, + "named" + ] + ], + "validated": false, + "empirical_context": "SNSOP project locations refer to bomas or quarter councils where the SNSOP project is active a quarterly basis during missions and ISRs System and satisfaction surveys carried out by the SNSOP M & E team. In addition, satisfaction will be monitored by the Third Party Monitor ( TPM ) Beneficiary households receiving economic opportunities Number of total beneficiary households of Component 1 that are also receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, and have received at least 1 installment of the livelihood grant. This indicator will be measured at a minimum on a quarterly basis.", + "type": "organization", + "explanation": "However, it is mentioned as a role or entity responsible for monitoring rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'monitor' which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a role or entity responsible for monitoring rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 8, + "text": "With the exception of early grade primary teachers, most teachers enter the classroom without any sustained training in pedagogical approaches or basic classroom skills. \u2022 Access and Equity: Household survey data indicate that public financing of basic schooling is more pro-poor than that of secondary schooling. Progress has been made in reducing gender differences in access to basic education, and to a lesser extent in secondary education. Subject specialization in secondary and vocational education still tends to reflect traditional gender roles. Completion rates and transition rates to tertiary education are highly correlated with family income: three times as many students in university come from the upper two income quintiles. \u2022 Physical facilities: A recent school utilization study indicates that the number of MoE students is expected to increase by 124, 634 between 2008 and 2013. It is estimated that there will be a need to provide an additional 3, 360 classrooms over this time period. The same study also reveals a dichotomy in the provision of educational infrastructure in the Kingdom.", + "ner_text": [ + [ + 191, + 212, + "named" + ], + [ + 894, + 898, + "Household survey data <> publication year" + ], + [ + 1102, + 1109, + "Household survey data <> data geography" + ], + [ + 1126, + 1144, + "Household survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "With the exception of early grade primary teachers, most teachers enter the classroom without any sustained training in pedagogical approaches or basic classroom skills. \u2022 Access and Equity: Household survey data indicate that public financing of basic schooling is more pro-poor than that of secondary schooling. Progress has been made in reducing gender differences in access to basic education, and to a lesser extent in secondary education.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as 'household survey data' used to analyze public financing in education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' suggests a structured collection of data collected from households.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as 'household survey data' used to analyze public financing in education.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 15, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 12 of 89 Figure 3. Average Rate of Infectious and Parasitic Diseases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Figure 4. Number of Registered Typhoid Fever Cases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Source: MoHSP Data, 2020 Source: MoHSP Data, 2020 13. The JMP reports that 85. 7 percent of the population has access to improved sanitation services, with nearly 94. 9 percent of rural population relying on pit latrines, and only 0. 5 percent of rural households reporting sewage treatment. Government data focused on access to centralized sewage systems, which indicates a great disparity between the urban and rural areas, estimating access to sewage network in urban areas at 80. 0 percent, 18. 2 percent in peri-urban areas, and 0. 2 percent in rural areas. 26 Lack of operational and capital funds, ageing of the facilities, and limited capabilities in wastewater management are key sector bottlenecks. Increased discharges of polluted or untreated wastewater facilitate the spread of pathogens in water bodies, open drains, and directly within urban areas, posing a severe public health risk.", + "ner_text": [ + [ + 361, + 371, + "named" + ], + [ + 166, + 180, + "MoHSP Data <> data geography" + ], + [ + 213, + 217, + "MoHSP Data <> publication year" + ], + [ + 228, + 268, + "MoHSP Data <> data description" + ], + [ + 276, + 290, + "MoHSP Data <> data geography" + ], + [ + 373, + 377, + "MoHSP Data <> publication year" + ], + [ + 508, + 524, + "MoHSP Data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Average Rate of Infectious and Parasitic Diseases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Figure 4. Number of Registered Typhoid Fever Cases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Source: MoHSP Data, 2020 Source: MoHSP Data, 2020 13. The JMP reports that 85.", + "type": "data", + "explanation": "In the context, 'MoHSP Data' is explicitly mentioned as a source, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data in the context.", + "contextual_reason_agent": "In the context, 'MoHSP Data' is explicitly mentioned as a source, indicating it is used for empirical analysis.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 20, + "text": "These programs may include savings associations, income-generation, training, micro - credit, and youth employment such as the one under development in the upcoming Youth Employability project ( P162916 ) 10. In the early months of the proposed project, a comprehensive exit operational process will be developed. Between ten and 20 percent of households are expected to exit the program; that estimate will be updated based on the updated poverty numbers and the results of the impact evaluation under way. As this activity is specific to households, which complete the Tekavoul five-year cycle, beneficiaries among refugees and host communities will not benefit from it. 38. This sub-component will finance the development of materials to include economic inclusion in the social promotion curriculum, the complementary mentoring package to accompany the households for six months after Tekavoul transfers end, and a referral system and partnerships with ongoing programs. Sub-component 2. 3: Facilitating civil registration ( US $ 0. 8 million ) 39. The proposed project could explore ways to facilitate registration of Tekavoul beneficiaries in the Civil Registry. Preliminary evidence hints at high rates of Tekavoul beneficiaries without a National Identity Number ( Num\u00e9ro National d \u2019 Identit\u00e9, NNI ). The NNI is necessary for school enrollment ( even if often waived at the primary level ) and exam registration, access to financial services, and a host of other public services. 10 A mapping of these programs is under way as part of the preparation of the Youth Employability Project.", + "ner_text": [ + [ + 1153, + 1167, + "named" + ] + ], + "validated": false, + "empirical_context": "8 million ) 39. The proposed project could explore ways to facilitate registration of Tekavoul beneficiaries in the Civil Registry. Preliminary evidence hints at high rates of Tekavoul beneficiaries without a National Identity Number ( Num\u00e9ro National d \u2019 Identit\u00e9, NNI ).", + "type": "registry", + "explanation": "However, in this context, it is mentioned as a registry but not explicitly as a data source for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Civil Registry' suggests a collection of records related to individuals.", + "contextual_reason_agent": "However, in this context, it is mentioned as a registry but not explicitly as a data source for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 53, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 50 of 68 Description Proportion of an alert about a disease, condition, or event of public health importance which may be true or invented Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2. 1 under WHO Birth registration notification coverage Description Proportion of live births notified by the health facility among the total expected live births in specific period Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / UNICEF Maternal death review coverage ( % ) Description Percentage of maternal deaths occurring in the health facility that were audited and reviewed. Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2. 1 under WHO Component 3: Monitoring and Evaluation and Project Management Percentage of health facilities receiving quarterly supervision visits ( disaggregated by visits by CHDs, and States MoH ) ( Percentage ) Description Percentage of health facilities receiving at least one quarterly supervision visit within the quarter from either the CHD, or the State MoH Frequency Quarterly Data source MoH; TPM Methodology for Data Collection MoH to provide data; TPM to verify Responsibility for Data Collection MoH / TPM Percentage of health facilities receiving quarterly supervision visits", + "ner_text": [ + [ + 644, + 649, + "named" + ], + [ + 407, + 447, + "DHIS2 <> data description" + ], + [ + 698, + 728, + "DHIS2 <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 50 of 68 Description Proportion of an alert about a disease, condition, or event of public health importance which may be true or invented Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2. 1 under WHO Birth registration notification coverage Description Proportion of live births notified by the health facility among the total expected live births in specific period Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / UNICEF Maternal death review coverage ( % ) Description Percentage of maternal deaths occurring in the health facility that were audited and reviewed. Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2.", + "type": "system", + "explanation": "In the context, DHIS2 is explicitly mentioned as a data source for birth registration notification coverage, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a data source for health statistics.", + "contextual_reason_agent": "In the context, DHIS2 is explicitly mentioned as a data source for birth registration notification coverage, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 76, + "text": "Households with Access to Electricity, by Sex of Head and Location Location Male ( % ) Female ( % ) All ( % ) Nationwide 8. 3 5. 7 7. 7 Urban 35. 5 22. 6 32. 5 Rural 0. 8 0. 4 0. 7 N \u2019 Djamena 51. 0 43. 8 49. 6 Source: Chad DHS ( 2015 ) and author \u2019 s calculations. 2. The gap in electricity access is also visible when looking at quintiles of wealth, although it only becomes sizable for the highest quintile. From all male-headed households in the top quintile, 40. 9 percent have access to electricity while the same figure is 29. 79 percent for female-headed households. Table 3. 3. Households with Access to Electricity, by Sex of Head and Wealth Quintile Location Male ( % ) Female ( % ) All ( % ) Poorest 0. 01 0. 03 0. 01 43 The data on the distribution of household heads were updated by a survey on ability and willingness of households to pay for electricity services, completed in 2021. Details are provided in annex 6.", + "ner_text": [ + [ + 799, + 878, + "named" + ], + [ + 181, + 192, + "survey on ability and willingness of households to pay for electricity services <> data geography" + ], + [ + 893, + 897, + "survey on ability and willingness of households to pay for electricity services <> publication year" + ], + [ + 947, + 965, + "survey on ability and willingness of households to pay for electricity services <> usage context" + ] + ], + "validated": true, + "empirical_context": "03 0. 01 43 The data on the distribution of household heads were updated by a survey on ability and willingness of households to pay for electricity services, completed in 2021. Details are provided in annex 6.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data from a survey used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it describes a survey that collects data on household payment willingness.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data from a survey used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 38, + "text": "Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection KEMSA, MoH People who have received essential health, nutrition, and population ( HNP ) services ( Number ) CRI Description Total number of deliveries attended by skilled health personnel and total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized ( Number ) CRI Description Total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of deliveries attended by skilled health personnel ( Number ) CRI", + "ner_text": [ + [ + 452, + 456, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection KEMSA, MoH People who have received essential health, nutrition, and population ( HNP ) services ( Number ) CRI Description Total number of deliveries attended by skilled health personnel and total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized ( Number ) CRI Description Total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of deliveries attended by skilled health personnel ( Number ) CRI", + "type": "system", + "explanation": "However, HMIS is referred to as a methodology for data collection rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "However, HMIS is referred to as a methodology for data collection rather than a structured collection of data itself.", + "contextual_signal": "mentioned as a methodology for data collection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 30, + "text": "Specific communication campaigns will be carried out for beneficiaries to understand their rights and to inform on the existing entry channels for beneficiaries and non-beneficiaries to submit their claims and to inform on the procedures for the claims to be handled. Table 2. Status of delivery systems and proposed upgrades Delivery System Current status Upgrades to be financed by the project Targeting system \uf0b7 A targeting system with three layers ( geographical targeting, PMT and Community validation ) was developed to select the poorest households in project implementation areas \uf0b7 The PMT formula will be updated once new household survey data become available \uf0b7 The PMT formula will be replaced by a more accurate system in refugee camps \uf0b7 A specific targeting methodology will be developed for productive inclusion and jobs activities Registration system \uf0b7 The registration form exists for non - refugee areas \uf0b7 The registration forms will be modified to collect additional information ( i. e refugee status ) Enrolment system \uf0b7 An enrolment module exists applying the PMT formula to the registration data \uf0b7 A specific enrolment module will be developed for refugee areas and for productive inclusion and jobs activities Payment system \uf0b7 Payments of cash transfers are made through a mobile-phone-based mechanism \uf0b7 While scaling up to the national level, the project will potentially use other e-payment mechanisms in those areas where mobile network might not be available Monitoring and evaluation system \uf0b7 An M & E manual was developed listing all existing M & E tools and detailing the processes for data collection \uf0b7 Monitoring reports will directly be created by the project \u2019 s MIS \uf0b7 The IBM will be incorporated as a core M & E tool Grievance Redress Mechanism \uf0b7 A GRM with two entry points ( social workers at payment days and green line ) exists \uf0b7 The GRM will be tailored to identify and confidentially handle potential GBV cases \uf0b7 Information from the green line will be populated into the project \u2019 s MIS to facilitate tracking Management Information System \uf0b7 A MIS exists with the following modules: registration, enrolment, payments, M & E and GRM \uf0b7 All existing modules will need to be updated to integrate new program features and to reflect new geographical areas", + "ner_text": [ + [ + 631, + 652, + "named" + ], + [ + 537, + 555, + "household survey data <> reference population" + ], + [ + 734, + 747, + "household survey data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Table 2. Status of delivery systems and proposed upgrades Delivery System Current status Upgrades to be financed by the project Targeting system \uf0b7 A targeting system with three layers ( geographical targeting, PMT and Community validation ) was developed to select the poorest households in project implementation areas \uf0b7 The PMT formula will be updated once new household survey data become available \uf0b7 The PMT formula will be replaced by a more accurate system in refugee camps \uf0b7 A specific targeting methodology will be developed for productive inclusion and jobs activities Registration system \uf0b7 The registration form exists for non - refugee areas \uf0b7 The registration forms will be modified to collect additional information ( i. e refugee status ) Enrolment system \uf0b7 An enrolment module exists applying the PMT formula to the registration data \uf0b7 A specific enrolment module will be developed for refugee areas and for productive inclusion and jobs activities Payment system \uf0b7 Payments of cash transfers are made through a mobile-phone-based mechanism \uf0b7 While scaling up to the national level, the project will potentially use other e-payment mechanisms in those areas where mobile network might not be available Monitoring and evaluation system \uf0b7 An M & E manual was developed listing all existing M & E tools and detailing the processes for data collection \uf0b7 Monitoring reports will directly be created by the project \u2019 s MIS \uf0b7 The IBM will be incorporated as a core M & E tool Grievance Redress Mechanism \uf0b7 A GRM with two entry points ( social workers at payment days and green line ) exists \uf0b7 The GRM will be tailored to identify and confidentially handle potential GBV cases \uf0b7 Information from the green line will be populated into the project \u2019 s MIS to facilitate tracking Management Information System \uf0b7 A MIS exists with the following modules: registration, enrolment, payments, M & E and GRM \uf0b7 All existing modules will need to be updated to integrate new program features and to reflect new geographical areas", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned in the context of updating the PMT formula based on new household survey data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to data collected from households for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of updating the PMT formula based on new household survey data.", + "contextual_signal": "follows 'will be updated once new household survey data become available'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 39, + "text": "The World Bank Formal Employment Creation Project ( P171766 ) Page 35 of 86 participatory needs assessments with loan beneficiary firms to establish their trainings needs at the application stage. ( ii ) Post-training assessments. The TKYB will conduct a post-training assessment to measure the satisfaction and impact of the trainings, with the results being used by training providers to revise and improve the process in subsequent assessments. ( iii ) Satisfaction surveys. The TKYB, in collaboration with PFIs, will conduct satisfaction surveys in the midterm and end term with the loan beneficiary firms regarding the subfinance received in terms of their needs. 30 ( iv ) Biannual beneficiary workshops, roundtables, and focus group discussions ( targeting different beneficiaries: employers, civil society and end beneficiaries ) will be held to discuss the survey results with a view to developing measures that improve the project design ( such as the selection criteria of loans beneficiary firms, loan utilization, and choice of training activities ). This activity will draw from the World Bank team \u2019 s recent experience in carrying out validation workshops in the context of the FRIT I - Strengthening Economic Opportunities for Syrians under Temporary Protection and Turkish Citizens in Selected Localities Project ( P165687 ).", + "ner_text": [ + [ + 529, + 549, + "named" + ], + [ + 4, + 14, + "satisfaction surveys <> publisher" + ], + [ + 113, + 135, + "satisfaction surveys <> reference population" + ], + [ + 587, + 609, + "satisfaction surveys <> reference population" + ], + [ + 1097, + 1107, + "satisfaction surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "( iii ) Satisfaction surveys. The TKYB, in collaboration with PFIs, will conduct satisfaction surveys in the midterm and end term with the loan beneficiary firms regarding the subfinance received in terms of their needs. 30 ( iv ) Biannual beneficiary workshops, roundtables, and focus group discussions ( targeting different beneficiaries: employers, civil society and end beneficiaries ) will be held to discuss the survey results with a view to developing measures that improve the project design ( such as the selection criteria of loans beneficiary firms, loan utilization, and choice of training activities ).", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as being conducted to gather data from loan beneficiary firms, confirming their role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because satisfaction surveys are typically structured collections of data used to gather feedback.", + "contextual_reason_agent": "These surveys are explicitly mentioned as being conducted to gather data from loan beneficiary firms, confirming their role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 85, + "text": "The biggest gap in the Cameroon statistical system is on the production of micro-data ( household surveys and censuses ). The funding requirements for the population census is estimated at US $ 55 million ( CFAF 30 billion ) according to BUCREP and the requirements for the agriculture census is estimated at US $ 51 million ( CFAF 28 billion ). In addition, there is no commitment for the next ECAM. The proposed project intends to increase the frequency of the production of micro-data for a close poverty monitoring ( by complementing resources for the population census and securing resources for the next living conditions survey ), strengthen the national accounts, and enhance access to statistics to strengthen both policy making and monitoring.", + "ner_text": [ + [ + 274, + 292, + "named" + ] + ], + "validated": false, + "empirical_context": "The biggest gap in the Cameroon statistical system is on the production of micro-data ( household surveys and censuses ). The funding requirements for the population census is estimated at US $ 55 million ( CFAF 30 billion ) according to BUCREP and the requirements for the agriculture census is estimated at US $ 51 million ( CFAF 28 billion ). In addition, there is no commitment for the next ECAM.", + "type": "census", + "explanation": "However, it is mentioned in the context as a funding requirement and not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'agriculture census' implies a structured collection of data related to agricultural statistics.", + "contextual_reason_agent": "However, it is mentioned in the context as a funding requirement and not as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 30, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems. It will be used to gather and consolidate data across the components. It will allow the project to track and report on project-specific data requirements disaggregated by group and sub-group ( refugee, host community, women, children, etc. ). The P-MIS will be housed at the MINEMA SPIU and accessible via a web-based application in project districts and at BRD. The system will be used to ( a ) collect data and information and monitor progress of project activities; and ( b ) generate reports, figures and tables for project M & E. 78. Regular progress monitoring will be complemented by externally-administered surveys to measure progress and effectiveness of activities and to understand beneficiary perceptions of socio-economic inclusion. Periodic technical audits will be carried out to verify infrastructure quality. A baseline study will be conducted during the inception phase and followed up with annual surveys and other assessments, a midterm evaluation, and at project closing, to evaluate qualitative and quantitative aspects of project results.", + "ner_text": [ + [ + 155, + 160, + "named" + ] + ], + "validated": false, + "empirical_context": "Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems. It will be used to gather and consolidate data across the components.", + "type": "system", + "explanation": "However, the P-MIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions gathering and consolidating data.", + "contextual_reason_agent": "However, the P-MIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 19, + "text": "The States with support from the Project Coordination Unit ( PCU ) will be responsible for capacity building at the community level ( empowerment, inclusion, gender sensitivity, school safety ) \u2013 train the localities and prepare them to perform their role in overseeing implementation of the school grants. The PCU will assess capacity of localities and schools in in participatory planning and monitoring of school results. 37. Key activities will include: \u2022 Assessing capacity of localities and schools in in participatory planning and monitoring of school results; \u2022 Training of school heads and PTAs in participatory planning and monitoring of school results, including learning; \u2022 Training of locality supervisors to provide support to schools as needed; and \u2022 Providing grants to schools to improve learning environments. 38. Selection of intervention schools: The project will target all public primary schools in Sudan. Rich school - level data obtained from the School Census in 2015-2019 with support from the BERP will be used for the targeting of project beneficiaries ( figure 4 ).", + "ner_text": [ + [ + 933, + 952, + "named" + ], + [ + 33, + 58, + "school - level data <> author" + ], + [ + 895, + 917, + "school - level data <> reference population" + ], + [ + 921, + 926, + "school - level data <> data geography" + ], + [ + 988, + 997, + "school - level data <> reference year" + ], + [ + 1059, + 1080, + "school - level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Selection of intervention schools: The project will target all public primary schools in Sudan. Rich school - level data obtained from the School Census in 2015-2019 with support from the BERP will be used for the targeting of project beneficiaries ( figure 4 ).", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific data collected from the School Census used for targeting project beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'school-level data' suggests a structured collection of information from the School Census.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific data collected from the School Census used for targeting project beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 12, + "text": "Foundational ID systems11 are broadly recognized as key enablers for inclusive digitalization and development. For people, the ability to establish and verify their identity is often a prerequisite for access to services and economic opportunities, such as social protection, healthcare, education, financial services, and employment. Proof of legal identity is also the basis for exercising rights, such as property ownership, and nationality. For governments and businesses, ID systems can serve as a platform for more effective and efficient service delivery by enabling the unique identification and verification of persons. Importantly, ID systems can promote greater inclusion by de-risking and reducing the costs of 8 UNHCR ' s Ethiopia Update on the Total Number of Refugees and Asylum Seekers as of August 31, 2023. 9 In Tigray, new internal displacement data has been reported, including 1, 021, 798 IDPs ( 250, 468 households ) in 643 sites across six zones ( excluding 20 woredas / districts hard to reach due to security or environmental factors ). 10 IOM. 2023. Ethiopia National Displacement Report 16 - Site Assessment Round 33 and Village Assessment Survey Round 16: Nov 2022 - Jun 2023. https: / / reliefweb. int / report / ethiopia / ethiopia-national-displacement-report-16-site-assessment-round-33-and-village-assessment-survey-round - 16-november-2022-june-2023. 11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "ner_text": [ + [ + 1614, + 1630, + "named" + ] + ], + "validated": true, + "empirical_context": "11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "type": "registry", + "explanation": "In the context, 'civil registries' are mentioned as a type of foundational ID system that provides credentials, indicating they function as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'civil registries' is a dataset because it refers to a structured collection of identity information.", + "contextual_reason_agent": "In the context, 'civil registries' are mentioned as a type of foundational ID system that provides credentials, indicating they function as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "103_Kenya-Hon-Africa-PAD-04072017", + "page": 93, + "text": "Procurement risk assessment: A procurement capacity and risk assessment has been carried out by the World Bank for the NPIU at the Executive Office of the President and IGAD Regional Secretariat office in Nairobi who will be responsible for implementing the project at the National and Regional levels to review the organizational structure for implementing the project and the interaction between the project \u2019 s staff responsible for procurement. The assessment has been fed into the Procurement Risk Assessment and Management System ( PRAMS ). Based on the assessment and taking note of the roles and responsibilities of the line ministries in carrying out procurement, the existing procurement capacity within the agencies and at community level, and the risks associated with CDD operations the procurement risk rating is considered \u201c High \u201d. 49. The key issues and risks concerning procurement for implementation of the project which include systemic weaknesses in the areas of: ( i ) procurement capacity at national and community level; ( ii ) accountability of procurement decisions especially at community level; ( iii ) Procurement delays in bid / proposal evaluation and signing of contracts; ( iv ) procurement record keeping; ( v ) capacity of procurement staff; ( vi ) procurement planning; ( vii ) procurement process administration, up to and including award of contracts; ( viii ) contract management; and ( ix ) procurement oversight. 50.", + "ner_text": [ + [ + 486, + 535, + "named" + ] + ], + "validated": false, + "empirical_context": "Procurement risk assessment: A procurement capacity and risk assessment has been carried out by the World Bank for the NPIU at the Executive Office of the President and IGAD Regional Secretariat office in Nairobi who will be responsible for implementing the project at the National and Regional levels to review the organizational structure for implementing the project and the interaction between the project \u2019 s staff responsible for procurement. The assessment has been fed into the Procurement Risk Assessment and Management System ( PRAMS ). Based on the assessment and taking note of the roles and responsibilities of the line ministries in carrying out procurement, the existing procurement capacity within the agencies and at community level, and the risks associated with CDD operations the procurement risk rating is considered \u201c High \u201d.", + "type": "system", + "explanation": "However, it is described as a system rather than a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'System' in its name, suggesting a structured approach to data management.", + "contextual_reason_agent": "However, it is described as a system rather than a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 39, + "text": "Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized among the host community in Garissa and Turkana ( Number ) Description Total number of children immunized among the host community in Garissa and Turkana. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized among refugees in Garissa and Turkana ( Number ) Description Total number of children immunized among refugees in Garissa and Turkana. Frequency Every six months Data source UNHCR reports Methodology for Data Collection Routine UNHCR data collection Responsibility for Data Collection MoH", + "ner_text": [ + [ + 84, + 88, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized among the host community in Garissa and Turkana ( Number ) Description Total number of children immunized among the host community in Garissa and Turkana. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized among refugees in Garissa and Turkana ( Number ) Description Total number of children immunized among refugees in Garissa and Turkana.", + "type": "system", + "explanation": "However, HMIS is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because HMIS is often associated with health data management.", + "contextual_reason_agent": "However, HMIS is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 45, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 40 RESULT_FRAME_TBL_PDO Indicator Name DLI Baseline End Target 2019 2024 ( Percentage ) Strengthening country systems to support refugees and host communities Beneficiaries in targeted areas included in the Unified Social Registry ( Percentage ) 0. 00 80. 00 Beneficiaries in targeted areas included in the Unified Social Registry - - Female ( Percentage ) 0. 00 52. 00 Beneficiaries in targeted areas included in the Unified Social Registry - - Refugees ( Percentage ) 0. 00 30. 00 Eligible refugees with identity documents issued by CNARR ( Percentage ) 10. 00 70. 00 Eligible refugees with identity documents issued by CNARR - - Female ( Percentage ) 30. 00 52. 00 PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name DLI Baseline End Target 2019 2024 Improving access to basic services Geo-referenced health and education sector facility mapping in targeted areas completed ( Yes / No ) No Yes Communities consulted for basic services needs assessment and targeting validation ( Percentage ) 0. 00 90. 00 Classrooms rehabilitated or newly built ( Number ) 42. 00 420. 00 Health centers rehabilitated or newly built ( Number ) 9. 00 70. 00", + "ner_text": [ + [ + 292, + 315, + "named" + ], + [ + 15, + 19, + "Unified Social Registry <> data geography" + ], + [ + 214, + 222, + "Unified Social Registry <> reference population" + ], + [ + 244, + 257, + "Unified Social Registry <> reference population" + ], + [ + 531, + 539, + "Unified Social Registry <> reference population" + ], + [ + 577, + 585, + "Unified Social Registry <> reference population" + ], + [ + 664, + 672, + "Unified Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 40 RESULT_FRAME_TBL_PDO Indicator Name DLI Baseline End Target 2019 2024 ( Percentage ) Strengthening country systems to support refugees and host communities Beneficiaries in targeted areas included in the Unified Social Registry ( Percentage ) 0. 00 80.", + "type": "registry", + "explanation": "The Unified Social Registry is explicitly mentioned as a source of information for beneficiaries, indicating its use as a dataset in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry, which typically implies a structured collection of data.", + "contextual_reason_agent": "The Unified Social Registry is explicitly mentioned as a source of information for beneficiaries, indicating its use as a dataset in the context.", + "contextual_signal": "mentioned as a source of information for beneficiaries", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 41, + "text": "for transition from Cycle 1 to 2 94 for transition from Cycle 2 to 3 91 for transition from Cycle 3 to Secondary Non-Lebanese 96 for transition from Cycle 1 to 2 82 for transition from Cycle 2 to 3 82 for transition from Cycle 3 to Secondary Same rates maintained 2015 and 2019 or 2020 for all; Annually for non - Lebanese MICS Round 6, VASyR, UNHCR community - based reporting; MEHE information management Number of students in public schools successful in Grade 3 and Number Grade 3: 0 Grade 6: 0 ( Baseline at 0 because Grade 3: TBD after pilot Grade 6: TBD after pilot Annually CERD", + "ner_text": [ + [ + 337, + 342, + "named" + ], + [ + 264, + 268, + "VASyR <> reference year" + ], + [ + 273, + 277, + "VASyR <> publication year" + ], + [ + 281, + 285, + "VASyR <> publication year" + ], + [ + 308, + 322, + "VASyR <> reference population" + ], + [ + 344, + 349, + "VASyR <> publisher" + ] + ], + "validated": true, + "empirical_context": "for transition from Cycle 1 to 2 94 for transition from Cycle 2 to 3 91 for transition from Cycle 3 to Secondary Non-Lebanese 96 for transition from Cycle 1 to 2 82 for transition from Cycle 2 to 3 82 for transition from Cycle 3 to Secondary Same rates maintained 2015 and 2019 or 2020 for all; Annually for non - Lebanese MICS Round 6, VASyR, UNHCR community - based reporting; MEHE information management Number of students in public schools successful in Grade 3 and Number Grade 3: 0 Grade 6: 0 ( Baseline at 0 because Grade 3: TBD after pilot Grade 6: TBD after pilot Annually CERD", + "type": "survey", + "explanation": "VASyR is indeed a dataset as it is referenced in the context of community-based reporting and data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because VASyR is mentioned alongside other data sources and reporting mechanisms.", + "contextual_reason_agent": "VASyR is indeed a dataset as it is referenced in the context of community-based reporting and data collection.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 56, + "text": "A household will be considered as a beneficiary household if it is both enrolled in the project and have received a cash transfer, at least for one payment cycle This indicator will be measured on a quarterly basis during missions and ISRs SNSOP MIS which will track beneficiary registration and payment data The implementing agency will collect registration and payment data over the course of the project Implementing Partner Beneficiary and non-beneficiary households reporting satisfaction with community assets created through LIPW The total number of surveyed beneficiary and non-beneficiary households This indicator will be measured on SNSOP Management Information Data on satisfaction will be collected during Post Distribution Monitoring Implementing Partner", + "ner_text": [ + [ + 267, + 308, + "named" + ], + [ + 36, + 57, + "beneficiary registration and payment data <> reference population" + ], + [ + 428, + 470, + "beneficiary registration and payment data <> reference population" + ] + ], + "validated": true, + "empirical_context": "A household will be considered as a beneficiary household if it is both enrolled in the project and have received a cash transfer, at least for one payment cycle This indicator will be measured on a quarterly basis during missions and ISRs SNSOP MIS which will track beneficiary registration and payment data The implementing agency will collect registration and payment data over the course of the project Implementing Partner Beneficiary and non-beneficiary households reporting satisfaction with community assets created through LIPW The total number of surveyed beneficiary and non-beneficiary households This indicator will be measured on SNSOP Management Information Data on satisfaction will be collected during Post Distribution Monitoring Implementing Partner", + "type": "data", + "explanation": "This is indeed a dataset as it refers to structured data collected on beneficiary registration and payments for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected about beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected on beneficiary registration and payments for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 18, + "text": "Average annual precipitation has decreased by 2. 9 millimeters per decade on average since the 1960s. Different climate model projections show a wide range of changes over Cameroon, with some projecting increases in average annual rainfall and others a decrease. 30 Cameroon \u2019 s Nationally Determined Contribution ( NDC ) indicates an intensification of droughts and an increase in the frequency and intensity of flooding events. 31 Overall, Cameroon is vulnerable to the effects of climate change, and it ranks 146 out of 182 on the 2020 vulnerability index of the Notre Dame Global Adaptation Initiative ( ND-GAIN ), which measures a country \u2019 s exposure, sensitivity, and ability to adapt to the negative impacts of climate change. 32 This index summarizes and ranks countries in terms of their vulnerability to climate change and other global challenges in combination with its readiness to improve resilience. 27 UNHCR \u2013 April 2023 \u2013 Main persons of concern, Cameroon. 28 UNHCR \u2013 April 2023 \u2013 Main persons of concern, Cameroon. 29Think Hazard, consulted on February 17, 2021. URL: https: / / thinkhazard. org / en / report / 45-cameroon. 30WBG Climate Knowledge Portal, consulted on February 17, 2021. URL: https: / / climateknowledgeportal. worldbank. org / country / cameroon / climate-data-historical 31 Cameroon \u2019 s Nationally Determined Contribution to the United Nations Framework Convention on Climate Change; Revised in 2021. https: / / unfccc. int / sites / default / files / NDC / 2022-06 / CDN % 20r % C3 % A9vis % C3 % A9e % 20CMR % 20finale % 20sept % 202021. pdf 32 https: / / gain-new. crc. nd. edu / ranking / vulnerability.", + "ner_text": [ + [ + 112, + 137, + "named" + ] + ], + "validated": false, + "empirical_context": "9 millimeters per decade on average since the 1960s. Different climate model projections show a wide range of changes over Cameroon, with some projecting increases in average annual rainfall and others a decrease. 30 Cameroon \u2019 s Nationally Determined Contribution ( NDC ) indicates an intensification of droughts and an increase in the frequency and intensity of flooding events.", + "type": "model", + "explanation": "However, it is not a dataset as it refers to projections from models rather than a structured collection of empirical data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'climate model projections' suggests a collection of data related to climate predictions.", + "contextual_reason_agent": "However, it is not a dataset as it refers to projections from models rather than a structured collection of empirical data.", + "contextual_signal": "mentioned only as projections, not as a data source", + "tags": [] + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 53, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 49 of 66 agricultural program. Targets are cumulative. annually Of which, female This indicator will track the share of students who are certified after completing the digital training for the agricultural program. Targets are cumulative. Cumulative Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Of which, refugee / host community population This indicator will track the share of refugee students students who are certified after completing the digital training for the agricultural program. Targets are cumulative. Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Education Management Information System strengthened This indicator will track the implementation of the EMIS strengthening plan. The target will be achieved once the EMIS collects and reports data on at least 90 % of schools in South Sudan including information on teachers ( profile, qualifications, deployment and training ), schools ( location, enrollment, physical facilities ( including WASH, classrooms etc ), teachers and school management committees and students. Annually starting from the second year of implementati on Monitoring reports MoGEI and MoHEST will prepare reports on implementation progress of EMIS strengthening plan.", + "ner_text": [ + [ + 916, + 920, + "named" + ] + ], + "validated": false, + "empirical_context": "Targets are cumulative. Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Education Management Information System strengthened This indicator will track the implementation of the EMIS strengthening plan. The target will be achieved once the EMIS collects and reports data on at least 90 % of schools in South Sudan including information on teachers ( profile, qualifications, deployment and training ), schools ( location, enrollment, physical facilities ( including WASH, classrooms etc ), teachers and school management committees and students.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to data collection and reporting.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 77, + "text": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 74 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy. EMIS deconcentration only applies to MINEDUB and MINESEC \u2022 Budget approved for EMIS activities for subsequent year Year 5 \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy.", + "ner_text": [ + [ + 428, + 459, + "named" + ], + [ + 4, + 14, + "Integrated Statistical Yearbook <> publisher" + ], + [ + 515, + 544, + "Integrated Statistical Yearbook <> reference population" + ], + [ + 895, + 905, + "Integrated Statistical Yearbook <> author" + ], + [ + 1268, + 1278, + "Integrated Statistical Yearbook <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 74 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy. EMIS deconcentration only applies to MINEDUB and MINESEC \u2022 Budget approved for EMIS activities for subsequent year Year 5 \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy.", + "type": "yearbook", + "explanation": "This is a dataset as it is explicitly mentioned as published and available online, indicating it serves as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is described as an 'Integrated Statistical Yearbook' that covers the education and training sector.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as published and available online, indicating it serves as a structured collection of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 13, + "text": "An analysis of enrollment in 20171 illustrates the large volume of pupils entering Grade 1 gradually shrinks while moving to upper grades due to drop out. In general, boys are more likely to drop out than girls. For example, 48 percent of boys enrolled in Grade 1 are likely to reach Grade 8 compared to 53 percent of girls. Anecdotal evidence suggests that high drop out of male pupils is associated with the high opportunity cost of attending school, which includes the cost of not working in the household, while female pupils drop out due to early marriage. 15. The number of out-of-school-children ( OOSC ) is striking: approximately three million school-age children are not in the education system. While 52 percent of those children had never attended school, 48 percent quit. The majority of OOSC ( 77 percent ) are 6 - to 13-year-olds, i. e. basic school-age. The system still has late entry until 11 years, with children who do not attend school before turning 12 are likely not to attend ever. According to the results of the National Household Budget and Poverty Survey ( NHBPS ) conducted in 2014 / 15, the main reasons for not attending school for children between the age of 6 and 15 are high costs ( mentioned by 20 percent of respondents ), distance to schools ( 14 percent ), and the need for the child to support the family ( 6 percent ) ( World Bank, 2018 ). There is a significant risk that OOSC will increase further when schools reopen again post COVID-19. 1 Education Sector Analysis, 2018.", + "ner_text": [ + [ + 1038, + 1082, + "named" + ], + [ + 1085, + 1090, + "National Household Budget and Poverty Survey <> acronym" + ], + [ + 1106, + 1115, + "National Household Budget and Poverty Survey <> reference year" + ], + [ + 1163, + 1199, + "National Household Budget and Poverty Survey <> reference population" + ], + [ + 1259, + 1278, + "National Household Budget and Poverty Survey <> data description" + ], + [ + 1360, + 1370, + "National Household Budget and Poverty Survey <> publisher" + ], + [ + 1372, + 1376, + "National Household Budget and Poverty Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "The system still has late entry until 11 years, with children who do not attend school before turning 12 are likely not to attend ever. According to the results of the National Household Budget and Poverty Survey ( NHBPS ) conducted in 2014 / 15, the main reasons for not attending school for children between the age of 6 and 15 are high costs ( mentioned by 20 percent of respondents ), distance to schools ( 14 percent ), and the need for the child to support the family ( 6 percent ) ( World Bank, 2018 ). There is a significant risk that OOSC will increase further when schools reopen again post COVID-19.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data used to analyze the reasons for school attendance issues.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides results on reasons for not attending school.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used to analyze the reasons for school attendance issues.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 52, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 43 of 68 inclusion, and sustainability ) Number of sector staff participating in capacity building trainings under the project This indicator will measure the number of sector staff trained on ( EMIS, national and international assessments and data analysis, tutoring / accelerated learning programs, ECEC reforms, integrated approach to designing and implementing resilient, sustainable, and inclusive school and preschool projects ). Progress will be tracked, and the capacity building plan will be updated every two years. Annual Progress and monitoring reports, capacity building plan Progress and monitoring reports NORLD, MoER, PMT Monitoring tool with reliable gender - disaggregated data to identify disadvantaged students to receive accelerated learning This indicator will capture the development of the monitoring tool with reliable gender-disaggregated data to identify disadvantaged students to receive tutoring, accelerated learning or other catch-up program Annual EMIS EMIS and monitoring tool data CTICE, MoER, PMT Improved regulatory environment for affordable and quality childcare for preschool children This indicator will measure the number of changes made to subsector regulations, with adjustments to the norms on the space, trainings for Annual Policy documents, progress and monitoring reports Policy documents, progress and monitoring reports MoER, PMT", + "ner_text": [ + [ + 1050, + 1054, + "named" + ] + ], + "validated": false, + "empirical_context": "Progress will be tracked, and the capacity building plan will be updated every two years. Annual Progress and monitoring reports, capacity building plan Progress and monitoring reports NORLD, MoER, PMT Monitoring tool with reliable gender - disaggregated data to identify disadvantaged students to receive accelerated learning This indicator will capture the development of the monitoring tool with reliable gender-disaggregated data to identify disadvantaged students to receive tutoring, accelerated learning or other catch-up program Annual EMIS EMIS and monitoring tool data CTICE, MoER, PMT Improved regulatory environment for affordable and quality childcare for preschool children This indicator will measure the number of changes made to subsector regulations, with adjustments to the norms on the space, trainings for Annual Policy documents, progress and monitoring reports Policy documents, progress and monitoring reports MoER, PMT", + "type": "system", + "explanation": "However, EMIS is referred to as a monitoring tool and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "However, EMIS is referred to as a monitoring tool and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "158_40156", + "page": 13, + "text": "programs in the selected areas and meeting the selection criteria7 will be providing the services to the targeted community. Based upon the services provided by the NGO \u2019 s, health facilities in these areas will face greater demand for services. Therefore, some support will be channeled to the local health facilities to address this increased demand, based on rapid surveys for these selected sites. The regional mapping assessment conducted by IGAD provides a list of organizations and government agencies working in the cross-border areas in each of the countries, and the kind of services being provided ( if any ), which has provided the basis for moving forward with the proposed project. In addition, during the \u201c Horn of Africa Partnership \u201d ( HOAP ) meeting in November 2006, some of the IGAD countries developed draft work plans for activities in the border areas, which should be further strengthened and taken into account when the countries and IGAD are identifying the key hot-spot areas. The IGAD regional mapping assessment and the draft work plans would therefore be the basis for one \u201c situation analysis \u201d for selecting hot-spots to be supported. This regional mapping assessment will be updated on an annual basis in order to guide the selection of the hot-spot areas to be supported every year. 34.", + "ner_text": [ + [ + 406, + 433, + "named" + ] + ], + "validated": false, + "empirical_context": "Therefore, some support will be channeled to the local health facilities to address this increased demand, based on rapid surveys for these selected sites. The regional mapping assessment conducted by IGAD provides a list of organizations and government agencies working in the cross-border areas in each of the countries, and the kind of services being provided ( if any ), which has provided the basis for moving forward with the proposed project. In addition, during the \u201c Horn of Africa Partnership \u201d ( HOAP ) meeting in November 2006, some of the IGAD countries developed draft work plans for activities in the border areas, which should be further strengthened and taken into account when the countries and IGAD are identifying the key hot-spot areas.", + "type": "assessment", + "explanation": "However, it is described as an assessment and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'mapping assessment' which suggests data collection.", + "contextual_reason_agent": "However, it is described as an assessment and not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 45, + "text": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 41 of 94 which refugees ) managed drinking water services in the selected municipalities that have been provided through this Project. progress reports, based on updated beneficiary \u2019 s records from respective SKIs. People benefitting from safely managed sanitation services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs.", + "ner_text": [ + [ + 1136, + 1144, + "named" + ], + [ + 4, + 14, + "PMU Data <> publisher" + ], + [ + 701, + 721, + "PMU Data <> data type" + ] + ], + "validated": true, + "empirical_context": "Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs.", + "type": "data", + "explanation": "In the context, 'PMU Data' is explicitly referenced as data to be compiled and recorded, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is mentioned in the context of compiling and recording information.", + "contextual_reason_agent": "In the context, 'PMU Data' is explicitly referenced as data to be compiled and recorded, indicating it functions as a data source.", + "contextual_signal": "mentioned as data to be compiled and recorded", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 71, + "text": "The World Bank Uganda Climate Smart Agricultural Transformation Project ( P173296 ) Page 66 of 81 Risk Description Risk Rating Risk Mitigating Measures Incorporated into Project Design Risk Rating after Mitigation Project level - This is a complex project implemented by MAAIF, in coordination with other agencies, local governments, and communities. H This will be mitigated by agreed accountability procedures issued by MAAIF to participating agencies and districts, spelling out duties and responsibilities together with staff specifically assigned to the project. MAAIF PCU will ensure proper coordination of the project. S Although all World Bank - funded projects are captured under the annual national budget, some IAs may not have access to IFMS accounting system. H MAAIF will use the project \u2019 s module within the Integrated Financial Management System ( IFMS ) to be used for project accounting. However, the financial reports will be prepared manually using Microsoft Excel spreadsheet since they cannot be generated from the accounting system. Manual systems are prone to errors of omission or commission. S Internal control - Inability to follow up reported internal control weaknesses. S MAAIF and participating agencies have qualified and experienced internal auditors who will include the project within their workplans to ensure the internal audit unit carries out its role within the project according to their Internal Audit Charter. This will also be spelled out in the project manual.", + "ner_text": [ + [ + 824, + 862, + "named" + ] + ], + "validated": false, + "empirical_context": "S Although all World Bank - funded projects are captured under the annual national budget, some IAs may not have access to IFMS accounting system. H MAAIF will use the project \u2019 s module within the Integrated Financial Management System ( IFMS ) to be used for project accounting. However, the financial reports will be prepared manually using Microsoft Excel spreadsheet since they cannot be generated from the accounting system.", + "type": "system", + "explanation": "However, it is described as a financial management system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, it is described as a financial management system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "094_PAD-final-02262018", + "page": 47, + "text": "The World Bank Greater Beirut Public Transport Project ( P160224 ) Page 36 of 59 Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection percentage of female passengers and percentage of vulnerable population. Name: Percentage of population residing in GBA with access to Beirut city center ( \u201c La place des martyrs \u201d ) within 60 minutes commuting period using public transport Percentage 50. 00 61. 00 Annual A global information system ( GIS ) - based spatial analysis will be conducted using the open source accessibility tool developed by the World Bank called Open Trip Planner Analyst ( OTPA ). CDR The RPTA / BRT operators ( for the GPS data ) Description: This indicator will measure the increase in percentage of population with access to jobs and services located at the CBD using public transport services. This indicator captures the improved accessibility objective of the project for public transport passengers. Name: Average travel time by public transport from Tabarja station to Charles Helou terminal at morning peak hours Minutes 75. 00 45. 00 Biannual Data to be obtained from the ITS. CDR / the RPTA BRT operators Description: Average rush hour in-vehicle travel time by the PT services from Tabarja station to Beirut ( Charles Helou terminal ) at morning peak hours between 7: 00am and 9: 00am.", + "ner_text": [ + [ + 1173, + 1176, + "named" + ] + ], + "validated": false, + "empirical_context": "00 45. 00 Biannual Data to be obtained from the ITS. CDR / the RPTA BRT operators Description: Average rush hour in-vehicle travel time by the PT services from Tabarja station to Beirut ( Charles Helou terminal ) at morning peak hours between 7: 00am and 9: 00am.", + "type": "system", + "explanation": "However, 'ITS' is referred to as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ITS' is a dataset because it is mentioned in the context of obtaining data.", + "contextual_reason_agent": "However, 'ITS' is referred to as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "064_Mauritania-Water-and-Sanitation-Sectoral-Project", + "page": 48, + "text": "PIU Satisfaction rate of users of WSS services Results from survey taken with beneficiaries in Hodh Chargui and Hodh Gharbi on their satisfaction with water Beginning and end of project Supervision firm reports Supervision firm will carry out user surveys at the beginning of the project to establish the PIU", + "ner_text": [ + [ + 243, + 255, + "named" + ], + [ + 60, + 66, + "user surveys <> data type" + ], + [ + 95, + 107, + "user surveys <> data geography" + ], + [ + 112, + 123, + "user surveys <> data geography" + ], + [ + 186, + 202, + "user surveys <> author" + ] + ], + "validated": true, + "empirical_context": "PIU Satisfaction rate of users of WSS services Results from survey taken with beneficiaries in Hodh Chargui and Hodh Gharbi on their satisfaction with water Beginning and end of project Supervision firm reports Supervision firm will carry out user surveys at the beginning of the project to establish the PIU", + "type": "survey", + "explanation": "In this context, 'user surveys' is indeed a dataset as it is used to gather empirical data on user satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'user surveys' is a dataset because it refers to a structured collection of responses from beneficiaries.", + "contextual_reason_agent": "In this context, 'user surveys' is indeed a dataset as it is used to gather empirical data on user satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 64, + "text": "B. Risk Assessment Mitigating Measures ( MM ) Risk Before MM Issue I Risk Country Financial Management Risks Risk After MM 171. As reported and outlined in the Country Assistance Strategy ( CAS ) of 2009, poor governance remains a critical issue for Yemen. The Country Public Expenditure and Financial Accountability report ( PEFA, 2008 ) indicated that there had been some progress in the fiscal area, particularly in terms of budget expenditure classification and consolidation of investment. However, a lack of progress was observed in achieving budget comprehensiveness and implementing a broader fiscal framework based on a multi-year expenditure framework. Efforts to move forward in reforming budget comprehensiveness, implementation, and cash management, accounting and reporting have been pinned on the design and implementation of the Accounting & Financial Management Information System ( AFMIS ) project, which is experiencing significant delays. These factors, as well as the poor quality of education and training in accounting, have contributed to the generally observed insufficiencies of the financial reporting and auditing systems in the country. The above Country Risks result in having higher potential exposure to corruption, which is mitigated through the Project ' s design. Staffing Current FM department may not have required capacity. 172. perform quarterly reviews of the Project ' s IFRs and annual audits of the Project ' s Financial Statements.", + "ner_text": [ + [ + 845, + 897, + "named" + ] + ], + "validated": false, + "empirical_context": "However, a lack of progress was observed in achieving budget comprehensiveness and implementing a broader fiscal framework based on a multi-year expenditure framework. Efforts to move forward in reforming budget comprehensiveness, implementation, and cash management, accounting and reporting have been pinned on the design and implementation of the Accounting & Financial Management Information System ( AFMIS ) project, which is experiencing significant delays. These factors, as well as the poor quality of education and training in accounting, have contributed to the generally observed insufficiencies of the financial reporting and auditing systems in the country.", + "type": "system", + "explanation": "However, it is mentioned as a project and not as a data source, indicating it does not function as a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a project and not as a data source, indicating it does not function as a dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 107, + "text": "99 given at budget preparation, it would difficult to trace the budget and expenditure during reporting. It also will be difficult for capturing Program-related transactions and reporting on them. ARRA uses the Gregorian calendar as fiscal year, which is different from EFY and needs alignment during the Program implementation. 8. Budget monitoring. The budget control of the proposed four entities is satisfactory. Before effecting payment, the transaction requested for payment is checked on whether it is included in the approved budget and whether sufficient uncommitted budget balance is available. The EIC and MoLSA use the IBEX budget control module, IPDC uses Excel spreadsheet, and ARRA uses \u2018 FAST \u2019 system to track expenditures against the budget. In addition, a quarterly report ( comparison of budget with actual expenditure ) is produced and performance is monitored by the management. With regard to the annual budget utilization, except IPDC, a good budget performance was registered by the other entities in the past three years. Capacity constraints, contractors \u2019 delay in completing works on time, and the regions \u2019 delay in effecting right-of-way payments and resettlements were reasons for low budget utilization by IPDC. 9. Program budgeting arrangements. The Program will follow the Federal GoE ' s budgeting procedure and calendar. The procedures and calendar are documented in the Federal GoE Budget manual.", + "ner_text": [ + [ + 704, + 708, + "named" + ] + ], + "validated": false, + "empirical_context": "Before effecting payment, the transaction requested for payment is checked on whether it is included in the approved budget and whether sufficient uncommitted budget balance is available. The EIC and MoLSA use the IBEX budget control module, IPDC uses Excel spreadsheet, and ARRA uses \u2018 FAST \u2019 system to track expenditures against the budget. In addition, a quarterly report ( comparison of budget with actual expenditure ) is produced and performance is monitored by the management.", + "type": "system", + "explanation": "'FAST' is referred to as a system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'FAST' is a dataset because it is mentioned in the context of tracking expenditures.", + "contextual_reason_agent": "'FAST' is referred to as a system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 23, + "text": "The M & E approach for the project is aligned with the Government \u2019 s procedures and data sources and will contribute to improved data quality. All project indicators ( a ) are a subset of the health sector \u2019 s performance indicators available in various data sources including the Kenya Health Information System ( KHIS ); and ( b ) will be collected routinely through project reports. The project will support county health sector annual performance data review meetings as well as availability of key surveys under Component 1. Where relevant, at project closure, data from household and facility surveys will be used to complement routine data to measure project achievement of the PDO. C. Sustainability 33. The project will support priority interventions outlined in the national health strategies to ensure sustainability. The project will build on existing national systems and structures for implementation and fiduciary arrangements. The Government remains committed to improving delivery of primary healthcare services to advance progress towards UHC, and key project activities are aligned with these objectives. The project implementation entities will be drawn from existing Government structures which will ensure continuity of the expected results beyond the project period. In addition,", + "ner_text": [ + [ + 282, + 313, + "named" + ], + [ + 148, + 166, + "Kenya Health Information System <> data description" + ], + [ + 316, + 320, + "Kenya Health Information System <> acronym" + ], + [ + 577, + 607, + "Kenya Health Information System <> data description" + ] + ], + "validated": true, + "empirical_context": "The M & E approach for the project is aligned with the Government \u2019 s procedures and data sources and will contribute to improved data quality. All project indicators ( a ) are a subset of the health sector \u2019 s performance indicators available in various data sources including the Kenya Health Information System ( KHIS ); and ( b ) will be collected routinely through project reports. The project will support county health sector annual performance data review meetings as well as availability of key surveys under Component 1.", + "type": "system", + "explanation": "It is indeed a dataset as it is mentioned as a source of performance indicators and data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a data source in the context.", + "contextual_reason_agent": "It is indeed a dataset as it is mentioned as a source of performance indicators and data collection.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 64, + "text": "The effect of remedial education on learning outcomes is well established in the literature: teaching oriented to the level of the student rather than the level prescribed for the student \u2019 s grade in the curriculum has produced large gains in learning as found in a number of randomized controlled trials ( Banerjee et al. 2016 ). Table 2. 3 presents effect sizes from evaluations of interventions that have elements of remedial learning targeting struggling students. These vary in modality including computer-assisted adaptive learning, volunteer community members providing after-school tutoring, and curriculum adjustments to focus on core skills. Together, they provide a sense of what would be a reasonable range of effect sizes to expect from the interventions supported under this project. 7. Modeling the benefits of Component 2 relies on the association between the digital learning environment and learning outcomes in PISA 2018. Given that the type of support to schools under Component 2 would vary depending on the needs of recipient schools, it is not possible to identify a range of effect sizes from similar interventions in the impact evaluation literature. Instead, the approach is to use an indicator of school digital learning environment in the PISA 2018 data and use the association between this indicator and learning outcomes as an effect size. The indicator used was whether all computers in the school were connected to the internet or not. The difference in learning outcomes between children at schools with all computers connected to the internet and those not was 0. 15 SD after controlling for differences in household wealth, gender, age, grade, and number of years of preprimary education.", + "ner_text": [ + [ + 1268, + 1282, + "named" + ] + ], + "validated": true, + "empirical_context": "Given that the type of support to schools under Component 2 would vary depending on the needs of recipient schools, it is not possible to identify a range of effect sizes from similar interventions in the impact evaluation literature. Instead, the approach is to use an indicator of school digital learning environment in the PISA 2018 data and use the association between this indicator and learning outcomes as an effect size. The indicator used was whether all computers in the school were connected to the internet or not.", + "type": "dataset", + "explanation": "This is indeed a dataset as it is explicitly used to derive an indicator for analysis in the context of the research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PISA 2018 data' refers to a structured collection of data collected during the PISA assessment.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly used to derive an indicator for analysis in the context of the research.", + "contextual_signal": "mentioned as a data source for effect size analysis", + "tags": [] + }, + { + "filename": "168_252640updated0version", + "page": 12, + "text": "conducted awareness meetings with a number o f decision-makers. In addition, sensitization meetings were conducted with religious leaders to address the problem o f FGC and to identify ways to limit and / or eradicate it. The project approach, which emphasizes gender as a key issue, i s consistent with one o f the key levers o f IDA ' s regional strategy. Government response to HIVIAIDS In the early 1990s, some HIV / AIDS prevention activities - mostly IEC activities - took place when financing from the Global Program for AIDS ( GPA ) was available. However, since the end o f the GPA, the National AIDS Control Program ( NACP ) has been dormant. In 2001, the Government o f Djibouti carried out a health sector study that was used to develop the governmentk health sector reform program, as well as a medium-term plan for the sector ' s development. In the context o f this reform program, and in order to respond more equitably to the needs o f the population, it was decided to reinvigorate the NACP and the treatment for STIs. In October 2001, the president o f the Republic himself endorsed the control o f HIV / AIDS when he inaugurated the new NACP and treatment o f STIs facility, which energized the control o f HIV / AIDS.", + "ner_text": [ + [ + 704, + 723, + "named" + ] + ], + "validated": false, + "empirical_context": "However, since the end o f the GPA, the National AIDS Control Program ( NACP ) has been dormant. In 2001, the Government o f Djibouti carried out a health sector study that was used to develop the governmentk health sector reform program, as well as a medium-term plan for the sector ' s development. In the context o f this reform program, and in order to respond more equitably to the needs o f the population, it was decided to reinvigorate the NACP and the treatment for STIs.", + "type": "study", + "explanation": "However, the term 'health sector study' is mentioned as a study and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a study that likely involved data collection.", + "contextual_reason_agent": "However, the term 'health sector study' is mentioned as a study and not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 52, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 40 and building capacity of oversight entities. 104. Key fiduciary arrangements have been put in place to ensure that resources are used for the intended purpose. The budget and expenditure for the sectors to be supported are clearly identifiable in the government \u2019 s budget and expenditure system of the IBEX at the regional and IFMIS at the federal level. The expenditure of the sectors is subject to internal as well as external audit reviews by the Office of Federal and Regional Auditor Generals ( OF / RAGs ). In addition, Program-specific audit exists which is done on a continual basis and annually. The audit terms of reference are updated for HCO to look into payroll in more detail as 90 percent of the expenditure framework is expected to be for salary of these basic service sectors. Quarterly financial reports are produced from the government system and consolidated at the federal level, and submitted to and reviewed by the World Bank. Relevant staff are placed at all levels to ensure that the system continues to function well. PFM institutionalized training is available which supports the system to deliver as expected. Procurement audits and fraud and corruption reports are also part of the operation \u2019 s arrangements for the pilot SPG woredas.", + "ner_text": [ + [ + 391, + 396, + "named" + ] + ], + "validated": false, + "empirical_context": "Key fiduciary arrangements have been put in place to ensure that resources are used for the intended purpose. The budget and expenditure for the sectors to be supported are clearly identifiable in the government \u2019 s budget and expenditure system of the IBEX at the regional and IFMIS at the federal level. The expenditure of the sectors is subject to internal as well as external audit reviews by the Office of Federal and Regional Auditor Generals ( OF / RAGs ).", + "type": "system", + "explanation": "However, IFMIS is described as a system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMIS is a dataset because it is mentioned in the context of budget and expenditure management.", + "contextual_reason_agent": "However, IFMIS is described as a system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 109, + "text": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 105 of 111 14. The majority of drought-related displacement takes place from rural to urban areas. In Baidoa, for example, more than 7, 000 people arrived in the first three weeks of January in search of water and food, having traveled by foot, in donkey carts and trucks. Most of the families have joined existing settlements for internally displaced in Baidoa. Most of the newly displaced ( 80 per cent ) are from villages in the Bay region. It is foreseen that as the situation continues to deteriorate, increasing numbers of people from rural areas will move to urban centers and join settlements for internally displaced. In some cases, families split up and let children and women move to towns, while men stay behind with the remaining animals. In other cases, preemptive movement is done by the strongest family members, leaving behind young children, women and the elderly. 15. While demographic profile information is still needed, it is likely these drought-related internal displacements may be from minority clans, who have lost assets including their homes, livestock, and livelihoods.", + "ner_text": [ + [ + 981, + 1012, + "named" + ] + ], + "validated": false, + "empirical_context": "15. While demographic profile information is still needed, it is likely these drought-related internal displacements may be from minority clans, who have lost assets including their homes, livestock, and livelihoods.", + "type": "information", + "explanation": "However, it is not presented as a data source or structured collection of data in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'demographic profile information' suggests structured data about populations.", + "contextual_reason_agent": "However, it is not presented as a data source or structured collection of data in the context.", + "contextual_signal": "mentioned only as information needed, not as a data source", + "tags": [] + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 30, + "text": "Results from focus group interviews and the household survey show that a large majority of households stated that they would be willing to pay for an improved service at the standpost managed by a dedicated operator. The results indicate that 78 percent of households would be willing to pay the lowest price tested in the survey, 5 FBu per 20 liters of potable water, if the service became more reliable and readily accessible with the rehabilitation of existing standposts and the installation of new ones. If the price proposed was 10 FBu, then 66 percent of respondents would agree to pay it; 56 percent of respondents would agree to 15 FBu / 20 liters; and 50 percent would agree to 20 FBu / 20 liters. 80. Standpost pricing impact. The potential social impact of changing from a regime of free water at the standpost to one where standpost users would pay has been carefully assessed.", + "ner_text": [ + [ + 44, + 60, + "named" + ], + [ + 91, + 101, + "household survey <> reference population" + ], + [ + 257, + 267, + "household survey <> reference population" + ], + [ + 906, + 924, + "household survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Results from focus group interviews and the household survey show that a large majority of households stated that they would be willing to pay for an improved service at the standpost managed by a dedicated operator. The results indicate that 78 percent of households would be willing to pay the lowest price tested in the survey, 5 FBu per 20 liters of potable water, if the service became more reliable and readily accessible with the rehabilitation of existing standposts and the installation of new ones.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it provides results from the survey that are used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data from households.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides results from the survey that are used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 52, + "text": "Of course, the main reason for this is that most of the preparation has taken place during COVID-19 lockdowns and restricted travel periods, making deep and meaningful citizen \u2019 s engagement more complicated. The preparation team, with the support of locally based consultants, has reached out to both a range of training providers and businesses and employers in priority economic sectors to get a better understanding of their views on a project of this nature. Consultations were conducted with a wide, but expected, set of stakeholders including policy makers, ministry officials from MENFOP, MT, MOF, etc., development partners, private sector employers, and other stakeholders to the skills development, employment, and jobs sector. The preparation team also consulted with students and trainees, but this was more limited in scope and is something that the Project will continue with during implementation and will gather information on student and employer satisfaction, and other information obtained through graduate tracking surveys, etc.", + "ner_text": [ + [ + 1018, + 1043, + "named" + ], + [ + 780, + 801, + "graduate tracking surveys <> reference population" + ], + [ + 929, + 977, + "graduate tracking surveys <> data description" + ], + [ + 1065, + 1083, + "graduate tracking surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Consultations were conducted with a wide, but expected, set of stakeholders including policy makers, ministry officials from MENFOP, MT, MOF, etc., development partners, private sector employers, and other stakeholders to the skills development, employment, and jobs sector. The preparation team also consulted with students and trainees, but this was more limited in scope and is something that the Project will continue with during implementation and will gather information on student and employer satisfaction, and other information obtained through graduate tracking surveys, etc.", + "type": "survey", + "explanation": "This is a dataset as it is mentioned in the context of gathering information on student and employer satisfaction, indicating its use for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'graduate tracking surveys' implies a structured collection of data related to graduates.", + "contextual_reason_agent": "This is a dataset as it is mentioned in the context of gathering information on student and employer satisfaction, indicating its use for empirical analysis.", + "contextual_signal": "follows 'will gather information on' indicating its role as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 15, + "text": "Among people who are not working or are not looking for a job, 23 percent of the women answered that they had to take care of the house, kids, or other relatives; meanwhile, only 1 percent of men chose the same reason. 18 These data reveal the likelihood of women making more multi-destination trips than men and relying more on walking and public transport. Public consultations held during Project preparation revealed that women in the Region use bicycles more than the average rate in Brazil; however, there is still a gender gap, and women might consider using bicycles more if the infrastructure were safer. Moreover, public transport analyses of Brazilian cities19, 20 reveal that women and minorities are more dependent on buses than other groups, and they also tend to make more time - consuming, chained trips and multi-purpose journeys, such as for jobs, childcare, and shopping. Therefore, ensuring that jobs, schools, childcare, and other services are accessible by public transport is critical for women and minorities. 16. The safety and security of public transport disproportionally affect women. According to a survey from 2019 conducted by the Locomotiva and Patr\u00edcia Galv\u00e3o institutes across Brazil, 97 percent of interviewed Brazilian women said they had experienced sexual harassment in public transport or in taxis or ride-hailing vehicles.", + "ner_text": [ + [ + 624, + 649, + "named" + ] + ], + "validated": false, + "empirical_context": "Public consultations held during Project preparation revealed that women in the Region use bicycles more than the average rate in Brazil; however, there is still a gender gap, and women might consider using bicycles more if the infrastructure were safer. Moreover, public transport analyses of Brazilian cities19, 20 reveal that women and minorities are more dependent on buses than other groups, and they also tend to make more time - consuming, chained trips and multi-purpose journeys, such as for jobs, childcare, and shopping. Therefore, ensuring that jobs, schools, childcare, and other services are accessible by public transport is critical for women and minorities.", + "type": "analysis", + "explanation": "However, 'public transport analyses' refers to studies or evaluations rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'analyses' can imply a structured examination of data.", + "contextual_reason_agent": "However, 'public transport analyses' refers to studies or evaluations rather than a structured collection of data.", + "contextual_signal": "mentioned only as an analysis, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 26, + "text": "16 cantonment operations can begin: ( i ) the list of ex-combatants, identified based on the eligibility criteria as agreed by the parties, is available, and ( ii ) security arrangements during cantonment phase have been established. The signatory movements have delayed the transmission of the lists of their respective combatants to the Government and MINUSMA, while the security arrangements have not taken place as agreed. There is political commitment to DDR and the World Bank team is closely coordinating with the Government to ensure that the operational engagement and the political decisions required for successful implementation take place. With regards to governance, anti-corruption and public sector regulations exist, but corruption and transparency issues are still substantial. 20 Mitigating measures for governance issues have been built in the project design in the form of strict fiduciary control mechanisms and application of World Bank fiduciary rules. 56. Institutional capacity for implementation and sustainability risk is High. Further, there must be a PIU in place prior to project effectiveness for the project to proceed. It must be expected that the PIU will have limited technical and implementation capacity. To mitigate these risks, the World Bank will continue to provide technical assistance and advice to the Government. 57. Fiduciary risks are substantial. Due to the overall fragility of the country, financial management systems are considered weak.", + "ner_text": [ + [ + 46, + 67, + "named" + ], + [ + 93, + 113, + "list of ex-combatants <> data description" + ] + ], + "validated": true, + "empirical_context": "16 cantonment operations can begin: ( i ) the list of ex-combatants, identified based on the eligibility criteria as agreed by the parties, is available, and ( ii ) security arrangements during cantonment phase have been established. The signatory movements have delayed the transmission of the lists of their respective combatants to the Government and MINUSMA, while the security arrangements have not taken place as agreed.", + "type": "list", + "explanation": "This is indeed a dataset as it is a list of individuals used for the purpose of identifying eligible ex-combatants in the context of cantonment operations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of individuals identified as ex-combatants.", + "contextual_reason_agent": "This is indeed a dataset as it is a list of individuals used for the purpose of identifying eligible ex-combatants in the context of cantonment operations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 77, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 66 and Corporate Strategy. It is expected that acceptable FM arrangements will continue to be in place throughout Project effectiveness and implementation. 18. Key risks envisaged under the current Project are the following: ( a ) ministry internal audit review reports are not shared regularly with the World Bank. This is risky given the number of ongoing activities and spread of operations where such reviews give management needed assurance of fiduciary status; and ( b ) the IFMS has not been fully operationalized at the ministry, which increases the risk of errors and accuracy of financial reports. These same risks affect the new project under preparation. 19. The conclusion of the assessment is that the FM arrangements for the Project have an overall risk rating of Moderate. Procurement 20. The Borrower will carry out procurement under the proposed Project in accordance with the World Bank \u2019 s \u2018 Procurement Regulations for IPF Borrowers \u2019 ( Procurement Regulations ), dated July 2016 and revised in November 2017, under the NPF; the \u2018 Guidelines on Preventing and Combating Fraud and Corruption in Projects Financed by IBRD Loans and IDA Credits and Grants, dated July 1, 2016; and other provisions stipulated in the Financing Agreement. 21.", + "ner_text": [ + [ + 560, + 564, + "named" + ] + ], + "validated": false, + "empirical_context": "Key risks envisaged under the current Project are the following: ( a ) ministry internal audit review reports are not shared regularly with the World Bank. This is risky given the number of ongoing activities and spread of operations where such reviews give management needed assurance of fiduciary status; and ( b ) the IFMS has not been fully operationalized at the ministry, which increases the risk of errors and accuracy of financial reports. These same risks affect the new project under preparation.", + "type": "system", + "explanation": "However, IFMS is mentioned as a system that has not been fully operationalized, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMS is a dataset because it is related to financial reporting.", + "contextual_reason_agent": "However, IFMS is mentioned as a system that has not been fully operationalized, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 36, + "text": "testing, implementing a national survey to identify the poor with the participation of local communities, and building institutional capacity. The aim is to increase the number of beneficiaries from 1 million households currently to 1. 5 million households to include all those under the national poverty line. 105. However, resources to implement such reform program are insufficient, and additional resources and technical assistance are needed to support it. The current food price crisis has highlighted the limitations of the current system to support the poor when faced with risk and the importance of building a Social Safety Net ( SSN ) for the long run that could both be scaled up in times of crisis as well as providing incentives for human capital accumulation and economic advancement. 111. The Social Welfare Fund: Opportunities and Challenges 106. The SWF, established in 1996 by Presidential Law, is the only public cash-transfer based social safety net in Yemen. The SWF has expanded its coverage from 100, 000 beneficiaries at its start to almost 1 million poor and vulnerable Yemeni households over a ten-year period. The Fund? s budget has grown from US $ 4 million at the outset to US $ 200 million in 2008 / 2009.", + "ner_text": [ + [ + 24, + 39, + "named" + ], + [ + 888, + 892, + "national survey <> publication year" + ], + [ + 974, + 979, + "national survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "testing, implementing a national survey to identify the poor with the participation of local communities, and building institutional capacity. The aim is to increase the number of beneficiaries from 1 million households currently to 1.", + "type": "survey", + "explanation": "In this context, the national survey is explicitly mentioned as a method to identify the poor, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because a national survey typically involves the collection of structured data from a population.", + "contextual_reason_agent": "In this context, the national survey is explicitly mentioned as a method to identify the poor, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 13, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 10 of 89 less than 5, 000 inhabitants. 20 The lack of consolidated and reliable data on distribution of population by water sources; low local capacity for on-site water storage to bridge intermittent water supply; and exposure to prolonged periods of droughts, which are expected to become more frequent and longer because of climate change, make long-term planning of service delivery a challenge, especially in the context of the evident impact of climate change on different water sources. Figure 1. Distribution of Population by Density, 2020 Figure 2. Spatial Use of Improved Water Sources by Regions, Household Survey 2017 Source: TajStat, Population Census Data, 2020 Source: WB, WASH Poverty Diagnostics, 2017 10. Even where WSS infrastructure is available for provision of piped safely managed WSW services, this infrastructure is largely outdated and inefficient. This infrastructure was not designed to ensure resilience to climate change shocks, associated with high operation and maintenance ( O & M ) costs, and the institutions \u2019 capacity to consider alternative WSW collection / treatment options is limited. Maintenance is limited to quick fixes of apparent system breakdowns, affecting operational performance in the long term. The number of incidents in the piped water supply networks is extremely high.", + "ner_text": [ + [ + 728, + 735, + "named" + ] + ], + "validated": false, + "empirical_context": "Distribution of Population by Density, 2020 Figure 2. Spatial Use of Improved Water Sources by Regions, Household Survey 2017 Source: TajStat, Population Census Data, 2020 Source: WB, WASH Poverty Diagnostics, 2017 10. Even where WSS infrastructure is available for provision of piped safely managed WSW services, this infrastructure is largely outdated and inefficient.", + "type": "organization", + "explanation": "However, TajStat is referenced as an organization and not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned alongside data sources.", + "contextual_reason_agent": "However, TajStat is referenced as an organization and not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 62, + "text": "The World Bank ENHANCING CONNECTIVITY AND RESILIENCE IN THE FAR NORTH OF CAMEROON FOR INCLUSIVENESS PROJECT ( P178207 ) Page 63 of 82 passability during the rainy season. passable road within five kilometers of the MDK road section Number of refugees and host communities population with access to an all-weather passable road within five kilometers of the MDK road section. Number of refugees and people in host communities with access to an all-season road. Climate change impacts are expected to affect road passability during the rainy season. Yearly Surveys UNHCR data The methodology will consist on using survey to calculate the number of refugees and host communities population located within a 5km - buffer zone of each road section rehabilitated and maintained with climate resilience features. Project Implementation Unit Share of women with improved access to an all-weather passable road within five kilometers of the MDK road section. This sub-indicator measures the percentage of women with improved access to an all-weather passable road within five kilometers of the MDK road section. Annual M & E report Compute the percentage of women with improved access to an all - weather passable road within five kilometers of the MDK road section.", + "ner_text": [ + [ + 548, + 562, + "named" + ], + [ + 4, + 14, + "Yearly Surveys <> author" + ], + [ + 60, + 81, + "Yearly Surveys <> data geography" + ], + [ + 612, + 618, + "Yearly Surveys <> data type" + ], + [ + 1273, + 1291, + "Yearly Surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Climate change impacts are expected to affect road passability during the rainy season. Yearly Surveys UNHCR data The methodology will consist on using survey to calculate the number of refugees and host communities population located within a 5km - buffer zone of each road section rehabilitated and maintained with climate resilience features. Project Implementation Unit Share of women with improved access to an all-weather passable road within five kilometers of the MDK road section.", + "type": "survey", + "explanation": "In this context, 'Yearly Surveys' is explicitly mentioned as a source of data used to calculate population metrics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Yearly Surveys' suggests a systematic collection of data over time.", + "contextual_reason_agent": "In this context, 'Yearly Surveys' is explicitly mentioned as a source of data used to calculate population metrics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 12, + "text": "These cross \u2010 cutting challenges along with sector specific supply \u2010 side and demand \u2010 side constraints have resulted in poor health indicators and large disparities by socioeconomic status. Balochistan performs worse than the national average across reproductive, maternal, newborn, child health, and nutrition ( RMNCHN ) indicators. 10 Under \u2010 five mortality rate is 78 per 1, 000 live births in Balochistan, compared to 74 per 1, 000 live births at the national level. The total fertility rate ( TFR ) is 4. 0 in Balochistan and 3. 6 nationally, and almost half of the children under five are stunted in the province, compared to about one in three at the national level. Differences in service utilization between the 8 Javed, S. A., M. D. Anjum, W. Imran, et al. 2013. \u201c Correlates of Preferences for Home or Hospital Confinement in Pakistan: Evidence from a National Survey. \u201d BMC \u2010 Pregnancy and Childbirth 13: 137. ul Husnain, M. I., M. Rashid, and U. Shakoor. 2018. \u201c Decision \u2010 making for Birth Location among Women in Pakistan: Evidence from National Survey. \u201d BMC Pregnancy and Childbirth 18: 226. https: / / doi. org / 10. 1186 / s12884 \u2010 018 \u2010 1844 \u2010 8. 9 Alif Ailaan 2018. 2013 \u2010 2018 Five Years of Education Reforms in Balochistan. Wins, Losses and Challenges for 2018 \u2010 2023. Islamabad: Alif Ailaan. vi \u2010 33 pp. 10 NIPS and ICF ( 2019 ).", + "ner_text": [ + [ + 864, + 879, + "named" + ], + [ + 191, + 202, + "National Survey <> data geography" + ], + [ + 398, + 409, + "National Survey <> data geography" + ], + [ + 838, + 846, + "National Survey <> data geography" + ], + [ + 883, + 886, + "National Survey <> publisher" + ], + [ + 942, + 951, + "National Survey <> author" + ], + [ + 957, + 967, + "National Survey <> author" + ], + [ + 1170, + 1181, + "National Survey <> author" + ], + [ + 1182, + 1186, + "National Survey <> reference year" + ], + [ + 1195, + 1199, + "National Survey <> publication year" + ], + [ + 1235, + 1246, + "National Survey <> data geography" + ], + [ + 1347, + 1351, + "National Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Imran, et al. 2013. \u201c Correlates of Preferences for Home or Hospital Confinement in Pakistan: Evidence from a National Survey. \u201d BMC \u2010 Pregnancy and Childbirth 13: 137.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced in the title of the study, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'National Survey' suggests a structured collection of data collected for research purposes.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced in the title of the study, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 11, + "text": "Inequalities measured by the Gini index increased at national level ( from 43. 5 percent in 2006 to 46. 3 percent in 2012 ) and in rural areas, confirming that economic growth was insufficient to pull people out of poverty, thus the need to be complemented by targeted interventions. 4. While access to health and education had improved, malnutrition remains a serious threat to human development and is on the rise again. In 2010 ( Demographic and Health Survey ), 58 percent of children between six and 59 months of age were reported stunted ( low height-for-age, an indicator of chronic malnutrition ) while in 2014, the estimated rate remained 49 1 Burundi \u2013 Fragility Assessment Note \u2013 March 2016 2 67. 1 percent in 2006 and 64. 9 percent in 2014 3 Burundi is divided into 18 provinces, 129 communes and 2, 638 collines, which are equivalent to large villages ( with an average of 535 households ). 4 In October 2016, there were over 315, 000 Burundian refugees mostly in Tanzania and Rwanda ( UNHCR, http: / / data. unhcr. org / burundi / regional. php )", + "ner_text": [ + [ + 433, + 462, + "named" + ], + [ + 92, + 96, + "Demographic and Health Survey <> reference year" + ], + [ + 426, + 430, + "Demographic and Health Survey <> publication year" + ], + [ + 480, + 521, + "Demographic and Health Survey <> reference population" + ], + [ + 614, + 618, + "Demographic and Health Survey <> publication year" + ], + [ + 653, + 660, + "Demographic and Health Survey <> data geography" + ], + [ + 754, + 761, + "Demographic and Health Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "While access to health and education had improved, malnutrition remains a serious threat to human development and is on the rise again. In 2010 ( Demographic and Health Survey ), 58 percent of children between six and 59 months of age were reported stunted ( low height-for-age, an indicator of chronic malnutrition ) while in 2014, the estimated rate remained 49 1 Burundi \u2013 Fragility Assessment Note \u2013 March 2016 2 67. 1 percent in 2006 and 64.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data on malnutrition rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on health indicators.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data on malnutrition rates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "135_810840PAD0P144010Box379877B00OUO090", + "page": 77, + "text": "The DGMP-DS and the ARMDS will have to play their role to ensure good governance and limit the opportunities for undue influence by anyone PCU / MEF DGMP - DS / ARM-DS No later than six months Throughout the project life 4-Delays in procurement process Identify the root cause of procurement delays at National level and propose appropriate solutions MEF During Negotiations 5-Lack of adequate record keeping system Set up the project filing system in order to better keep procurement documents and reports and identify a staff responsible for this task. Train staff in data management. PCU / MEF No later than 3 months within the project implementation AGETIPE 1-Lack of adequate procurement staff due to the new volume of activities to be implemented Recruitment of a procurement specialist and technical experts during the first two years of the project for regular short-term support missions during the assignment to speed procurement processes and to provide the required expertise AGETIPE At any time after the effectiveness 2-Delays in the procurement processes due to the workload generated Setting up a dedicated team within AGETIPE to process procurement activities of the project AGETIPE During the negotiations of delegated", + "ner_text": [ + [ + 427, + 448, + "named" + ] + ], + "validated": false, + "empirical_context": "The DGMP-DS and the ARMDS will have to play their role to ensure good governance and limit the opportunities for undue influence by anyone PCU / MEF DGMP - DS / ARM-DS No later than six months Throughout the project life 4-Delays in procurement process Identify the root cause of procurement delays at National level and propose appropriate solutions MEF During Negotiations 5-Lack of adequate record keeping system Set up the project filing system in order to better keep procurement documents and reports and identify a staff responsible for this task. Train staff in data management.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to a filing system rather than a structured collection of data used for analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data organization.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a filing system rather than a structured collection of data used for analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "The population census is the most important data collection activity in developing countries for at least two reasons. First, it provides spatial distribution of the population that cannot be obtained with demographic projections. Second, the cartography of the census provides the sampling framework of all other statistical operations ( household surveys, agricultural censuses, enterprise censuses, and so on ). In addition, population census data are used to construct poverty maps, a powerful tool for targeting social programs. Since the census is planned for 2017, the project will contribute to the analysis phase of the population census. 42. The subcomponent will support poverty analysis. As stated, INS conducted its fourth living conditions survey in 2014 ( ECAM 4 ) and a complementary survey financed by the Government early 2017; the first results are expected by end of 2017. The objective of the complementary survey is to update the Cameroon poverty map, probably at the commune level. The survey collects some data at the household level as well as at the community level. Information is also collected on school and health facilities. 43. Finally, the LFS methodology needs to be upgraded and updated, and data collection frequency improved to be relevant for policy making. 44.", + "ner_text": [ + [ + 771, + 777, + "named" + ], + [ + 764, + 768, + "ECAM 4 <> reference year" + ], + [ + 952, + 960, + "ECAM 4 <> data geography" + ] + ], + "validated": true, + "empirical_context": "The subcomponent will support poverty analysis. As stated, INS conducted its fourth living conditions survey in 2014 ( ECAM 4 ) and a complementary survey financed by the Government early 2017; the first results are expected by end of 2017. The objective of the complementary survey is to update the Cameroon poverty map, probably at the commune level.", + "type": "survey", + "explanation": "In the context, 'ECAM 4' is explicitly mentioned as a living conditions survey, indicating it is used as a data source for poverty analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'ECAM 4' is referred to as a survey conducted to analyze living conditions.", + "contextual_reason_agent": "In the context, 'ECAM 4' is explicitly mentioned as a living conditions survey, indicating it is used as a data source for poverty analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 64, + "text": "On an annual basis the health service quality assessment will include direct observation of health service process quality measures at hospitals and health centers. ( iv ) Bi-annual patient feedback using exit surveys. ( v ) Bi-annual visits to a sample of BHTs to measure service outputs and quality. ( b ) Periodic TPM data collection: ( i ) Biennial household coverage surveys as baseline / endline surveys in the project \u2019 s three-year timeframe. 35 ( ii ) Citizen engagement survey collected at the household level, with the coverage survey every other year. 2. Data Analysis and Visualization Platform. The platform will emphasize development of an integrated, institutionalized, and sustainable system. The platform will include analysis of health service delivery in refugee and host community areas. The platform will include the following: ( a ) Interactive data visualization platform presenting Results Framework and core indicators. The platform will use data from DHIS2 and the TPM and will include BHI data. It will be updated at least on a quarterly basis. ( b ) Presentation of data in data visualization platform in the form of static and interactive maps, charts and graphs, tailored to project monitoring needs and partner priorities. ( c ) Incorporation of HSF data within the data visualization platform through a link or page within the platform. ( d ) Overlay of disease and health service delivery data with meteorologic data to better understand seasonal patterns in service delivery and infectious disease data. 35 Given the planned project length of three years, this is a baseline and an endline survey. Potential timeframe changes would include interim surveys, which are planned to be light surveys focusing on key indicators.", + "ner_text": [ + [ + 344, + 379, + "named" + ] + ], + "validated": true, + "empirical_context": "( v ) Bi-annual visits to a sample of BHTs to measure service outputs and quality. ( b ) Periodic TPM data collection: ( i ) Biennial household coverage surveys as baseline / endline surveys in the project \u2019 s three-year timeframe. 35 ( ii ) Citizen engagement survey collected at the household level, with the coverage survey every other year.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey used for data collection within the project's timeframe.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of survey conducted periodically to collect data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey used for data collection within the project's timeframe.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 16, + "text": "Indeed, emerging data collected from several countries in the region ( including Uganda ) in the Facebook Future of Business Survey show that women \u2019 s businesses are significantly more likely to be temporarily closed during the pandemic. 17 Recognizing the pressure on MSMEs \u2019 liquidity, the Bank of Uganda ( BoU ) encouraged banks to provide moratoria on their loans to their liquidity-constrained borrowers for up to 12 months. The BoU also took measures to reduce both the cost and risk associated with the bank lending. 18 In addition, the BoU authorized banks to accrue interest on outstanding loans during the moratorium period, provided the interest rate charged was no more than that in the original loan 12 Federation of Small and Medium Sized Enterprises in Uganda ( August 2021 ). 13 Uganda Bureau of Statistics June 2020 conducted with the support of the World Bank. 14 DC2021-004, From COVID 19 Response to Resilient Recovery, March 20, 2021. 15 See Pillar 3, of the World Bank Group, Saving Lives, Scaling-up Impact and Getting Back on Track, World Bank Group COVID-19 Crisis Response Approach Paper. 16 World Bank Group. 2019. Profiting from Parity: Unlocking the Potential of Women ' s Business in Africa.", + "ner_text": [ + [ + 97, + 131, + "named" + ], + [ + 8, + 21, + "Facebook Future of Business Survey <> data type" + ], + [ + 81, + 87, + "Facebook Future of Business Survey <> data geography" + ], + [ + 142, + 162, + "Facebook Future of Business Survey <> reference population" + ], + [ + 270, + 275, + "Facebook Future of Business Survey <> reference population" + ], + [ + 769, + 775, + "Facebook Future of Business Survey <> data geography" + ], + [ + 785, + 789, + "Facebook Future of Business Survey <> publication year" + ], + [ + 796, + 802, + "Facebook Future of Business Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Indeed, emerging data collected from several countries in the region ( including Uganda ) in the Facebook Future of Business Survey show that women \u2019 s businesses are significantly more likely to be temporarily closed during the pandemic. 17 Recognizing the pressure on MSMEs \u2019 liquidity, the Bank of Uganda ( BoU ) encouraged banks to provide moratoria on their loans to their liquidity-constrained borrowers for up to 12 months.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey collecting data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data from several countries.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey collecting data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 15, + "text": "Such reform of the Tawjihi would need to be aligned with a revision of the curriculum of secondary education and its implementation. The double purpose of certification and university admission, and the selection criteria of tertiary education institutions must also be simultaneously reformed to better align schools toward learning, not passing exams. The GOJ has shown commitment by creating a High Commission for the reform of the Tawjihi, with specific changes already to become effective in the 2017 / 18 school year, and by deepening the dialogue with the MOHESR. 21. The final challenge that hinders further improvements in access and quality of education for all children is the MOE \u2019 s ability to manage the education system efficiently, including the system \u2019 s capacity to absorb growing populations of students. Since the start of the Syrian refugee crisis, Syrian refugees have accounted for an increase of more than 10 percent of the public student population. Additional financial resources are essential to cater to a growing number of students, many of them coming from vulnerable backgrounds and refugee populations, while improving and preserving gains in learning and education quality. Despite the increase in financial and technical support by international partners, the current capacity does not align with the challenges on the ground. 16 TIMSS assesses grades 4 and 8 students worldwide, but Jordan only participates in the grade 8 assessment. 17 EGRA and EGMA were sponsored and supported by USAID and are considered by GOJ to be part of the institutional student assessment system in the country. Assessments were conducted nationwide over five years ( 2012 \u2010 2017 ) in grades K \u2010 3 ( about 400, 000 children ).", + "ner_text": [ + [ + 1365, + 1370, + "named" + ], + [ + 501, + 522, + "TIMSS <> reference year" + ], + [ + 1419, + 1425, + "TIMSS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Despite the increase in financial and technical support by international partners, the current capacity does not align with the challenges on the ground. 16 TIMSS assesses grades 4 and 8 students worldwide, but Jordan only participates in the grade 8 assessment. 17 EGRA and EGMA were sponsored and supported by USAID and are considered by GOJ to be part of the institutional student assessment system in the country.", + "type": "assessment", + "explanation": "TIMSS is indeed used as a data source for assessing student performance in grades 4 and 8, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed TIMSS is a dataset because it assesses students and provides data on their performance.", + "contextual_reason_agent": "TIMSS is indeed used as a data source for assessing student performance in grades 4 and 8, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "158_40156", + "page": 55, + "text": "\u0083 Quality, utilization, coverage and satisfaction with HIV services provided to refugees, returnees, IDPs and surrounding populations \u0083 Quality, utilization, coverage and satisfaction with HIV services provided to CBMPs \u0083 Analysis of differential HIV knowledge and behavior in refugee and surrounding communities to guide intervention strategies \u0083 Assessment of the referral system \u0083 Best / good practice HIV service delivery in the region 35. Dissemination of data to facilitate their use is essential. The PFO will prepare standardized information products ( reports ), which IGAD will disseminate through an annual information sharing seminar about the IGAD HIV / AIDS initiative \u2013 bi annual physical progress report, annual mapping assessment and annual IGAD HIV progress report ( also for the IGAD Annual Heads of State meeting ). These information products will be ready before new work plans are developed for the following year \u2013 therefore \u2013 the time frame for the annual report will lag three months behind the planning cycle, to ensure that M & E data are available when decisions are made about implementation of activities.", + "ner_text": [ + [ + 721, + 746, + "named" + ] + ], + "validated": false, + "empirical_context": "Dissemination of data to facilitate their use is essential. The PFO will prepare standardized information products ( reports ), which IGAD will disseminate through an annual information sharing seminar about the IGAD HIV / AIDS initiative \u2013 bi annual physical progress report, annual mapping assessment and annual IGAD HIV progress report ( also for the IGAD Annual Heads of State meeting ). These information products will be ready before new work plans are developed for the following year \u2013 therefore \u2013 the time frame for the annual report will lag three months behind the planning cycle, to ensure that M & E data are available when decisions are made about implementation of activities.", + "type": "assessment", + "explanation": "However, it is described as an assessment and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'assessment', which can imply data collection.", + "contextual_reason_agent": "However, it is described as an assessment and not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "004_BOSIB-87c444de-4797-4bf9-b654-4932a7fb0112", + "page": 43, + "text": "The World Bank Integrated Community Resilience Project ( P506969 ) PROJECT APPRAISAL DOCUMENT Page 34 Monitoring & Evaluation Plan: Intermediate Results Indicators by Components Component 1: Social Safety Nets Systems Beneficiaries of the SBCC sessions reporting improved human development practices due to project interventions ( Percentage ) Description Indicator measures percentage of beneficiafries participating in SBCC sessions who report that they have adopted improved practices that promote better HD outcomes following implementation of the SBCC intervention. In addition to determining the performance against this indicator, the MASS will also provide detailed absolute numbers of participants disagregated by gender, and refugees. Frequency Twice in the life of the project - in the second and fourth year Data source Beneficiary Assessment ( BA ) Methodology for Data Collection The MASS will hire a consultant to conduct two BAs during the project life, one in the second year and another in the fourth / final year. The BA will provide various sets of information about project performance based on beneficiary feedback \u2013 including providing updates to this particular indicator. Responsibility for Data Collection MASS - based on a BA report that will be produced by a consultant that the MASS will hire. Of which women ( Number of people ) Description Indicator will measure women participation in the SBCC and the BA as well as their perception with regard to the indicator.", + "ner_text": [ + [ + 642, + 646, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Community Resilience Project ( P506969 ) PROJECT APPRAISAL DOCUMENT Page 34 Monitoring & Evaluation Plan: Intermediate Results Indicators by Components Component 1: Social Safety Nets Systems Beneficiaries of the SBCC sessions reporting improved human development practices due to project interventions ( Percentage ) Description Indicator measures percentage of beneficiafries participating in SBCC sessions who report that they have adopted improved practices that promote better HD outcomes following implementation of the SBCC intervention. In addition to determining the performance against this indicator, the MASS will also provide detailed absolute numbers of participants disagregated by gender, and refugees. Frequency Twice in the life of the project - in the second and fourth year Data source Beneficiary Assessment ( BA ) Methodology for Data Collection The MASS will hire a consultant to conduct two BAs during the project life, one in the second year and another in the fourth / final year.", + "type": "program", + "explanation": "However, 'MASS' is described as a program that hires a consultant for data collection, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MASS' is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "However, 'MASS' is described as a program that hires a consultant for data collection, not a structured collection of data itself.", + "contextual_signal": "'mentioned only as a project, not as a data source'", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 41, + "text": "The Operation will build on the existing M & E structures at MoE and strengthen these under the IPF component. MoE \u2019 s Central Planning and Project Management Unit ( CPPMU ), headed by the Chief Economists ( from Basic Education, TVET, University and Post Training and Skills Development ), are responsible for overall coordination and monitoring of NESSP implementation. The CPPMU works closely with the Kenya Bureau of Statistics ( KNBS ). The CPPMU, in collaboration with the KNBS team, develops and publishes educational statistical booklets. The MoE \u2019 s DPCAD oversees day-to day implementation of key donor funded projects, including the ongoing education projects. 65. The Operation results monitoring will build on the existing databases used by the CPPMU that comprise the online - based NEMIS platform. The NEMIS, hosted by MoE, is being revamped to ensure data credibility and data security. The NEMIS contains up-to-date key education data, including for primary education, and has been used to generate the annual educational statistical booklets. KNEC hosts the platform for data on learning assessments, including the school specific learning assessments reports. The Directorate of Quality Assurance and Standards, and the Directorate of School Audit at MoE conduct regular visits to schools and submit their data and reports to MoE management and relevant IEs for action.", + "ner_text": [ + [ + 817, + 822, + "named" + ], + [ + 61, + 64, + "NEMIS <> publisher" + ], + [ + 111, + 114, + "NEMIS <> publisher" + ], + [ + 119, + 163, + "NEMIS <> author" + ], + [ + 513, + 545, + "NEMIS <> data type" + ], + [ + 551, + 554, + "NEMIS <> publisher" + ], + [ + 834, + 837, + "NEMIS <> publisher" + ], + [ + 967, + 984, + "NEMIS <> reference population" + ], + [ + 1133, + 1177, + "NEMIS <> data description" + ], + [ + 1270, + 1273, + "NEMIS <> publisher" + ], + [ + 1345, + 1348, + "NEMIS <> publisher" + ] + ], + "validated": true, + "empirical_context": "The Operation results monitoring will build on the existing databases used by the CPPMU that comprise the online - based NEMIS platform. The NEMIS, hosted by MoE, is being revamped to ensure data credibility and data security. The NEMIS contains up-to-date key education data, including for primary education, and has been used to generate the annual educational statistical booklets.", + "type": "database", + "explanation": "NEMIS is confirmed as a dataset since it is explicitly mentioned to contain up-to-date key education data and is used to generate statistical booklets.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is described as an online-based platform containing key education data.", + "contextual_reason_agent": "NEMIS is confirmed as a dataset since it is explicitly mentioned to contain up-to-date key education data and is used to generate statistical booklets.", + "contextual_signal": "described as a platform containing key education data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 31, + "text": "The EMIS will build on efforts that are under way including by UNICEF through Niger \u2019 s Data Must Speak initiative which creates data dashboards at the school level; ( b ) support ( i ) the implementation of a national standardized assessment of students; and ( ii ) building the capacity of National Assessment Unit; and ( c ) M & E results of the project literacy intervention ( under Subcomponent 1. 2 ); and ( d ) support teacher deployment and retention measures in refugee-hosting areas and training in contingency planning in the event of new influx of populations. 31 Notably, a census of contractual teachers was conducted, in 2018, with the establishment of a single identification mechanism, which led to the registration of all contractual teachers at the MEP. Recent efforts also include an organizational audit of MES, the elaboration and distribution of HR procedures manuals at the MEP, training of trainers on teacher management, diagnosis of HR functions at the MEP and MES, elaboration of a compendium of all HR legal texts, and the elaboration of a strategy to reform HR management in both ministries. 32 These include the Capacity and Performance of Public Sector for Service Delivery Project and Support to Quality Education Project ( Projet d \u2019 Appui \u00e0 une \u00c9ducation de Qualit\u00e9, PAEQ, P132405 ).", + "ner_text": [ + [ + 129, + 144, + "named" + ] + ], + "validated": false, + "empirical_context": "The EMIS will build on efforts that are under way including by UNICEF through Niger \u2019 s Data Must Speak initiative which creates data dashboards at the school level; ( b ) support ( i ) the implementation of a national standardized assessment of students; and ( ii ) building the capacity of National Assessment Unit; and ( c ) M & E results of the project literacy intervention ( under Subcomponent 1. 2 ); and ( d ) support teacher deployment and retention measures in refugee-hosting areas and training in contingency planning in the event of new influx of populations.", + "type": "tool", + "explanation": "'Data dashboards' are tools for visualizing data rather than structured collections of data themselves.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'data dashboards' are datasets because they present data visually.", + "contextual_reason_agent": "'Data dashboards' are tools for visualizing data rather than structured collections of data themselves.", + "contextual_signal": "mentioned only as a tool for presenting data, not as a data source", + "tags": [] + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 86, + "text": "Gender gap in economic opportunities for refugee women90 in Turkey. According to UN Women survey results ( 201891 ), 85 percent of Syrian women were unemployed in Turkey. Only 10 percent of female respondents stated that they work in a regular income-generating activity while 5 percent make their living from irregular or seasonal work. Syrian refugee women who are working are often limited to informal work opportunities with low 84 Cebeci, T. 2014. Performance of Female Employers in Turkey. Ministry of Family and Social Policies of the Republic of Turkey and the World Bank.; Okten, K. 2014. Female Entrepreneurship in Turkey: Patterns, Characteristics and Trends. Ministry of Family and Social Policies of the Republic of Turkey and the World Bank. 85 Cebeci, T. 2014. Performance of Female Employers in Turkey.; Okten, K. 2014. Female Entrepreneurship in Turkey: Patterns, Characteristics and Trends. 86 World Bank. 2009. Female Labor Force Participation in Turkey, Trends, Determinants and Policy Framework. Report number 48508-TR.; Cebeci, T. 2014. Performance of Female Employers in Turkey.; Kizilaslan, N., and M. Karaomer. 2015.", + "ner_text": [ + [ + 81, + 96, + "named" + ], + [ + 60, + 66, + "UN Women survey <> data geography" + ], + [ + 131, + 143, + "UN Women survey <> reference population" + ], + [ + 163, + 169, + "UN Women survey <> data geography" + ], + [ + 338, + 358, + "UN Women survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Gender gap in economic opportunities for refugee women90 in Turkey. According to UN Women survey results ( 201891 ), 85 percent of Syrian women were unemployed in Turkey. Only 10 percent of female respondents stated that they work in a regular income-generating activity while 5 percent make their living from irregular or seasonal work.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on the employment status of Syrian women in Turkey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey conducted by UN Women, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on the employment status of Syrian women in Turkey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 45, + "text": "By having a database with larger geographical coverage, when crises / shocks happen, humanitarian organizations and government agencies will be able to respond faster and in a more coordinated way. The social registry and business functions for the two safety net pilots will be supported by the MIS developed under this Component 2 during the project. As the social registry \u2019 s use is expanded in the future, it can potentially de-linked from this MIS and have its own information system to facilitate flow of information between the social registry and other programs. 30. Payment system. The project will support the development of a payment system for Government safety nets. The payment system would enable Government to distribute the correct amount of benefits to the right people, at the right time, and with the right frequency, while minimizing transaction costs for both the program and the beneficiaries and allowing increased transparency and accountability of financial transactions. The project will use a small number of payment agencies to provide payments to beneficiaries and the selection of payment agencies will be supported by existing ( or new ) information outlining the various agencies and resources available, their pros and cons in the project areas and humanitarian and UN agencies experience for paying cash benefits in Chad. Payment agencies may be selected in each region based on the", + "ner_text": [ + [ + 296, + 299, + "named" + ] + ], + "validated": false, + "empirical_context": "By having a database with larger geographical coverage, when crises / shocks happen, humanitarian organizations and government agencies will be able to respond faster and in a more coordinated way. The social registry and business functions for the two safety net pilots will be supported by the MIS developed under this Component 2 during the project. As the social registry \u2019 s use is expanded in the future, it can potentially de-linked from this MIS and have its own information system to facilitate flow of information between the social registry and other programs.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with data management.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 42, + "text": "In this light, the 40 percent of the project that is designed to support resilient recovery can be counted as contributing to climate change adaptation ( co-benefits ). VI. APPRAISAL SUMMARY A. Economic Analysis 89. The economic analysis conducted as part of the project preparation process suggests that the proposed interventions are economically feasible. In particular sub-components 1. 1, 2. 1 and 2. 3 were assessed in detail and are expected to lead to positive economic rates of return, largely in excess of the discount rate of 6 percent assumed for these interventions, by: ( i ) saving human lives; ( ii ) reducing the 30 FEWS-FSNAU: Food Security Outlook, February to September 2017 31 According to climate data from the CMIP5 ( Coupled Model Inter-comparison Project ) database of the World Climate Research Program, from World Bank Climate Change Knowledge Portal 32 Ibid", + "ner_text": [ + [ + 711, + 723, + "named" + ] + ], + "validated": false, + "empirical_context": "1 and 2. 3 were assessed in detail and are expected to lead to positive economic rates of return, largely in excess of the discount rate of 6 percent assumed for these interventions, by: ( i ) saving human lives; ( ii ) reducing the 30 FEWS-FSNAU: Food Security Outlook, February to September 2017 31 According to climate data from the CMIP5 ( Coupled Model Inter-comparison Project ) database of the World Climate Research Program, from World Bank Climate Change Knowledge Portal 32 Ibid", + "type": "data", + "explanation": "'Climate data' is mentioned as a type of information rather than a structured collection or source used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'climate data' refers to a dataset due to its mention in a scientific context.", + "contextual_reason_agent": "'Climate data' is mentioned as a type of information rather than a structured collection or source used for empirical analysis.", + "contextual_signal": "mentioned only as a type of information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "103_Kenya-Hon-Africa-PAD-04072017", + "page": 17, + "text": "Investments in these traditionally underserved border areas, under the proposed operation, are firmly aligned with the national priority for Kenya as reflected in the North and North Eastern Kenya Development Initiative ( NEDI ) which covers the three counties affected by protracted refugee presence. 22. According to the World Bank \u2010 UNHCR study on \u2018 Forced Displacement and Mixed Migration in the Horn of Africa \u201d, the impact of refugee presence on Kenyan hosts has been positive overall with respect to social and economic impacts, though there have been significant negative impacts on the environment and natural resources. Various World Bank \u2010 UNHCR studies and the multi \u2010 stakeholder consultations in both Kakuma and Dadaab, especially with host communities, refugees, and various partner agencies, during preparation, reveal a number of impacts that refugee presence has on hosting areas. These include increased competition \u2014 direct and indirect \u2014 for basic social services such as health, education, and drinking water; a degraded physical and natural environment because of high pressure on biomass to meet energy and construction needs; limited livelihood opportunities; and decreasing water availability 8 IGAD is among the 8 Regional Economic Communities of the African Union.", + "ner_text": [ + [ + 673, + 706, + "named" + ] + ], + "validated": false, + "empirical_context": "According to the World Bank \u2010 UNHCR study on \u2018 Forced Displacement and Mixed Migration in the Horn of Africa \u201d, the impact of refugee presence on Kenyan hosts has been positive overall with respect to social and economic impacts, though there have been significant negative impacts on the environment and natural resources. Various World Bank \u2010 UNHCR studies and the multi \u2010 stakeholder consultations in both Kakuma and Dadaab, especially with host communities, refugees, and various partner agencies, during preparation, reveal a number of impacts that refugee presence has on hosting areas. These include increased competition \u2014 direct and indirect \u2014 for basic social services such as health, education, and drinking water; a degraded physical and natural environment because of high pressure on biomass to meet energy and construction needs; limited livelihood opportunities; and decreasing water availability 8 IGAD is among the 8 Regional Economic Communities of the African Union.", + "type": "consultation", + "explanation": "However, it is not a dataset as it refers to a process of gathering insights rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves gathering information from various stakeholders.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a process of gathering insights rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 40, + "text": "and working over a one - year period Frequency Annual Data source Fayda system KPI Methodology for Data Collection Fayda data analytics platform Responsibility for Data Collection NIDP Number of penetration tests conducted to prevent cyber-attacks and loss of data ( Number ) Description At least 4 penetration tests conducted each year during the 5-year project duration, intended to proactively identify and address gaps in security ( cyber and physical ) of personal data to prevent unauthorized access to or loss of data. Frequency Annual Data source Test reports by specialized firms Methodology for Data Collection Project progress report Responsibility for Data Collection NIDP Inclusive and sustainable ID issuance Percentage of population within 10 km of a permanent, semi-permanent, or mobile registration site at least once a year ( Percentage ) Description The proportion of people who have access to a permanent, semipermanent, or mobile Fayda registration center at least once a year within less than 10 km from their residence. The indicator will be measured using population estimates per area using satellite data compared to the global positioning system coordinates of Fayda registration centers and mobile units. Frequency Annual Data source Fayda registration centers global positioning system coordinate and data population from online", + "ner_text": [ + [ + 1116, + 1130, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Annual Data source Test reports by specialized firms Methodology for Data Collection Project progress report Responsibility for Data Collection NIDP Inclusive and sustainable ID issuance Percentage of population within 10 km of a permanent, semi-permanent, or mobile registration site at least once a year ( Percentage ) Description The proportion of people who have access to a permanent, semipermanent, or mobile Fayda registration center at least once a year within less than 10 km from their residence. The indicator will be measured using population estimates per area using satellite data compared to the global positioning system coordinates of Fayda registration centers and mobile units. Frequency Annual Data source Fayda registration centers global positioning system coordinate and data population from online", + "type": "data", + "explanation": "However, 'satellite data' is mentioned as a source of information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'satellite data' is a dataset because it refers to a type of data used in analysis.", + "contextual_reason_agent": "However, 'satellite data' is mentioned as a source of information rather than a structured collection of data.", + "contextual_signal": "mentioned as a source of information, not as a dataset", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 43, + "text": "The methodology monitors three aspects of degradation: ( a ) net primary productivity using normalized difference vegetation index ( NDVI ) with information from satellite imagery ( for example, Moderate Resolution Imaging Spectroradiometer ); ( b ) land cover using either a representative area approach with high-resolution imagery, Landsat and Sentinel imagery, or globally produced datasets; and ( c ) soil organic carbon using field sampling. The UNCCD Secretariat will support the CEP to establish baselines for the above three aspects and repeat the measurements at project midterm and completion. 90. The M & E system of the project will comprise both performance and impact monitoring. The project M & E system will include both annual outcome and result targets as well as periodic evaluations of impact on land degradation, using the three LDN indicators, and households \u2019 socioeconomic factors that will be compared with baseline evaluations to be carried out by the CEP IT in Years 1 and 2. The project will make particular efforts to integrate participatory monitoring methods, using ongoing advances in digital tools and data collection, thus giving communities the potential for timely decision - making, wider sharing of results, and greater ownership of investments. The project will design and maintain a website for wider dissemination of the results and progress.", + "ner_text": [ + [ + 335, + 342, + "named" + ], + [ + 92, + 130, + "Landsat <> data description" + ], + [ + 162, + 179, + "Landsat <> data type" + ], + [ + 406, + 425, + "Landsat <> data description" + ] + ], + "validated": true, + "empirical_context": "The methodology monitors three aspects of degradation: ( a ) net primary productivity using normalized difference vegetation index ( NDVI ) with information from satellite imagery ( for example, Moderate Resolution Imaging Spectroradiometer ); ( b ) land cover using either a representative area approach with high-resolution imagery, Landsat and Sentinel imagery, or globally produced datasets; and ( c ) soil organic carbon using field sampling. The UNCCD Secretariat will support the CEP to establish baselines for the above three aspects and repeat the measurements at project midterm and completion.", + "type": "dataset", + "explanation": "In this context, 'Landsat' is indeed a dataset as it provides satellite imagery data used for empirical analysis of land cover.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Landsat' is a dataset because it is associated with imagery used for land cover analysis.", + "contextual_reason_agent": "In this context, 'Landsat' is indeed a dataset as it provides satellite imagery data used for empirical analysis of land cover.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 64, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 59 of 130 electricity access under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. reports. Average household size based on UBOS National Household Survey. People provided with access to electricity under the project with off - grid of which female The indicator will track the number of females benefiting from off-grid electricity access under the project. Share of female is 51 %, UBOS Women and Men in Uganda, Facts and Figures 2016. Quarterly Project implementati on progress reports. UBOS Women and Men in Uganda, Facts and Figures, 2016 Approved loan and grants applications UECCC, MEMD People provided with access to electricity with grid, mini-grid, off-grid in refugee - hosting districts The indicator will track the number of host communities and refugees beneficiaries of access to grid, mini-grid, and off-grid technologies under the project in District hosting refugees. Average household size of 4. 7 people, UBOS National Household Survey 2016 - Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports.", + "ner_text": [ + [ + 189, + 219, + "named" + ], + [ + 150, + 187, + "UBOS National Household Survey <> data description" + ], + [ + 220, + 229, + "UBOS National Household Survey <> publication year" + ], + [ + 554, + 560, + "UBOS National Household Survey <> data geography" + ], + [ + 580, + 584, + "UBOS National Household Survey <> publication year" + ], + [ + 660, + 666, + "UBOS National Household Survey <> data geography" + ], + [ + 1360, + 1378, + "UBOS National Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. reports.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly referenced in the context as a survey that provides empirical data on household size.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly referenced in the context as a survey that provides empirical data on household size.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "187_multi-page", + "page": 10, + "text": "Results of that survey the private sector ) sector manufacturing wages below will provide several baseline measures of public / private pay 0. 5 provides inadequate attraction for relationships. Thereafter, the Government will report on skilled staff, or above 1. 5 indicates civil service and other public employee salaries, whose capture by existing personnel. changes will be tracked relative to changes in measures such as GDP and CPI, to assess whether public salaries are adjusting as desired, given their starting point. ( Periodicity: Once for baseline. Annual for tracking changes within the public sector relative to changes in GDP and CPI. ) Civil service pay Horizontal decompression Central pay and employment registry currently being ( horizontal ( discretionary allowances over and developed by the government will, once up and running, compression ) above base pay ) in excess of 1: 1. 2 produce reports on the composition of the budget-financed provides opportunities for excessive wage bill by component of salary, as defined in the Civil managerial discretion, facilitating Service Law, including variance in that composition across organized corruption and public agencies. ( Periodicity: Semi-annual, once the pay and rent-seeking. employment registry is functioning ). A survey of public officials is being completed in early 2000. Data from that survey, as well as from the public / private salary survey to be undertaken later in 2000, will allow identification of a baseline assessment of horizontal compression. The survey of public officials will be repeated in 2002, permitting assessment near the time of the project ' s midterm review of progress on reducing any excessive horizontal decompression. ( Periodicity: Once for baseline. At least one follow-up survey ) 7 -", + "ner_text": [ + [ + 1293, + 1319, + "named" + ], + [ + 211, + 221, + "survey of public officials <> author" + ], + [ + 1348, + 1352, + "survey of public officials <> reference year" + ], + [ + 1589, + 1593, + "survey of public officials <> publication year" + ] + ], + "validated": true, + "empirical_context": "employment registry is functioning ). A survey of public officials is being completed in early 2000. Data from that survey, as well as from the public / private salary survey to be undertaken later in 2000, will allow identification of a baseline assessment of horizontal compression.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned that data from the survey will be used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that data from the survey will be used for empirical analysis.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "057_West-Bank-and-Gaza-Phase-1-of-the-Multiphase-Programmatic-Approach-Advancing-Sustainability-in-Performance-Infrastructure-and-Reliability-of-Energy-Sector-Project", + "page": 47, + "text": "The World Bank Advancing Sustainability in Performance, Infrastructure, and Reliability of the Energy Sector in the West Bank and Gaza ( P170928 ) Page 44 of 74 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 PENRA publishes on its website results of citizen engagement survey ( Number ) 0. 00 1. 00 2. 00 Grievances registered related to delivery of project benefits that are actually addressed ( Percentage ) 0. 00 90. 00 90. 00 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Non-technical losses West Bank Average based on non - technical losses recorded for each distribution company in West Bank Annual Non-technical losses reported by each distribution company individually Primary data PENRA PMU Non-technical losses Gaza Non-technical losses as per Gaza Distribution Company ( GEDCO ) Annual Gaza electricity distribution company ( GEDCO ) Primary data provided by GEDCO PENRA PMU", + "ner_text": [ + [ + 285, + 310, + "named" + ], + [ + 4, + 14, + "citizen engagement survey <> publisher" + ], + [ + 116, + 125, + "citizen engagement survey <> data geography" + ], + [ + 243, + 248, + "citizen engagement survey <> publisher" + ], + [ + 689, + 698, + "citizen engagement survey <> data geography" + ], + [ + 883, + 888, + "citizen engagement survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Advancing Sustainability in Performance, Infrastructure, and Reliability of the Energy Sector in the West Bank and Gaza ( P170928 ) Page 44 of 74 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 PENRA publishes on its website results of citizen engagement survey ( Number ) 0. 00 1.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned that PENRA publishes results of the citizen engagement survey, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on citizen engagement.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned that PENRA publishes results of the citizen engagement survey, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 26 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Name: Percentage of beneficiaries taking a more active role in their communities Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "ner_text": [ + [ + 544, + 559, + "named" + ], + [ + 45, + 73, + "Baseline Survey <> reference population" + ], + [ + 77, + 87, + "Baseline Survey <> data geography" + ], + [ + 265, + 339, + "Baseline Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey used for data collection.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda Project ( P176747 ) Page 26 of 77 60. Subcomponent 4A: Project management support for high-quality implementation. This subcomponent will finance the Project Implementation Teams ( PITs ) at the MGLSD and the PSFU. it will finance capacity building activities, including ( a ) of the national, district, subcounty, parish, refugee settlement stakeholders and implementation support teams. It will finance the development of key partnerships including of quarterly review meetings for all stakeholders involved in the project at the regional and district levels. The project as part of its Monitoring and Evaluation ( M & E ) activities will design and develop an MIS that collects and stores detailed data from project applicants during the registration process ( i. e.,, before beneficiaries have accessed any project-financed activities ). As indicated in component 1, the MIS is expected to assign a unique identifier to each registered applicant ( GROW_ID ) that will be shared with the applicant.", + "ner_text": [ + [ + 756, + 759, + "named" + ] + ], + "validated": false, + "empirical_context": "It will finance the development of key partnerships including of quarterly review meetings for all stakeholders involved in the project at the regional and district levels. The project as part of its Monitoring and Evaluation ( M & E ) activities will design and develop an MIS that collects and stores detailed data from project applicants during the registration process ( i. e.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves data collection and storage.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 128, + "text": "This subcomponent will support participatory, multisectoral planning and budgeting, as well as quality data collection, utilization, and verification. The establishment of planning and budget structures with incentive mechanisms for convergent service delivery will be critical for the successful implementation of the Human Capital SPGs. This subcomponent will finance the capacity building of these structures at all levels. In addition, it will support baseline data collection from each SPG woreda. Specific activities under this subcomponent will include: ( a ) strengthening systems and capacity for quality service delivery, 118 ( b ) increasing capacity to prioritize local needs using available data and improving multisectoral planning and budgeting skills to address these priorities at the woreda level, and ( c ) improving the quality of data collection, local verification, and utilization at the woreda level, including support for the establishment of the Unified Nutrition Information System for Ethiopia ( UNISE ) in the target SPG woredas to monitor and verify key indicators. Subcomponent 2. 3. 2 Strengthening accountability, fiduciary and E & S management at sub-national levels to improve service delivery. ( US $ 18 million equivalent IDA Credit and US $ 4 million GFF Grant ) 16. This subcomponent will support the promotion, institutionalization, and sustainability of social 118 TA to improve community engagement in budget and planning processes may include improved sensitization of communities to Community-Based Health Insurance ( CBHI ) enrollment which can improve demand for quality health services.", + "ner_text": [ + [ + 972, + 1021, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition, it will support baseline data collection from each SPG woreda. Specific activities under this subcomponent will include: ( a ) strengthening systems and capacity for quality service delivery, 118 ( b ) increasing capacity to prioritize local needs using available data and improving multisectoral planning and budgeting skills to address these priorities at the woreda level, and ( c ) improving the quality of data collection, local verification, and utilization at the woreda level, including support for the establishment of the Unified Nutrition Information System for Ethiopia ( UNISE ) in the target SPG woredas to monitor and verify key indicators. Subcomponent 2.", + "type": "system", + "explanation": "However, it is described as a system for monitoring and verifying indicators, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a system for monitoring and verifying indicators, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 80, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 68. Verification Protocol Table: Disbursement Linked Indicators DLI_TBL_VERIFICATION DLI 1 Girls \u2019 promotion rate from grade 11 to 12, nationwide Description The improvement in grils enrollment is measured by the proportion of girls enrolled in grade 11 at a given school year who study in the grade 12 in the following school year. The indicator is calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year t. Data source / Agency EMIS Verification Entity ESS Procedure Data collected by MOE through the annual EMIS process and verified by ESS using a representative sample. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 2 Children 0-59 months with diarrhea treated with ORS and Zinc in low performing regions Description The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc Data source / Agency DHIS2 Verification Entity ESS Procedure Data collected by MOH through the annual DHIS2 and verified by ESS using a representative sample.", + "ner_text": [ + [ + 1210, + 1215, + "named" + ] + ], + "validated": false, + "empirical_context": "The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 2 Children 0-59 months with diarrhea treated with ORS and Zinc in low performing regions Description The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc Data source / Agency DHIS2 Verification Entity ESS Procedure Data collected by MOH through the annual DHIS2 and verified by ESS using a representative sample.", + "type": "system", + "explanation": "However, DHIS2 is described as a health information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and verification.", + "contextual_reason_agent": "However, DHIS2 is described as a health information system, not a structured collection of data itself.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty. The project will also provide TA to INS to train and assist targeted ministries in integrating national statistics in the definition of sector strategies, programs, and public investment projects. 37 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank - financed project in the health sector. In total, the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 157, + 163, + "named" + ], + [ + 176, + 180, + "ECAM 5 <> publication year" + ], + [ + 184, + 188, + "ECAM 5 <> publication year" + ], + [ + 851, + 861, + "ECAM 5 <> publisher" + ], + [ + 917, + 927, + "ECAM 5 <> publisher" + ] + ], + "validated": true, + "empirical_context": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty.", + "type": "survey", + "explanation": "ECAM 5 is indeed a dataset as it is referenced in relation to data collection methodology and is part of the planned activities for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed ECAM 5 is a dataset because it is mentioned in the context of data collection and analysis activities.", + "contextual_reason_agent": "ECAM 5 is indeed a dataset as it is referenced in relation to data collection methodology and is part of the planned activities for empirical analysis.", + "contextual_signal": "mentioned as part of data collection activities", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 58, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLIX Against DLR 9. 2: US $ 1 million for each core micro-data set made available via an online portal to users in adherence with the access protocols for up to a total of US $ 8 million. Against DLR 9. 3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "ner_text": [ + [ + 142, + 161, + "named" + ], + [ + 4, + 14, + "core micro-data set <> publisher" + ], + [ + 379, + 433, + "core micro-data set <> data description" + ], + [ + 607, + 612, + "core micro-data set <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLIX Against DLR 9. 2: US $ 1 million for each core micro-data set made available via an online portal to users in adherence with the access protocols for up to a total of US $ 8 million. Against DLR 9.", + "type": "dataset", + "explanation": "This is indeed a dataset as it is described as being made available via an online portal, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a 'core micro-data set' which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as being made available via an online portal, indicating its use as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31. Subcomponent 1. 2: Boma Health Initiative ( implemented by UNICEF; US $ 12. 41 million: US $ 3. 41 million equivalent IDA [ WHR IDA ] and US $ 9. 0 million Trust Funds [ US $ 0. 95 million SDTF and US $ 8. 05 million MDTF ] ).", + "ner_text": [ + [ + 178, + 187, + "named" + ], + [ + 322, + 328, + "HMIS data <> publisher" + ], + [ + 800, + 806, + "HMIS data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31.", + "type": "data", + "explanation": "In the context, 'HMIS data' is explicitly mentioned as being recorded and provided, indicating it is used as a data source for health facilities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'HMIS data' is a dataset because it refers to a specific type of health data collected and used in health management information systems.", + "contextual_reason_agent": "In the context, 'HMIS data' is explicitly mentioned as being recorded and provided, indicating it is used as a data source for health facilities.", + "contextual_signal": "mentioned as data to be recorded and provided", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 9, + "text": "In addition, with a 54 percent gross enrolment rate in tertiary education in 2010, Lebanon greatly exceeds the 31 and 26 percent enrolment rates registered, respectively, for the MENA region and for middle-income countries. Still, weak institutional capacity and inefficiencies limit the country ' s prospects for developing higher quality education and health services. In some sectors, public service delivery is severely under - resourced, especially in terms of skilled staff, which affects the performance of ministries and their capacity to deliver quality services. Therefore, those who can afford it rely on the more expensive services provided by the private sector. There is widespread consensus that improved service delivery is a critical step toward a more inclusive Lebanon. 7 The last national household budget survey conducted in Lebanon was in 2004. The Central Administration of Statistics is currently finalizing the next HBS ( 2011 / 2012 ) with support from the World Bank. United Nations Development Program ( 2008 ). Poverty, Growth and Income Distribution in Lebanon. Beirut, Lebanon. 9 Findings from the Lebanon Financial Capability and Literacy Survey undertaken in May 2012. This was the first survey ever measuring financial literacy and capabilities at the country level. 10 All comparisons in the paragraph are based on the World Development Indicators for 2010 10", + "ner_text": [ + [ + 800, + 832, + "named" + ], + [ + 83, + 90, + "national household budget survey <> data geography" + ], + [ + 780, + 787, + "national household budget survey <> data geography" + ], + [ + 846, + 853, + "national household budget survey <> data geography" + ], + [ + 861, + 865, + "national household budget survey <> reference year" + ], + [ + 871, + 907, + "national household budget survey <> author" + ], + [ + 947, + 958, + "national household budget survey <> publication year" + ], + [ + 983, + 993, + "national household budget survey <> publisher" + ], + [ + 995, + 1029, + "national household budget survey <> publisher" + ], + [ + 1100, + 1107, + "national household budget survey <> data geography" + ], + [ + 1129, + 1136, + "national household budget survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "There is widespread consensus that improved service delivery is a critical step toward a more inclusive Lebanon. 7 The last national household budget survey conducted in Lebanon was in 2004. The Central Administration of Statistics is currently finalizing the next HBS ( 2011 / 2012 ) with support from the World Bank.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data from a national survey used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects household budget data.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data from a national survey used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 79, + "text": "This approach improves transparency with respect to the purposes of resources and the efficiency of their use. It is also helpful in improving communications between the Government and its citizens. The GFMIS will easily provide information on how the annual Budget Law and its execution support the strategic priorities of the Government. The GFMIS has been fully rolled out to budget units ( ministries, departments, and regional financial centers ) encompassed in the Budget Law except three ( the Ministry of Foreign Affairs, Water Authority, and civil defense ) that would be completed in 2016. The Government has launched GFMIS phase two aiming to connect IPUs by 2018. 28. The current GFMIS implementation utilizes a subset of the functionalities of the underlying application software. The current core application software comprises: ( a ) Hyperion for budget preparation; ( b ) Oracle Financials for budget execution; and ( c ) software for interfacing to other software for debt management, payroll, bank reconciliation, and revenue management. The GFMIS is fully utilized for", + "ner_text": [ + [ + 692, + 697, + "named" + ] + ], + "validated": false, + "empirical_context": "28. The current GFMIS implementation utilizes a subset of the functionalities of the underlying application software. The current core application software comprises: ( a ) Hyperion for budget preparation; ( b ) Oracle Financials for budget execution; and ( c ) software for interfacing to other software for debt management, payroll, bank reconciliation, and revenue management.", + "type": "system", + "explanation": "GFMIS is mentioned as a system for financial management, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is related to financial management and software applications.", + "contextual_reason_agent": "GFMIS is mentioned as a system for financial management, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 14, + "text": "The research found important challenges to Syrian refugee households, and hence to women, especially concerning tensions such as gender-based violence within families. In such situations, and particularly within single parent households, children are also often employed informally. The research data suggests that more male than female children are working, at the expense of their education and future economic prospects. 22 17. Despite the challenges, Syrian refugees are creating jobs and participating in the Turkish economy by establishing their own businesses \u2013 five Syrian businesses are established each working day. There is emerging evidence that despite the struggle to obtain employment, displaced Syrians in Turkey are contributing positively to the local economy and to the creation of new enterprises. Many micro, small, and medium enterprises have been established as startups in Turkey, owned by Syrians or run jointly by Turkish citizens and Syrians. The total number of Syrian-owned businesses in Turkey, including informal ( unregistered ), is estimated at 10, 000, constituting almost one fifth of the foreign-owned companies in the country. According to a recent report, 39 percent of Syrian entrepreneurs plan to establish an additional business in Turkey, and 76 percent intend to retain their businesses in Turkey while expanding their businesses in Syria.", + "ner_text": [ + [ + 287, + 300, + "named" + ], + [ + 43, + 68, + "research data <> reference population" + ], + [ + 315, + 357, + "research data <> data description" + ], + [ + 722, + 728, + "research data <> data geography" + ], + [ + 897, + 903, + "research data <> data geography" + ], + [ + 1017, + 1023, + "research data <> data geography" + ], + [ + 1273, + 1279, + "research data <> data geography" + ], + [ + 1333, + 1339, + "research data <> data geography" + ] + ], + "validated": true, + "empirical_context": "In such situations, and particularly within single parent households, children are also often employed informally. The research data suggests that more male than female children are working, at the expense of their education and future economic prospects. 22 17.", + "type": "data", + "explanation": "In this context, 'research data' is indeed used as a source of information to support the findings about child employment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'research data' refers to a dataset because it implies a collection of empirical information used for analysis.", + "contextual_reason_agent": "In this context, 'research data' is indeed used as a source of information to support the findings about child employment.", + "contextual_signal": "described as a source of information for analysis", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 55, + "text": "The target Semi \u2010 annually Project MIS CDC profiles, GA / BGA profiles, nahia level administrative data, quarterly progress reports, evaluation IDLG \u2010 CCAP PIU with FPs, IDLG \u2010 CIP PIU, KM PIU", + "ner_text": [ + [ + 72, + 103, + "named" + ] + ], + "validated": true, + "empirical_context": "The target Semi \u2010 annually Project MIS CDC profiles, GA / BGA profiles, nahia level administrative data, quarterly progress reports, evaluation IDLG \u2010 CCAP PIU with FPs, IDLG \u2010 CIP PIU, KM PIU", + "type": "administrative data", + "explanation": "This is indeed a dataset as it refers to structured administrative data used for project monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected at the nahia level.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured administrative data used for project monitoring and evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 64, + "text": "The effect of remedial education on learning outcomes is well established in the literature: teaching oriented to the level of the student rather than the level prescribed for the student \u2019 s grade in the curriculum has produced large gains in learning as found in a number of randomized controlled trials ( Banerjee et al. 2016 ). Table 2. 3 presents effect sizes from evaluations of interventions that have elements of remedial learning targeting struggling students. These vary in modality including computer-assisted adaptive learning, volunteer community members providing after-school tutoring, and curriculum adjustments to focus on core skills. Together, they provide a sense of what would be a reasonable range of effect sizes to expect from the interventions supported under this project. 7. Modeling the benefits of Component 2 relies on the association between the digital learning environment and learning outcomes in PISA 2018. Given that the type of support to schools under Component 2 would vary depending on the needs of recipient schools, it is not possible to identify a range of effect sizes from similar interventions in the impact evaluation literature. Instead, the approach is to use an indicator of school digital learning environment in the PISA 2018 data and use the association between this indicator and learning outcomes as an effect size. The indicator used was whether all computers in the school were connected to the internet or not. The difference in learning outcomes between children at schools with all computers connected to the internet and those not was 0. 15 SD after controlling for differences in household wealth, gender, age, grade, and number of years of preprimary education.", + "ner_text": [ + [ + 1212, + 1260, + "named" + ] + ], + "validated": false, + "empirical_context": "Given that the type of support to schools under Component 2 would vary depending on the needs of recipient schools, it is not possible to identify a range of effect sizes from similar interventions in the impact evaluation literature. Instead, the approach is to use an indicator of school digital learning environment in the PISA 2018 data and use the association between this indicator and learning outcomes as an effect size. The indicator used was whether all computers in the school were connected to the internet or not.", + "type": "indicator", + "explanation": "However, it is not a dataset but rather a specific measure or variable derived from the PISA 2018 data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a measurable aspect of the school digital learning environment.", + "contextual_reason_agent": "However, it is not a dataset but rather a specific measure or variable derived from the PISA 2018 data.", + "contextual_signal": "described as an indicator, not a dataset", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 61, + "text": "An example is a record of achievement in education ( proof that the person has a qualification, such as a degree ), a medical record ( details of the person \u2019 s current medication and conditions ), or an entitlement document, such as a national identity card ( identifying the person \u2019 s legal name ). Holding and managing all this data in a single database is impractical and creates security vulnerabilities. Sharing this data securely and reliably under the consent of the individual offers many advantages over silos of data and functionality. People should have the ability to manage their consent with a particular service or dataset, including the ability to review and revoke consent as necessary. Taking a 27 Digital Public Infrastructure ( DPI ) refers to digital ID, payment, and data exchange capabilities that are fundamental to enabling service delivery at scale and supporting innovation in the digital economy. DPI provides reusable and foundational digital platforms that allow public and private sector service providers to build and innovate their products and services.", + "ner_text": [ + [ + 118, + 132, + "named" + ] + ], + "validated": true, + "empirical_context": "An example is a record of achievement in education ( proof that the person has a qualification, such as a degree ), a medical record ( details of the person \u2019 s current medication and conditions ), or an entitlement document, such as a national identity card ( identifying the person \u2019 s legal name ). Holding and managing all this data in a single database is impractical and creates security vulnerabilities.", + "type": "record", + "explanation": "However, in this context, it is described as an example of a type of record rather than a dataset used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'medical record' is a dataset because it refers to a structured collection of medical information about a person.", + "contextual_reason_agent": "However, in this context, it is described as an example of a type of record rather than a dataset used for empirical analysis.", + "contextual_signal": "mentioned only as an example, not as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 883, + 888, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5.", + "type": "system", + "explanation": "However, HRMIS is described as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is mentioned in the context of performance-based promotions and records.", + "contextual_reason_agent": "However, HRMIS is described as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 33, + "text": "procurement decisions that are proposed by CU. Its functions will be specified in the Project Implementation Manual. C. Monitoring and evaluation of outcomeshesults 79. The monitoring system has been designed to maximize the synergy with other data monitoring efforts and makes it possible to assess progress by observing changes in parameters accessible from outside the project structure. To this end, the system will build upon biodiversity, and socioeconomic baselines established by all agencies having worked in the project area and establish strong links with the WRI DRC Forest Atlas, the CARPE monitoring system and other efforts such as those supported by WWF and IUCN ( International Union for the Conservation of Nature ). Collaboration with these organizations will be framed through memoranda of understanding or other suitable formal agreements. By helping MECNT access, analyze, and store information, images and data, the project will help build long-term monitoring capacity in the ministry and strengthen its currently tenuous links with the social and environmental monitoring efforts o f foreign assistance organizations ( more in Annex 3 ). 80. In collaboration with the CU, project indicators will be compiled and analyzed by the Direction des Etudes et Planification ( DEP ) in MECNT.", + "ner_text": [ + [ + 597, + 620, + "named" + ] + ], + "validated": false, + "empirical_context": "The monitoring system has been designed to maximize the synergy with other data monitoring efforts and makes it possible to assess progress by observing changes in parameters accessible from outside the project structure. To this end, the system will build upon biodiversity, and socioeconomic baselines established by all agencies having worked in the project area and establish strong links with the WRI DRC Forest Atlas, the CARPE monitoring system and other efforts such as those supported by WWF and IUCN ( International Union for the Conservation of Nature ). Collaboration with these organizations will be framed through memoranda of understanding or other suitable formal agreements.", + "type": "system", + "explanation": "However, the context describes it as a system for monitoring rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'monitoring system' which suggests data collection.", + "contextual_reason_agent": "However, the context describes it as a system for monitoring rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "161_28046", + "page": 29, + "text": "Activity Monitoring and Evaluation. Activities on any level would be monitored following structured reporting and assessment forms and procedures. Given the program \u2019 s large scope o f interventions and decentralized nature of activities, it would be necessary to incorporate a coherent and consistent set o f indicators into all contracts / agreements funded by the project. For example, the health centers or prefectoral hospitals would be required to submit their plans following the logical framework outline linking inputs, process, outputs. Agreements / contracts would be performance-based and would thus identify all yearly indicators which those front-line health structures plan on achieving. These indicators would be compiled and aggregated in the annual report o f the Task Force. Outcome and Impact Monitoring and Evaluation. Another aspect o f the M & E system would be the monitoring o f the outcome and impact; this would be done by a Demographic and Health Survey at the beginning and end o f the project. In addition, data on deaths avoided would be calculated through operational research contracted to a specialized institution which would use DHS estimates as well as health structures records on coverage. Quality o f services would be checked yearly based on a simple checklist which describes the standards expected and which would be designed with the help o f GTZ. Such quality check would be contracted out to consultants.", + "ner_text": [ + [ + 952, + 981, + "named" + ], + [ + 1037, + 1059, + "Demographic and Health Survey <> data description" + ], + [ + 1165, + 1168, + "Demographic and Health Survey <> publisher" + ], + [ + 1190, + 1215, + "Demographic and Health Survey <> data type" + ] + ], + "validated": true, + "empirical_context": "Outcome and Impact Monitoring and Evaluation. Another aspect o f the M & E system would be the monitoring o f the outcome and impact; this would be done by a Demographic and Health Survey at the beginning and end o f the project. In addition, data on deaths avoided would be calculated through operational research contracted to a specialized institution which would use DHS estimates as well as health structures records on coverage.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used for monitoring outcomes and impacts in the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects demographic and health data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used for monitoring outcomes and impacts in the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 35, + "text": "Denominator: Total number of expected live births within the host commnunity of Garissa and Turkana, during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of refugee women receiving postnatal care within 48 hours in Garissa and Turkana ( Percentage ) Description Numerator: Number of refugee women in Garissa and Turkana, receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births within the refugee community in Garissa and Turkana, during the reporting period Frequency Every six months Data source UNHCR reports Methodology for Data Collection Routine UNHCR data collection Responsibility for Data Collection MoH Percentage of children immunized with three doses of Pentavalent vaccine ( Percentage ) Description Numerator: Number of children under 1 year who have received three doses of the Pentavalent vaccine Denominator: Total number of surviving children under 1 year Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data MoH", + "ner_text": [ + [ + 213, + 217, + "named" + ] + ], + "validated": false, + "empirical_context": "Denominator: Total number of expected live births within the host commnunity of Garissa and Turkana, during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of refugee women receiving postnatal care within 48 hours in Garissa and Turkana ( Percentage ) Description Numerator: Number of refugee women in Garissa and Turkana, receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births within the refugee community in Garissa and Turkana, during the reporting period Frequency Every six months Data source UNHCR reports Methodology for Data Collection Routine UNHCR data collection Responsibility for Data Collection MoH Percentage of children immunized with three doses of Pentavalent vaccine ( Percentage ) Description Numerator: Number of children under 1 year who have received three doses of the Pentavalent vaccine Denominator: Total number of surviving children under 1 year Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data MoH", + "type": "system", + "explanation": "'HMIS' is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is associated with data collection methodologies.", + "contextual_reason_agent": "'HMIS' is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 82, + "text": "The World Bank Strengthening Public Sector Efficiency and Statistical Capacity Project ( P151155 ) Page 79 of 93 \uf0b7 Increased share of competitive procurement methods \uf0b7 Increase value for money for public contract \uf0b7 Operationalization of MOOC Subcomponent 3. 2: Enhancing the capacity of organizations in the procurement system to carry out their functions \uf0b7 Clarified procurement rules, guidelines, and procedures \uf0b7 Setting an e-tracking system for the procurement chain 0. 5m Subcomponent 3. 3: Streamlining the procurement regulatory framework \uf0b7 Revised procurement code and related instruments 0. 7m Subcomponent 3. 4: Improving the management and monitoring of procurement performance: \uf0b7 Piloting individual performance contract approach in the procurement system \uf0b7 RRI to support procurement process performance in the pilot 3. 3m Improved decision - making process based on reliable statistical data Component 4: Enhancing the use of statistics for policy making \uf0b7 Timely production of reliable statistical data \uf0b7 Statistics widely disseminated Subcomponent 4. 1: Improvement of poverty-related data \uf0b7 Production of a series of Poverty Notes ( based on ECAM 4 and high - frequency surveys ) \uf0b7 Production of ECAM 5 \uf0b7 Analysis of the population census \uf0b7 Production of the LFS 5. 4m Subcomponent 4. 2: Strengthening the national accounts production \uf0b7 Quarterly production of improved national accounts ( including revised methodology for cross-border trade statistics ) \uf0b7 Creation of an economic simulation tool for MINEPAT simulation 2", + "ner_text": [ + [ + 1159, + 1165, + "named" + ], + [ + 4, + 14, + "ECAM 4 <> publisher" + ], + [ + 1238, + 1255, + "ECAM 4 <> data type" + ] + ], + "validated": true, + "empirical_context": "3m Improved decision - making process based on reliable statistical data Component 4: Enhancing the use of statistics for policy making \uf0b7 Timely production of reliable statistical data \uf0b7 Statistics widely disseminated Subcomponent 4. 1: Improvement of poverty-related data \uf0b7 Production of a series of Poverty Notes ( based on ECAM 4 and high - frequency surveys ) \uf0b7 Production of ECAM 5 \uf0b7 Analysis of the population census \uf0b7 Production of the LFS 5. 4m Subcomponent 4.", + "type": "survey", + "explanation": "In the context, 'ECAM 4' is explicitly linked to the production of statistical data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'ECAM 4' is mentioned in the context of producing poverty-related data.", + "contextual_reason_agent": "In the context, 'ECAM 4' is explicitly linked to the production of statistical data, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 26 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Name: Percentage of beneficiaries taking a more active role in their communities Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "ner_text": [ + [ + 1132, + 1147, + "named" + ], + [ + 4, + 14, + "Baseline Survey <> publisher" + ], + [ + 45, + 73, + "Baseline Survey <> reference population" + ], + [ + 77, + 87, + "Baseline Survey <> data geography" + ], + [ + 265, + 339, + "Baseline Survey <> data description" + ], + [ + 490, + 502, + "Baseline Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "type": "survey", + "explanation": "The Baseline Survey is explicitly mentioned as a tool for data collection in the context, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey used for data collection.", + "contextual_reason_agent": "The Baseline Survey is explicitly mentioned as a tool for data collection in the context, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 40, + "text": "DLI 7: Pregnant women and caregivers of children 0-23 months participating in community conversation sessions in 29 selected SPG woredas. This DLI will promote an increase in the proportion of pregnant women and caregivers of children 0-23 months old participating in community conversation sessions by 10 percentage points in the 29 target woredas ( from 0 percent baseline to 10 percent target ). It will be measured through routine health data. The DLI is scalable, measured annually, and specific woreda-level baseline and target values will be developed and included in the POM. TA support for innovative approaches to improve the coverage and quality of community conversation sessions in the current FCV context, ongoing droughts and new levels of refugees, will be provided to woredas through the IPF component. Evidence in Ethiopia demonstrates that prolonged periods of drought increase stunting prevalence in the country. 67 Given the direct link between drought and climate change in the context, community conversations will help children and mothers adapt to the impacts of climate change by improving their dietary diversity and promoting optimal feeding practices. Community conversations will also be crucial to identifying growth faltering among children under two in the communities and expanding targeted growth promotions considering the climate change crisis. Demonstrating multisectoral planning and budgeting and convergent service delivery to promote peace building and community resilience in SPG woredas 66.", + "ner_text": [ + [ + 427, + 446, + "named" + ], + [ + 7, + 60, + "routine health data <> reference population" + ], + [ + 832, + 840, + "routine health data <> data geography" + ] + ], + "validated": true, + "empirical_context": "This DLI will promote an increase in the proportion of pregnant women and caregivers of children 0-23 months old participating in community conversation sessions by 10 percentage points in the 29 target woredas ( from 0 percent baseline to 10 percent target ). It will be measured through routine health data. The DLI is scalable, measured annually, and specific woreda-level baseline and target values will be developed and included in the POM.", + "type": "data", + "explanation": "In this context, 'routine health data' is explicitly mentioned as a means to measure participation, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'routine health data' suggests a systematic collection of health-related information.", + "contextual_reason_agent": "In this context, 'routine health data' is explicitly mentioned as a means to measure participation, indicating it is used as a data source.", + "contextual_signal": "follows 'measured through'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 45, + "text": "In addition to the PAC to be established, the project will also ensure frequent community roundtables or forums with water users to inform them of the status of investments, seek their feedback regarding project implementation progress, and discuss any corrective action that was taken to address issues raised through the feedback process. In addition, starting from the second year of project implementation, the project will support target utilities in Balkhi and Dusti districts in launching the GRM with multiple channels of communications. Information on each channel will need to be monitored separately and reported by the utilities on a quarterly basis. 91. Gender. A baseline survey conducted in one of the districts under the RWSSP confirmed several common gender-based challenges related to WSS access. Those include ( a ) time burden for women and girls due to unreliable and unsafe water supplies; ( b ) poor water quality and healthcare responsibilities for children; ( c ) physical, social, and health risks associated with collecting water or using open toilets; and ( d ) inequitable access to information, training, and opportunities for employment in water institutions, particularly in technical and decision-making roles. The baseline assessment conducted in Vosse district confirmed that water collection responsibility is mainly assigned to women regardless of the water source type and distance to the source.", + "ner_text": [ + [ + 677, + 692, + "named" + ], + [ + 117, + 128, + "baseline survey <> reference population" + ], + [ + 456, + 482, + "baseline survey <> data geography" + ], + [ + 851, + 866, + "baseline survey <> reference population" + ], + [ + 989, + 1078, + "baseline survey <> data description" + ], + [ + 1090, + 1189, + "baseline survey <> data description" + ], + [ + 1281, + 1295, + "baseline survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Gender. A baseline survey conducted in one of the districts under the RWSSP confirmed several common gender-based challenges related to WSS access. Those include ( a ) time burden for women and girls due to unreliable and unsafe water supplies; ( b ) poor water quality and healthcare responsibilities for children; ( c ) physical, social, and health risks associated with collecting water or using open toilets; and ( d ) inequitable access to information, training, and opportunities for employment in water institutions, particularly in technical and decision-making roles.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to confirm gender-based challenges related to WSS access.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is used to confirm gender-based challenges related to WSS access.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 45, + "text": "Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation, MIS database. Data collected by BRD, MINEMA and BDF. Responsibility for Data Collection MINEMA, BRD, BDF Improved environmental management in the target areas People benefitting from enhanced resilience of terrestrial and aquatic systems ( Number of people ) Description Quantitative indicator counting number of beneficiaries in the catchment area where environmental management activities under component 3 have been implemented. Data is disaggregated by gender, youth ( 16-30 years, in line with GoR guidelines ) and status ( refugee / host community member ). The youth target of 26 % is based on the youth population in the five RHDs 2022 census ). The beneficiary number includes the camp-based refugee population and people living in the villages surrounding the five camps. Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation. MIS database with population statistics for project sites cross - tabulated with hectarage benefiting from improved terrestrial and aquatic systems. Responsibility for Data Collection MINEMA Monitoring & Evaluation Plan: Intermediate Results Indicators by Components Access to Services and Socio-economic Investments Climate-resilient infrastructure subprojects for basic services ( education, health, water and sanitation ) completed in refugee hosting districts ( Number ) Description Quantitative indicator counting number of infrastructure subprojects completed in refugee hosting districts. Data is disaggregated by type of sub-project ( education, health, water and sanitation ). Climate resilience is defined by compliance with GoR standards. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA", + "ner_text": [ + [ + 111, + 123, + "named" + ], + [ + 750, + 754, + "MIS database <> publication year" + ], + [ + 801, + 830, + "MIS database <> reference population" + ], + [ + 1022, + 1043, + "MIS database <> data description" + ] + ], + "validated": true, + "empirical_context": "Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation, MIS database. Data collected by BRD, MINEMA and BDF.", + "type": "database", + "explanation": "The term is indeed a dataset as it is explicitly mentioned as a database that collects and stores data for project monitoring.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS database' suggests a structured collection of data used for monitoring.", + "contextual_reason_agent": "The term is indeed a dataset as it is explicitly mentioned as a database that collects and stores data for project monitoring.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "ner_text": [ + [ + 692, + 701, + "named" + ], + [ + 155, + 211, + "SNSOP MIS <> data description" + ], + [ + 220, + 225, + "SNSOP MIS <> reference population" + ], + [ + 533, + 549, + "SNSOP MIS <> data type" + ], + [ + 646, + 658, + "SNSOP MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "type": "system", + "explanation": "In the context, SNSOP MIS is explicitly mentioned as a system used for data collection and management, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a management information system (MIS) that collects and updates data.", + "contextual_reason_agent": "In the context, SNSOP MIS is explicitly mentioned as a system used for data collection and management, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for collecting and updating beneficiary data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 38, + "text": "To ensure that communities can engage nevertheless, the project will actively engage with citizens to collect feedback on project performance, including through the use of the Iterative Beneficiary Monitoring ( IBM ) survey and social media surveys. Findings from such surveys will be used to improve the communication campaign and citizen engagement. Through the IBM, as well as social media surveys, engagement with community and religious leaders, especially in remote areas, will ensure the inclusion of their ongoing feedback in the rollout and implementation of the COVID-19 vaccination campaign to strengthen targeting accuracy and increase uptake. To ensure citizen engagement, the project will: ( a ) ensure community engagement teams are gender-balanced; ( b ) target messages to areas where vulnerable groups, including refugees and IDPs, reside to inform them about safety measures and benefits; ( c ) tailor messages to the elderly and those with medical risks including their target family members and health care providers; and ( d ) provide information for disabled people in accessible formats, like Braille, large print; text captioning; videos etc. The project will also explore the possibility of including NGO representation in oversight bodies established to oversee transparent and inclusive administration of vaccines. H. Gender 87.", + "ner_text": [ + [ + 176, + 223, + "named" + ] + ], + "validated": true, + "empirical_context": "To ensure that communities can engage nevertheless, the project will actively engage with citizens to collect feedback on project performance, including through the use of the Iterative Beneficiary Monitoring ( IBM ) survey and social media surveys. Findings from such surveys will be used to improve the communication campaign and citizen engagement.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used to collect feedback and improve project performance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used to collect feedback and improve project performance.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 72, + "text": "EU 15, 000, 000 General budget support for the MoER priority areas of the Education Development Strategy \u201c Education 2030 \u201d ( EDS ) and as detailed in government budget spending in the EDS plan and the medium-term budget framework. Note: UNDP = United Nations Development Programme. Program of UNICEF / GPE: Digital Innovation of Moldova Education System ( July 30, 2022 to July 30, 2025 ): \u2022 Program 1: Equip general education institutions with appropriate ICTs: ( a ) establish and codify as policy or strategy the national education digital standards that will take account of the needs of all children and include specific software and support requirements for children with specific needs; ( b ) prepare a national ( deficit ) mapping of educational institutions in relation to the national standards, a portal to maintain this, and establish priority response criteria; and ( c ) provide education institutions with appropriate equipment. \u2022 Program 2: Strengthen the digital learning environment by developing pedagogical content of the syllabuses for digital teaching and the capacity of the teachers to use these: ( a ) review curriculum and develop gender - sensitive digital learning materials and teaching / learning strategies; ( b ) provide in-service training in digital pedagogy to 10, 000 teachers using curriculum materials and strategies; ( c ) equip the Republican Centre for Psycho-Pedagogical Assistance, psycho-pedagogical assistance service, and resource centers in educational institutions with assistive technologies; and ( d ) strengthen the resilience of the education system to crises. \u2022 Program 3: Improve efficiency of educational management using digital tools: ( a ) establish and implement EMIS required to provide timely information for evidence-based decision-making at the class, educational institution, and local education authority levels and ( b ) stablish a tracking system to follow and respond to the needs of each student especially those at risk and vulnerable.", + "ner_text": [ + [ + 1723, + 1727, + "named" + ] + ], + "validated": false, + "empirical_context": "\u2022 Program 2: Strengthen the digital learning environment by developing pedagogical content of the syllabuses for digital teaching and the capacity of the teachers to use these: ( a ) review curriculum and develop gender - sensitive digital learning materials and teaching / learning strategies; ( b ) provide in-service training in digital pedagogy to 10, 000 teachers using curriculum materials and strategies; ( c ) equip the Republican Centre for Psycho-Pedagogical Assistance, psycho-pedagogical assistance service, and resource centers in educational institutions with assistive technologies; and ( d ) strengthen the resilience of the education system to crises. \u2022 Program 3: Improve efficiency of educational management using digital tools: ( a ) establish and implement EMIS required to provide timely information for evidence-based decision-making at the class, educational institution, and local education authority levels and ( b ) stablish a tracking system to follow and respond to the needs of each student especially those at risk and vulnerable.", + "type": "program", + "explanation": "EMIS is mentioned as a system for educational management, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to information management in education.", + "contextual_reason_agent": "EMIS is mentioned as a system for educational management, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "ner_text": [ + [ + 62, + 71, + "named" + ], + [ + 155, + 211, + "SNSOP MIS <> data description" + ], + [ + 220, + 225, + "SNSOP MIS <> reference population" + ], + [ + 533, + 549, + "SNSOP MIS <> data type" + ], + [ + 646, + 658, + "SNSOP MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "type": "system", + "explanation": "In this context, it is indeed a dataset as it is mentioned as a source for collecting and updating beneficiary and payment data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'MIS' (Management Information System) which typically stores and manages data.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is mentioned as a source for collecting and updating beneficiary and payment data.", + "contextual_signal": "mentioned as a data source for collecting and updating information", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1340, + 1344, + "named" + ], + [ + 4, + 14, + "HMIS <> publisher" + ], + [ + 15, + 26, + "HMIS <> data geography" + ], + [ + 481, + 492, + "HMIS <> data geography" + ], + [ + 583, + 591, + "HMIS <> reference population" + ], + [ + 616, + 627, + "HMIS <> data geography" + ], + [ + 690, + 701, + "HMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "HMIS is indeed a dataset as it refers to a Health Management Information System that collects and manages health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is mentioned in the context of health data management and integration.", + "contextual_reason_agent": "HMIS is indeed a dataset as it refers to a Health Management Information System that collects and manages health data.", + "contextual_signal": "mentioned as a data source in the context of digitization and integration", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 82, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 78 of 103 the string committees, before submission to the next reporting line, should approve all produced reports. Figure A1. 7 shows the range and schedule of reporting on Project progress. Figure A1. 7: Reporting on Project Progress 97. Baseline assessment: A woreda-level baseline for key WASH indicators was compiled, analyzed, and mapped for a total of 754 woredas using data collected from regional baseline reports, Health Management Information System and Education Management Information System reports, and regular reports of the Early Warning and Response Committee. Under the assessment, ten WASH indicators clustered in two groups were covered ( impact and outcome level ). The impact-level indicators are: ( i ) stunting rate; ( ii ) prevalence of diarrhea under the age of five; and ( iii ) primary students \u2019 dropout rate. Outcome-level indicators include: ( i ) access to rural water supply; ( ii ) access to rural household sanitation; ( iii ) open defecation free ( ODF ) coverage; ( iv ) health facility water supply coverage; ( v ) health facility improved sanitation coverage; ( vi ) school water supply coverage; and ( vii ) school improved sanitation coverage.", + "ner_text": [ + [ + 553, + 589, + "named" + ] + ], + "validated": false, + "empirical_context": "7: Reporting on Project Progress 97. Baseline assessment: A woreda-level baseline for key WASH indicators was compiled, analyzed, and mapped for a total of 754 woredas using data collected from regional baseline reports, Health Management Information System and Education Management Information System reports, and regular reports of the Early Warning and Response Committee. Under the assessment, ten WASH indicators clustered in two groups were covered ( impact and outcome level ).", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it could contain data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 76, + "text": "Creation of a New Sector Specific Data Management Systems ( UW3. 2 million ) 23. In the absence o f an integrated system for managing forest resource information, it i s difficult for MECNT to make informed decisions, elaborate policies, and manage the forest sector. To remedy this situation, MECNT will develop an integrated system for the management o f forest resources,: ( i ) a geospatial information management system ( SyGIS ), including an interactive forestry atlas updated regularly to allow monitoring, especially o f forest title limits and logging roads; and ( ii ) a forest management information system ( SIGEF ) based on a sub-system o f forest accounting, which collects financial data linked to the forest sector, and a sub-system o f physical log tracking, which physically validates data collected on the ground as well as the enforcement of regulations. 24. To achieve this, the project will support: ( i ) a team o f international and national experts specialized in conceiving, experimenting, and deploying the two systems on the ground; ( ii ) information seminars and workshops; ( iii ) provision o f office equipment, furniture, and logistics; and ( iv ) operational 64", + "ner_text": [ + [ + 621, + 626, + "named" + ] + ], + "validated": false, + "empirical_context": "In the absence o f an integrated system for managing forest resource information, it i s difficult for MECNT to make informed decisions, elaborate policies, and manage the forest sector. To remedy this situation, MECNT will develop an integrated system for the management o f forest resources,: ( i ) a geospatial information management system ( SyGIS ), including an interactive forestry atlas updated regularly to allow monitoring, especially o f forest title limits and logging roads; and ( ii ) a forest management information system ( SIGEF ) based on a sub-system o f forest accounting, which collects financial data linked to the forest sector, and a sub-system o f physical log tracking, which physically validates data collected on the ground as well as the enforcement of regulations. 24.", + "type": "system", + "explanation": "However, SIGEF is described as a forest management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SIGEF is a dataset because it is associated with data collection and management.", + "contextual_reason_agent": "However, SIGEF is described as a forest management information system, not a structured collection of data itself.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 13, + "text": "The education system, however, lacks the capacity to integrate the refugee students promptly and properly to schools and preschools. A Targeted Approach 10. Significant inequality in learning opportunities, especially between the students from wealthiest and poorest households persists. While learning outcomes had been improving in Moldova before the pandemic, there was significant inequality, especially between the wealthiest and poorest households ( figure 1 ). The pandemic only increased these inequalities. It disproportionally affects disadvantaged students including due to differential access to learning technologies across student groups. In 2021, approximately 80 percent of students living in Chisinau were able to access the internet, but only about half of the students could continue remote learning in the north and south parts of Moldova. According to the estimates, the performance gap between rich and poor students has deepened, increasing the differences in PISA reading scores from 115 points to 123 points, equivalent to over three years of schooling ( figure 2 ), and performance gaps will only widen over time if they remain unaddressed. Inequality in learning outcomes leads to inequality in human capital, which in turn abets intergenerational transmission of poverty and poverty traps. 11. Focusing learning recovery and acceleration among the most disadvantaged and vulnerable students is urgent and important to improve equity and inclusion. Despite robust progress on inclusion, 11 According to 2020 HCI data.", + "ner_text": [ + [ + 983, + 987, + "named" + ] + ], + "validated": false, + "empirical_context": "In 2021, approximately 80 percent of students living in Chisinau were able to access the internet, but only about half of the students could continue remote learning in the north and south parts of Moldova. According to the estimates, the performance gap between rich and poor students has deepened, increasing the differences in PISA reading scores from 115 points to 123 points, equivalent to over three years of schooling ( figure 2 ), and performance gaps will only widen over time if they remain unaddressed. Inequality in learning outcomes leads to inequality in human capital, which in turn abets intergenerational transmission of poverty and poverty traps.", + "type": "index", + "explanation": "'PISA' is mentioned as a source of scores rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is associated with educational performance metrics.", + "contextual_reason_agent": "'PISA' is mentioned as a source of scores rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "'PISA' is referenced in relation to performance scores, not as a data source.", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + }, + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "138_781290PAD0JO0R0t0Box377365B00OUO090", + "page": 11, + "text": "According to UNHCR data, 78 percent of the Syrian refugees are vulnerable, requiring additional assistance. This includes women ( 49 percent ), children under the age of 12 ( 40 percent ), and elderly ( 2. 1 percent ). In addition, 23 percent of Syrian refugees have chronic diseases or serious medical conditions that require medical follow up. Comparative morbidity data show a different disease profile with increased levels of morbidity for Syrians refugees than Jordanians which may affect the disease burden in the future. According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing a 14 percent increase in Jordan \u2019 s total cancer disease burden. Similarly, morbidity data from the MOH show a rise in selected communicable diseases. For example, TB case notification increased from 5 / 100, 000 in 2009 among Jordanians to 13 / 100, 000 among Syrian refugees in 2013. While no measles cases have been reported in Jordan since 2009, MOH data show that 18 Jordanians and 23 Syrians have been diagnosed with the disease in 2013. Polio, which had been eliminated since 1999 in Jordan, was also detected in two cases in 2013. Demand for services by refugees at MOH facilities has increased significantly.", + "ner_text": [ + [ + 346, + 372, + "named" + ], + [ + 43, + 58, + "Comparative morbidity data <> reference population" + ], + [ + 542, + 548, + "Comparative morbidity data <> data geography" + ], + [ + 658, + 662, + "Comparative morbidity data <> reference year" + ], + [ + 694, + 698, + "Comparative morbidity data <> publication year" + ], + [ + 789, + 803, + "Comparative morbidity data <> data type" + ], + [ + 877, + 897, + "Comparative morbidity data <> data description" + ], + [ + 929, + 933, + "Comparative morbidity data <> reference year" + ], + [ + 993, + 997, + "Comparative morbidity data <> publication year" + ], + [ + 1044, + 1050, + "Comparative morbidity data <> data geography" + ], + [ + 1151, + 1155, + "Comparative morbidity data <> publication year" + ], + [ + 1204, + 1210, + "Comparative morbidity data <> data geography" + ], + [ + 1363, + 1381, + "Comparative morbidity data <> usage context" + ] + ], + "validated": true, + "empirical_context": "In addition, 23 percent of Syrian refugees have chronic diseases or serious medical conditions that require medical follow up. Comparative morbidity data show a different disease profile with increased levels of morbidity for Syrians refugees than Jordanians which may affect the disease burden in the future. According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing a 14 percent increase in Jordan \u2019 s total cancer disease burden.", + "type": "data", + "explanation": "This is indeed a dataset as it provides empirical data used for analysis of morbidity among different populations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific morbidity data that can be compared across groups.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data used for analysis of morbidity among different populations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 17, + "text": "In addition, the project will also facilitate expanding education infrastructure investments in Istanbul, Ankara, Bursa, \u0130zmir, Konya and Kayseri which host a high number of SuTP and has high concentration of out-of - school SuTP at the district level. 33. The criteria to select the twelve provinces, and locations within the provinces, included a detailed analysis of concentration of SuTP at the district level, their access to education, and population densities of SuTP compared to host communities. Based on these criteria, the selected locations represent the following conditions: a. Districts which host more than 50, 000 SuTP and with a resident population over 500, 000 ( where the ratio of SuTP versus host community creates considerable distress for existing infrastructure and service capacity ) b. Districts where the majority of school-aged SuTP reside and which currently host the highest concentration ( ratio ) of out-of-school SuTP due to very limited ( deprived ) access to education services, c. Districts where a planned change from double-shifts to single-shifts and abolishment of Temporary Education Centers are imminent, d. Districts where the appropriate public land is available and secured for construction. 34. The Bank team jointly with MoNE used various sources of available data to ensure all dimensions of the aforementioned criteria were addressed in the selection. Apart from administrative data from MoNE and DGMM, the team also used the National Muhtar Survey ( NMS ). The NMS was conducted in every neighborhood and village throughout Turkey, jointly by the World Bank and the Government, to ascertain the accessibility criteria. The survey relies on the information provided by muhtars, who are the elected heads for neighborhoods and villages. They report on population related", + "ner_text": [ + [ + 1476, + 1498, + "named" + ], + [ + 96, + 104, + "National Muhtar Survey <> data geography" + ], + [ + 106, + 112, + "National Muhtar Survey <> data geography" + ], + [ + 114, + 119, + "National Muhtar Survey <> data geography" + ], + [ + 121, + 126, + "National Muhtar Survey <> data geography" + ], + [ + 128, + 133, + "National Muhtar Survey <> data geography" + ], + [ + 138, + 145, + "National Muhtar Survey <> data geography" + ], + [ + 446, + 503, + "National Muhtar Survey <> data description" + ], + [ + 1501, + 1504, + "National Muhtar Survey <> acronym" + ], + [ + 1512, + 1515, + "National Muhtar Survey <> acronym" + ], + [ + 1575, + 1581, + "National Muhtar Survey <> data geography" + ], + [ + 1598, + 1608, + "National Muhtar Survey <> publisher" + ], + [ + 1719, + 1726, + "National Muhtar Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The Bank team jointly with MoNE used various sources of available data to ensure all dimensions of the aforementioned criteria were addressed in the selection. Apart from administrative data from MoNE and DGMM, the team also used the National Muhtar Survey ( NMS ). The NMS was conducted in every neighborhood and village throughout Turkey, jointly by the World Bank and the Government, to ascertain the accessibility criteria.", + "type": "survey", + "explanation": "The National Muhtar Survey is explicitly mentioned as a source of data used in the research context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data from various locations.", + "contextual_reason_agent": "The National Muhtar Survey is explicitly mentioned as a source of data used in the research context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 32, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 27 of 117 dialogue on refugee education issues will also be supported under this component, to support implementation of Kenya \u2019 s existing refugee education commitments. Table 5: A Summary of Key Activities for the IPF Component Description of Area Activity # Key Activity description RA / PAP PEELP budget ( US $ 12 million ) Comments 1. Program management, policy dialogue, communication, monitoring and evaluation, safeguards and fiduciary, and verification. 1. 1 Program operational costs, including whole-of - Government refugee policy coordination PAP US $ 5 million39 Annual work plans are required of all implementing agencies and will be consolidated by PCU and approved by the NSC and the World Bank. Procurement: workshops and consultants. 1. 2 SIP / school grant SIP manual40; development of a disaster mitigation plan for schools affected by floods and drought; and additional capacity building measures on the SIPs, including incorporation actions from the disaster mitigation plan in the SIPs-including for camp - based refugee schools. RA 1 1. 3 Data management, including categorization of special needs learners by nature of disability and gender, and registration of refugee learners, including learners in host communities, in NEMIS. MoE develops guidelines for inclusion of refugee children in NEMIS and other education services.", + "ner_text": [ + [ + 1325, + 1330, + "named" + ], + [ + 198, + 203, + "NEMIS <> data geography" + ], + [ + 1100, + 1128, + "NEMIS <> reference population" + ], + [ + 1167, + 1242, + "NEMIS <> data description" + ], + [ + 1264, + 1280, + "NEMIS <> reference population" + ], + [ + 1332, + 1335, + "NEMIS <> author" + ], + [ + 1373, + 1389, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "RA 1 1. 3 Data management, including categorization of special needs learners by nature of disability and gender, and registration of refugee learners, including learners in host communities, in NEMIS. MoE develops guidelines for inclusion of refugee children in NEMIS and other education services.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is used for managing and categorizing data related to learners, including refugees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is involved in the registration and categorization of learners.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is used for managing and categorizing data related to learners, including refugees.", + "contextual_signal": "mentioned as a data source for registration and categorization", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 45, + "text": "33 to identify extremely poor households in areas known to be facing chronic poverty, yet also support the data requirements to build the HEA baseline model to cover areas where transient food security is the more pressing concern. Data collection will therefore be harmonized with a single questionnaire, but different formulas or methodological approaches will then be applied depending on whether beneficiary households are in areas of chronic poverty ( in the south ) or food insecurity ( Sahel ). 28. The proposed project will use a combination of targeting mechanisms. Beneficiary households will be selected combining geographic poverty targeting with a census of households in selected villages, followed by categorical targeting ( households with children under the age of 10 ) and a PMT screening which will then be discussed and validated by the community. Once the village is selected, data will be collected for all households, following a list of variables and based on the experience of other targeting techniques, namely the HEA. This will allow the calculation of a PMT score to be used to select among those households that have passed the categorical filter. The community will then discuss the list and validate it if in agreement.", + "ner_text": [ + [ + 661, + 702, + "named" + ], + [ + 740, + 784, + "census of households in selected villages <> reference population" + ] + ], + "validated": true, + "empirical_context": "The proposed project will use a combination of targeting mechanisms. Beneficiary households will be selected combining geographic poverty targeting with a census of households in selected villages, followed by categorical targeting ( households with children under the age of 10 ) and a PMT screening which will then be discussed and validated by the community. Once the village is selected, data will be collected for all households, following a list of variables and based on the experience of other targeting techniques, namely the HEA.", + "type": "census", + "explanation": "This is indeed a dataset as it involves a systematic collection of data about households in selected villages.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of household data collected through a census.", + "contextual_reason_agent": "This is indeed a dataset as it involves a systematic collection of data about households in selected villages.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 57, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLVIII Against DLR 8. 3: US $ 2 million in CY2024 and 2025 and US $ 1 million in CYs 2026, 2027 and 2028 for each 10 percentage point increase on average in response rates to requests for information filed online for government entities, for a limit of US $ 12 million. Against DLR 8. 4: US $ 0. 5 million every year for each compliant ministry and agency with proactive information disclosure requirements for the limit of US $ 12 million. Description The DLI supports the strengthening of online official communications regarding progress achieved toward the objectives of the Economic Modernization Vision on digital transformation, as well as enforcement of the 2007 Access to Information Law. It consists of the 4 following DLRs: DLR 8. 1: Tabling by government to the Parliament of amendments strengthening the enforcement of 2007 Access to information law ( prior result ). DLR 8. 2: Reporting through the PMDU public dashboard concerning the progress achieved regarding digital transformation toward the objectives of the Economic Modernization Vision. DLR 8. 3: Enhanced government responsiveness to online requests for information. DLR 8. 4: Online compliance with legal requirements concerning proactive information disclosure across selected government entities.", + "ner_text": [ + [ + 1008, + 1029, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 8. 2: Reporting through the PMDU public dashboard concerning the progress achieved regarding digital transformation toward the objectives of the Economic Modernization Vision. DLR 8.", + "type": "dashboard", + "explanation": "However, the PMDU public dashboard is mentioned as a reporting tool rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'dashboard', which often implies data visualization.", + "contextual_reason_agent": "However, the PMDU public dashboard is mentioned as a reporting tool rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 13, + "text": "6 16. The third pillar is focused on \u201c enhanced governance and managerial capacities of MEHE and CERD to plan, budget, deliver, monitor, and evaluate education services. \u201d This pillar aims to achieve the following outputs: C. 1 An effective and accurate Education Management Information System ( EMIS ) is established and functional. C. 3 Appropriate policy frameworks are endorsed and implemented to regulate education programs and services, strengthen school management, and professionalize teaching services. C. 2 Revised curricula for schools and learning spaces are developed and endorsed to improve quality learning, life-skills and employability for children and youth. C. 4 MEHE and CERD at the central and regional levels are strengthened to lead and coordinate the planning, implementation, and evaluation of the relevant RACE 2 activities. 17. Within the Government program, the proposed operation will support specific RACE 2 Program objectives linked to formal education. 12 The Bank-supported Program objectives are spread across all three pillars, and exclude activities that are directly implemented by international partners, such as UNICEF and UNHCR. Although these activities are not part of the Bank-supported Program, they continue to be core elements of the overall RACE 2 program. Activities financed by international partners but implemented by MEHE are typically within the scope of the Bank-supported Program. 18.", + "ner_text": [ + [ + 254, + 293, + "named" + ] + ], + "validated": false, + "empirical_context": "\u201d This pillar aims to achieve the following outputs: C. 1 An effective and accurate Education Management Information System ( EMIS ) is established and functional. C.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "ner_text": [ + [ + 406, + 410, + "named" + ], + [ + 17, + 21, + "KDHS <> reference year" + ], + [ + 92, + 96, + "KDHS <> publication year" + ], + [ + 241, + 246, + "KDHS <> data geography" + ], + [ + 412, + 416, + "KDHS <> reference year" + ] + ], + "validated": true, + "empirical_context": "increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "type": "survey", + "explanation": "KDHS is indeed a dataset as it is used to provide baseline and target statistics in the context of HIV/AIDS curricula.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because KDHS is referenced with baseline and target values, suggesting it provides structured data for analysis.", + "contextual_reason_agent": "KDHS is indeed a dataset as it is used to provide baseline and target statistics in the context of HIV/AIDS curricula.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 47, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 36 Indicator Name of which refugees Definition / Description People benefitted from improved sanitation facilities that have been constructed or rehabilitated under the project, including: pit latrine with slab, ventilated improved pit ( VIP ) latrine, composting toilet, and flush or pour-flush toilet / latrine to piped sewer system and septic tank, and fecal sludge treatment plants. It also includes shared sanitation facilities built in institutions and public places. Frequency Bi-annually. Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / NWSC.", + "ner_text": [ + [ + 613, + 629, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Bi-annually. Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 24, + "text": "14 39. The NPTP Project Unit in the MOSA is responsible for the following: ( i ) managing the NPTP database in MOSA; ( ii ) receiving household applications; ( iii ) interfacing with applicants; ( iv ) entering data; ( v ) conducting household visits; ( vi ) checking for data errors; ( vii ) transmitting data to the central database of the NPTP CMU; ( viii ) verifying claims from hospitals, schools, and primary healthcare centers ( PHCs ) and authorizing payments; ( ix ) managing the outreach campaign; ( x ) managing the e-card food voucher beneficiaries list, delivery of the e-cards to beneficiaries, and follow up; and ( xi ) monitoring of the program ( specifically inputs and outputs ). 40. The NPTP CMU in the PCM is responsible for the following: ( i ) managing the central database; ( ii ) validating data and cross-checking with national databases; ( iii ) processing household data and generating scores and ranks according to the PMT formula; ( iv ) maintaining the PMT formula, and providing the list of beneficiaries ( v ) analyzing national data and reporting findings to the Social Inter-Ministerial Committee ( Social-IMC ); ( vi ) monitoring of program results including targeting performance; and ( vii ) auditing data processing. 41.", + "ner_text": [ + [ + 527, + 565, + "named" + ], + [ + 594, + 607, + "e-card food voucher beneficiaries list <> reference population" + ] + ], + "validated": true, + "empirical_context": "14 39. The NPTP Project Unit in the MOSA is responsible for the following: ( i ) managing the NPTP database in MOSA; ( ii ) receiving household applications; ( iii ) interfacing with applicants; ( iv ) entering data; ( v ) conducting household visits; ( vi ) checking for data errors; ( vii ) transmitting data to the central database of the NPTP CMU; ( viii ) verifying claims from hospitals, schools, and primary healthcare centers ( PHCs ) and authorizing payments; ( ix ) managing the outreach campaign; ( x ) managing the e-card food voucher beneficiaries list, delivery of the e-cards to beneficiaries, and follow up; and ( xi ) monitoring of the program ( specifically inputs and outputs ). 40.", + "type": "list", + "explanation": "This is indeed a dataset as it is used to manage and track beneficiaries of the e-card food voucher program.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a list of beneficiaries, which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is used to manage and track beneficiaries of the e-card food voucher program.", + "contextual_signal": "mentioned as a list of beneficiaries in the context of managing the program", + "tags": [] + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 10, + "text": "As of August 2017, Jordan hosts 660, 5822 registered Syrian refugees, of which 232, 8683 are school \u2010 aged children requiring the provision of education services. Eighty percent of refugees live in host communities, representing 10 percent of Jordan \u2019 s population, while the rest live in dedicated refugee camps. Per the recent 2016 government census, the total number of Syrians in Jordan reached an estimated 1. 265 million, representing 13. 2 percent of population. 4 Jordan has been committed to integrating Syrian refugee children in the public formal sector, and as of June 2017, approximately 10 percent of children in public schools were Syrian refugees. Therefore, it is important that education services to refugee children in Jordan respond to the nature of the challenges they face in the education system. 3. Jordan \u2019 s economic development hinges on the existence of an education system that provides students with the cognitive and socioemotional skills needed to succeed in the labor market. Realizing the full potential of educational investments for economic prosperity requires improving access and quality of education for both girls and boys. 5 Additionally, the cost of not educating refugee children is high in terms of loss of human capital for regional economic development, as well as for the long \u2010 term processes of peace, stability, and reconstruction. It is thus necessary for the education system to be agile and address the existing education quality challenges by strengthening the system ' s ability to manage a growing number of students, including refugee and vulnerable children. 1 Human Development Data ( 1990 \u2013 2015 ); Human Development Report. 2016. 2 United Nations High Commissioner for Refugees ( UNHCR ). August 6, 2017. 3 Brussels Conference Paper. 2017. 4 Department of Statistics ( DOS ); National census. November 2016. 5 OECD. 2016. PISA. 2015 Results ( Volume I ): Excellence and Equity in Education, OECD Publishing, Paris. http: / / dx. doi. org / 10. 1787 / 9789264266490 \u2010 en.", + "ner_text": [ + [ + 329, + 351, + "named" + ] + ], + "validated": true, + "empirical_context": "Eighty percent of refugees live in host communities, representing 10 percent of Jordan \u2019 s population, while the rest live in dedicated refugee camps. Per the recent 2016 government census, the total number of Syrians in Jordan reached an estimated 1. 265 million, representing 13.", + "type": "census", + "explanation": "This is indeed a dataset as it provides structured data on the population of Syrians in Jordan.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a government census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on the population of Syrians in Jordan.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 47, + "text": "The World Bank Southern Niger Connectivity and Integration Project ( P179770 ) Page 37 of women to physically access obstetric care in the project area. Frequency At baseline, midterm, and after completion Data source A survey on rural accessibility will be used to collect mobility data on the impact of the newly rehabilitated and upgraded rural roads on people \u2019 s access to health services, particularly on women \u2019 s travel time to access obstetric care in the project areas. Methodology for Data Collection - Prepare a GEMS form to collect mobility data from a sample of women beneficiaries of maternity services. - Measure the reduction in travel time compared to the baseline value ( based on a GIS transportation model which relies on mix of modelled data and empirical data ). Responsibility for Data Collection General Directorate for Public Health ( DGSP ) / ministry in charge of public health and social affairs, in collaboration with some World Bank experts. Length of rehabilitated RN1 Maradi \u2013 Zinder section incorporating climate resilience measures ( Km ) Description This indicator measures the total length, in kilometers, of the RN1 road between Maradi and Zinder that has been rehabilitated taking into account climate resilience measures.", + "ner_text": [ + [ + 220, + 249, + "named" + ], + [ + 4, + 14, + "survey on rural accessibility <> publisher" + ], + [ + 274, + 287, + "survey on rural accessibility <> data type" + ], + [ + 576, + 617, + "survey on rural accessibility <> reference population" + ], + [ + 953, + 963, + "survey on rural accessibility <> publisher" + ], + [ + 997, + 1016, + "survey on rural accessibility <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Southern Niger Connectivity and Integration Project ( P179770 ) Page 37 of women to physically access obstetric care in the project area. Frequency At baseline, midterm, and after completion Data source A survey on rural accessibility will be used to collect mobility data on the impact of the newly rehabilitated and upgraded rural roads on people \u2019 s access to health services, particularly on women \u2019 s travel time to access obstetric care in the project areas. Methodology for Data Collection - Prepare a GEMS form to collect mobility data from a sample of women beneficiaries of maternity services.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a data source for collecting mobility data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey designed to collect data on rural accessibility.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a data source for collecting mobility data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "16 NCHRD and MOE \u2014 which started under ERfKE II and contributed to producing key pieces of analytical work that have played a significant role in policy development \u2014 will continue under the proposed Program. 21 One example of an evaluation study can be to look at how gender issues are addressed across different policy proposals and program interventions. The annual plan for the M & E activities will be endorsed by the GPSC and shared with all concerned donors and education sector partners. 52. The MOE has strengthened its data systems and will continue to do so to meet the M & E requirements of the Program. Under ERfKE II22, the MOE completed the redesign of its OpenEMIS, which was successfully deployed in all public schools in Jordan. OpenEMIS is a comprehensive and integrated information and data collection system, which covers all education levels. The system captures disaggregated school level data on students, teachers, grades, subjects, and teaching periods for all MOE operated schools and institutions in the country. A dashboard linked to the OpenEMIS is currently being developed and will be accessible to individual units at the MOE to enable them to monitor their respective indicators. The Queen Rania Center ( QRC ) manages the EMIS and its dashboard.", + "ner_text": [ + [ + 747, + 755, + "named" + ] + ], + "validated": false, + "empirical_context": "Under ERfKE II22, the MOE completed the redesign of its OpenEMIS, which was successfully deployed in all public schools in Jordan. OpenEMIS is a comprehensive and integrated information and data collection system, which covers all education levels. The system captures disaggregated school level data on students, teachers, grades, subjects, and teaching periods for all MOE operated schools and institutions in the country.", + "type": "system", + "explanation": "However, OpenEMIS is referred to as a system rather than a structured collection of data or a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset because it is described as an information and data collection system.", + "contextual_reason_agent": "However, OpenEMIS is referred to as a system rather than a structured collection of data or a dataset itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 20, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 16 of 40 Figure 4: Heat map of Sudan \u2019 s basic education schools Source: Based on 2018 / 19 School Census using Arcgis software. 39. Component 2 Program coordination and management ( US $ 0. 3 million ). This component will support the Federal Ministry of Educaiton ( MoE ) in overall program coordination, monitoring and evaluation. The PCU will cover functions such as planning, procurement, financial management, environmental and social safeguards and monitoring and evaluation. Technical experts will be mobilized as necessary. The PCU will monitor the progress by collecting and analyzing school-level data under the the Annual School Census. C. Project Beneficiaries 40. Primary beneficiaries are schoolchildren, teachers, and parents. Approximately 5. 4 million students will benefit from the project through provision of school grants. Communities in targeted areas will also benefit from enhance participatory school management.", + "ner_text": [ + [ + 707, + 727, + "named" + ], + [ + 4, + 14, + "Annual School Census <> publisher" + ], + [ + 15, + 20, + "Annual School Census <> data geography" + ], + [ + 111, + 116, + "Annual School Census <> data geography" + ], + [ + 162, + 185, + "Annual School Census <> reference year" + ], + [ + 675, + 692, + "Annual School Census <> data type" + ], + [ + 784, + 798, + "Annual School Census <> reference population" + ], + [ + 1034, + 1052, + "Annual School Census <> usage context" + ] + ], + "validated": true, + "empirical_context": "Technical experts will be mobilized as necessary. The PCU will monitor the progress by collecting and analyzing school-level data under the the Annual School Census. C.", + "type": "census", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned that school-level data will be collected and analyzed under the Annual School Census.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Census', which typically refers to a systematic collection of data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned that school-level data will be collected and analyzed under the Annual School Census.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 58 of 74 working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are refugees and host communities. Refugees are defined as forcibly displaced HHs originating from a country other than South Sudan and registered as refugees in South Sudan by the UNHCR. Host communities are defined as local population groups living in counties with a high concentration of refugees. measured at a minimum on a quarterly basis SNSOP MIS updated over the course of the project. Payment data will also be periodically updated in the MIS Number of beneficiary households receiving Direct Income Support who have a female primary beneficiary ( Number ) Total number of beneficiary households under comopnent 1. 2 that have a primary beneficiary, as registered in the SNSOP MIS, who is a woman.", + "ner_text": [ + [ + 865, + 874, + "named" + ] + ], + "validated": false, + "empirical_context": "Payment data will also be periodically updated in the MIS Number of beneficiary households receiving Direct Income Support who have a female primary beneficiary ( Number ) Total number of beneficiary households under comopnent 1. 2 that have a primary beneficiary, as registered in the SNSOP MIS, who is a woman.", + "type": "system", + "explanation": "However, the context indicates that 'SNSOP MIS' is referred to as a system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often stands for Management Information System, typically associated with data management.", + "contextual_reason_agent": "However, the context indicates that 'SNSOP MIS' is referred to as a system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 27, + "text": "Bureau des Mines et de la G\u00e9ologie du Burkina Faso, BUMIGEB ] and MEMC ) and between key ministries involved with land and mining such as the Info-center located at the Directorate General for Taxation ( Direction G\u00e9n\u00e9rale des Imp\u00f4ts, DGI ) at MINEFID to enable compliant mineral payments, or connection with MEEVCC regarding the exclusion of classified forests areas from mining licensing; ( g ) strengthen the National Observatory of Territorial Economy to facilitate the collection, sharing, and analysis of economic data at the local and regional levels; and ( h ) strengthen the National Urban Observatory to support the monitoring and evaluation of urbanization policies. Activities of this subcomponent will enable the collection, management and dissemination of land, mining and other common territorial datasets that will be key for planners and decision makers to enhance climate resilient planning, and for the academia and NGOs to monitor climate change and strengthen early warning systems. Standardized land use data and relevant risk information will also help decision makers and planners assess and manage risk exposure to natural hazards through land-use zoning and by selecting strategic location for critical infrastructure. 36. 1. 3. 2 Institutional Cooperation and coordination. The management of land and mining requires support, coordination, and cooperation between various departments at central, regional and local levels.", + "ner_text": [ + [ + 1017, + 1030, + "named" + ], + [ + 1164, + 1179, + "land use data <> data description" + ], + [ + 1482, + 1500, + "land use data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Activities of this subcomponent will enable the collection, management and dissemination of land, mining and other common territorial datasets that will be key for planners and decision makers to enhance climate resilient planning, and for the academia and NGOs to monitor climate change and strengthen early warning systems. Standardized land use data and relevant risk information will also help decision makers and planners assess and manage risk exposure to natural hazards through land-use zoning and by selecting strategic location for critical infrastructure. 36.", + "type": "data", + "explanation": "In the context, 'land use data' is described as essential for planners and decision makers, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data used for planning and decision-making.", + "contextual_reason_agent": "In the context, 'land use data' is described as essential for planners and decision makers, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "165_311820EG", + "page": 16, + "text": "M & E will be the responsibility o f the central KG Department with the support o f the C A T and other relevant partners and stakeholders at the central, sub-national, and community levels. Integration o f ECE into the existing Education Management and Information System ( EMIS ) is an important activity to be carried out and is expected to help better monitor and evaluate project progress and impact. CIDA is already helping develop the approach for data organization and information flow - see PIP. 45. A simple yet comprehensive Results Framework provides output indicators for the project objective and key components. The detailed baseline values will be completed as the project is implemented and baseline values for various administrative units are collected for specific components. Indicators will be disaggregated as far as possible by income, gender and inclusion in \u2018 disadvantaged \u2019 groups ( including, inter alia, those disadvantaged because o f geography, gender, disability, ethnicity, disability, refugee status or the need to work ) and will thus include a range o f indicators to assist in the measurement o f project impact on beneficiaries at the govemorate, community and group level. 4. Sustainability i. Institutional sustainability will be ensured through the following: ( i ) project and financial management capacity would be sustained through the involvement o f and capacity building for the relevant MOE Departments, including counterparts at the sub-national levels, in their respective areas of responsibilities ( refer to responsibilities chart under component 3. 2 in the PIP ); ( ii ) quality o f the ECE programs would be sustained through a National Standards regulatory model. The proposed regulatory model would allow for MOE and MISA inspectors, teachers, staff, principals, other providers, community professionals and parents to 11", + "ner_text": [ + [ + 229, + 272, + "named" + ] + ], + "validated": false, + "empirical_context": "M & E will be the responsibility o f the central KG Department with the support o f the C A T and other relevant partners and stakeholders at the central, sub-national, and community levels. Integration o f ECE into the existing Education Management and Information System ( EMIS ) is an important activity to be carried out and is expected to help better monitor and evaluate project progress and impact. CIDA is already helping develop the approach for data organization and information flow - see PIP.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 37, + "text": "Annex 3: Results Framework and Monitoring IGAD Regional HIV / AIDS Partnership Program ( IRAPP ) Support Project Results Framework15 Preface The results framework for the IRAPP Support Project was developed in close collaboration between IGAD, the IGAD Member States, GAMET, the World Bank and other development partners. The monitoring table which outlines the baseline data and targets for the Project is being finalized, and is a critical part of the project. The UNHCR is currently finalizing the baseline data for the refugee camps, which have been collected through behavioral surveillance surveys for each of the refugee areas to be targeted. IGAD is completing additional baseline data based on the regional mapping assessment recently completed. This data will be presented, discussed and targets finalized during the IGAD Technical Working Group meeting on M & E, which will include all the IGAD Member States and development partners, scheduled for the project launch ( September 2007 ). The project will contribute towards the establishment of a regional M & E system, in order to get systematic and regular data updates for cross-border and mobile populations. As of now there is no regional M & E system that captures data on these target groups. Furthermore, data on increased regional collaboration will be collected using the Most - Significant-Change technique. This qualitative data will be collected by September 2007, and annually thereafter.", + "ner_text": [ + [ + 362, + 375, + "named" + ], + [ + 523, + 536, + "baseline data <> reference population" + ], + [ + 572, + 603, + "baseline data <> data type" + ], + [ + 981, + 995, + "baseline data <> publication year" + ], + [ + 1137, + 1172, + "baseline data <> reference population" + ], + [ + 1343, + 1378, + "baseline data <> data description" + ] + ], + "validated": true, + "empirical_context": "Annex 3: Results Framework and Monitoring IGAD Regional HIV / AIDS Partnership Program ( IRAPP ) Support Project Results Framework15 Preface The results framework for the IRAPP Support Project was developed in close collaboration between IGAD, the IGAD Member States, GAMET, the World Bank and other development partners. The monitoring table which outlines the baseline data and targets for the Project is being finalized, and is a critical part of the project. The UNHCR is currently finalizing the baseline data for the refugee camps, which have been collected through behavioral surveillance surveys for each of the refugee areas to be targeted.", + "type": "data", + "explanation": "In this context, 'baseline data' is indeed used as a data source for the project, specifically collected through behavioral surveillance surveys.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' is a dataset because it refers to collected information used for analysis in the project.", + "contextual_reason_agent": "In this context, 'baseline data' is indeed used as a data source for the project, specifically collected through behavioral surveillance surveys.", + "contextual_signal": "mentioned as a critical part of the project", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 145, + "text": "131 related data. The main instrument for M & E under the DRDIP will be the Results Framework ( Annex 1 ). The indicators are the same for Djibouti, Ethiopia and Uganda with their respective baseline, intermediate and end of project targets; and will be the basis for reporting progress. The indicators, data sources and data collection methods take into account the CDD approach, and the limited institutional capacities in the project targeted areas. All indicators are disaggregated by country, with further disaggregation by gender ( percent female ) for the core indicator on direct project beneficiaries. 140. The project will design a management information system for monitoring inputs, outputs and processes; evaluation of outcome and impacts; environmental and social safeguard monitoring; and participatory monitoring and evaluation and internal learning. M & E activities will also include regular monitoring of implementation progress / performance, independent process monitoring ( including inter alia regular assessments of community level planning and review of the effectiveness and quality of capacity support efforts ), outcome / impact evaluations at baseline, mid-term and end of project, and annual thematic studies. The project \u2019 s Results Framework will be used as a basis for reporting progress against indicators, including progress toward achieving the PDO and implementation progress.", + "ner_text": [ + [ + 642, + 671, + "named" + ] + ], + "validated": false, + "empirical_context": "140. The project will design a management information system for monitoring inputs, outputs and processes; evaluation of outcome and impacts; environmental and social safeguard monitoring; and participatory monitoring and evaluation and internal learning. M & E activities will also include regular monitoring of implementation progress / performance, independent process monitoring ( including inter alia regular assessments of community level planning and review of the effectiveness and quality of capacity support efforts ), outcome / impact evaluations at baseline, mid-term and end of project, and annual thematic studies.", + "type": "system", + "explanation": "However, it is described as a management information system focused on monitoring and evaluation activities, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a management information system focused on monitoring and evaluation activities, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 111, + "text": "This year much of work is performed to strengthen the unit and as such regular audit work was not yet started. The audit unit is accountable to the Deputy Director. Audit manual in Amharic was prepared. Training has not yet provided to the internal auditors. In view of ARRA is new for internal audit function, a lot of capacity-building activities are needed to strengthen the unit. IPDC has an internal audit service that reports to the Board. As shown in the staff data in the annex the unit is staffed as per the structure. The unit has an internal audit charter; however internal audit manuals are not yet developed. The manuals are under preparation along with other procedure manuals of IPDC. The internal audit unit coverage of the planned tasks is satisfactory. A performance audit on property administration was also conducted. Currently internal audit is performed only at the head office level. The current staffing arrangement is inadequate with the level of IPDC and needs to be strengthened in number and capacity so that audits can also cover the Industrial Parks. The internal audit units at proposed implementing entities are not adequately staffed and due to this the Program book of account might not be reviewed. IPDC has not yet finalized maintaining a comprehensive fixed asset register and affixing of identification number on each fixed asset items. 21.", + "ner_text": [ + [ + 462, + 472, + "named" + ] + ], + "validated": true, + "empirical_context": "IPDC has an internal audit service that reports to the Board. As shown in the staff data in the annex the unit is staffed as per the structure. The unit has an internal audit charter; however internal audit manuals are not yet developed.", + "type": "data", + "explanation": "In this context, 'staff data' is indeed used as a structured collection of data regarding the unit's staffing.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'staff data' is a dataset because it refers to a collection of information about staff members.", + "contextual_reason_agent": "In this context, 'staff data' is indeed used as a structured collection of data regarding the unit's staffing.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 51, + "text": "There is an overall improvement in the financial management function of the government through institutionalized trainings on PFM, woreda benchmarks to assess and support woredas, as well as improvements in internal control processes. With regards to procurement, the existing Proclamation No. 649 / 2009, which has been governing procurement for more than 10 years, will be replaced by the new proclamation that is expected to be ratified at the federal level soon. Following this, regional administrations are expected to revise their laws accordingly. On the other hand, progress is noted in the improved function of procurement regulatory bodies, which are registering procurement process data and reporting on procurement performance through agreed KPIs, though reporting needs further improvement. Similarly, regional regulatory bodies are making efforts to deliver on their responsibility to undertake procurement audits on procurement implementing institutions, with improvement needed in the quality and coverage of these audits. The government has also established a checks and balances system to handle fraud and corruption risks and complaints through the Federal Ethics and Anti-Corruption Commission ( FEACC ) and the Regional Ethics and Anti-Corruption Commissions ( REACCs ) which have a responsibility for prevention activities. The recently completed World Bank financed PFM project has achieved results in the areas of strengthening information technology systems of the government ( IFMIS and e-procurement ); maintaining the currently used system at the woreda level ( IBEX ); institutionalizing PFM trainings; professionalization of procurement function;", + "ner_text": [ + [ + 1590, + 1594, + "named" + ] + ], + "validated": false, + "empirical_context": "The government has also established a checks and balances system to handle fraud and corruption risks and complaints through the Federal Ethics and Anti-Corruption Commission ( FEACC ) and the Regional Ethics and Anti-Corruption Commissions ( REACCs ) which have a responsibility for prevention activities. The recently completed World Bank financed PFM project has achieved results in the areas of strengthening information technology systems of the government ( IFMIS and e-procurement ); maintaining the currently used system at the woreda level ( IBEX ); institutionalizing PFM trainings; professionalization of procurement function;", + "type": "system", + "explanation": "However, IBEX is described as a system used at the woreda level, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IBEX is a dataset because it is mentioned in the context of information technology systems.", + "contextual_reason_agent": "However, IBEX is described as a system used at the woreda level, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 28, + "text": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 25 of 93 and ( e ) design a high-frequency survey to monitor poverty and vulnerability in regions in conflict. \uf0b7 Subcomponent 4. 2: Strengthening the national accounts production by ( a ) providing TA to improve the quarterly national accounts methodology, ( b ) establishing a methodology and baseline to better reflect the regional cross-border trade, and ( c ) creating an economic simulation tool for MINEPAT / Direction G\u00e9n\u00e9rale de l \u2019 Economie. \uf0b7 Subcomponent 4. 3: Enhancing statistical data dissemination and use by the Government to improve policy making and implementation by ( a ) providing TA and financing to improve archiving procedures and build the capacity of the INS archiving department and setting up the IT network and equipment in the new INS headquarters and the 10 regional agencies to facilitate data treatment, sharing, archiving, and public dissemination; ( b ) providing TA to INS to train and assist targeted ministries in integrating an M & E system in the design and implementation of their strategies and programs and public investment projects; and ( c ) supporting proactive dissemination of statistics to external audience ( academia, media, civil society, business associations, and so on ). Component 5: Project coordination ( US $ 3. 5 million ) 42.", + "ner_text": [ + [ + 133, + 154, + "named" + ], + [ + 4, + 14, + "high-frequency survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 25 of 93 and ( e ) design a high-frequency survey to monitor poverty and vulnerability in regions in conflict. \uf0b7 Subcomponent 4.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a survey designed to collect data on poverty and vulnerability.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'high-frequency survey' implies a structured collection of data collected over time.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a survey designed to collect data on poverty and vulnerability.", + "contextual_signal": "described as a survey to monitor data on poverty and vulnerability", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 28, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 24 of 66 65. Given its focus on building local capacity in the medium and long term, this Subcomponent is aligned with GCRF Pillar 4: Strengthening Policies, Institutions and Investments for Rebuilding Better. 66. Subcomponent 4. 2: Strengthening data for education system management ( US $ 2 million ). Strengthening the education system \u2019 s management also entails supporting the education governing ministries in establishing a data system to deliver reliable data in line with international standards. This activity will build on the existing EMIS. TA will be provided to diagnose the existing system regarding data collection processes, standards and templates, personnel, and IT infrastructure. Potential areas to be addressed may include ( i ) improved templates and standards; ( ii ) improved data collection processes, such as phone-based data collection to enable the various education stakeholders, including the community and parents, to collect and submit real-time data into the EMIS; and ( iii ) an improved web-based EMIS system that has the capacity to integrate real-time data. Real-time data would include schools \u2019 condition, teacher registration, teacher and student attendance, and the availability of teaching and learning materials. This could be effectively utilized once synced across the web-based platform.", + "ner_text": [ + [ + 640, + 644, + "named" + ], + [ + 64, + 75, + "EMIS <> data geography" + ], + [ + 929, + 956, + "EMIS <> data type" + ], + [ + 1017, + 1026, + "EMIS <> reference population" + ], + [ + 1031, + 1038, + "EMIS <> reference population" + ], + [ + 1062, + 1076, + "EMIS <> data type" + ], + [ + 1239, + 1259, + "EMIS <> data description" + ], + [ + 1261, + 1291, + "EMIS <> data description" + ], + [ + 1301, + 1348, + "EMIS <> data description" + ] + ], + "validated": true, + "empirical_context": "Strengthening the education system \u2019 s management also entails supporting the education governing ministries in establishing a data system to deliver reliable data in line with international standards. This activity will build on the existing EMIS. TA will be provided to diagnose the existing system regarding data collection processes, standards and templates, personnel, and IT infrastructure.", + "type": "system", + "explanation": "EMIS is indeed a data system mentioned as a source for reliable data delivery.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is referred to as a system related to data collection and management.", + "contextual_reason_agent": "EMIS is indeed a data system mentioned as a source for reliable data delivery.", + "contextual_signal": "mentioned as a data system that supports data collection processes", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 84, + "text": "The World Bank Development Response to Displacement Impacts Project in the Horn of Africa Phase II ( P178047 ) Page 79 of 80 through STEP document reviews and participation of procurement specialists in bi-annual implementation support missions. 45. The table below presents the focus of the implementation support and the skills required. Table 1. 1: Implementation support and skills required Time Focus Main Skills Needed Resource Estimate ( SW ) Partner Role First twelve months \u2022 Build understanding of the governing procurement framework ( rules and procedures ) and ESF \u2022 Support capacities in new region and woredas \u2022 Establish remote monitoring tools ( GEMS, IBM, TPM ) \u2022 Support baseline survey Task management / social dev ( Nairobi-based ) 4 UNHCR and partners to support monitoring of ongoing adequacy of the refugee protection framework Agriculture / rural dev / task management ( CO-based ) 8 Forced displacement 2 Rural livelihood ( CO-based ) 5 FM ( CO-based ) 3 Procurement ( CO-based ) 3 ESS ( CO-based ) 6 M & E ( CO-based ) 8 Rural infra engineer ( CO-based ) 4 NRM specialist ( CO-based ) 4 Small-scale irrigation specialist ( CO-based ) 10 12-48 months \u2022 Joint ISMs with government and UNHCR to monitor implementation performance \u2022 Review of annual work / financial plans \u2022 Review of quarterly / annual reports \u2022 Review of audits / IFRs \u2022 Review subproject selection processes \u2022 Process review on participatory development planning", + "ner_text": [ + [ + 662, + 666, + "named" + ] + ], + "validated": false, + "empirical_context": "Table 1. 1: Implementation support and skills required Time Focus Main Skills Needed Resource Estimate ( SW ) Partner Role First twelve months \u2022 Build understanding of the governing procurement framework ( rules and procedures ) and ESF \u2022 Support capacities in new region and woredas \u2022 Establish remote monitoring tools ( GEMS, IBM, TPM ) \u2022 Support baseline survey Task management / social dev ( Nairobi-based ) 4 UNHCR and partners to support monitoring of ongoing adequacy of the refugee protection framework Agriculture / rural dev / task management ( CO-based ) 8 Forced displacement 2 Rural livelihood ( CO-based ) 5 FM ( CO-based ) 3 Procurement ( CO-based ) 3 ESS ( CO-based ) 6 M & E ( CO-based ) 8 Rural infra engineer ( CO-based ) 4 NRM specialist ( CO-based ) 4 Small-scale irrigation specialist ( CO-based ) 10 12-48 months \u2022 Joint ISMs with government and UNHCR to monitor implementation performance \u2022 Review of annual work / financial plans \u2022 Review of quarterly / annual reports \u2022 Review of audits / IFRs \u2022 Review subproject selection processes \u2022 Process review on participatory development planning", + "type": "system", + "explanation": "'GEMS' is mentioned as a monitoring tool, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GEMS' is a dataset because it is mentioned alongside monitoring tools.", + "contextual_reason_agent": "'GEMS' is mentioned as a monitoring tool, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 40, + "text": "The Internal Efficiency of the system is weak, particularly due to very high dropout rates and low learning achievements of pupils. The IEC at the primary level is particularly low ( 39 percent ), which implies that more than half of public resources are wasted in paying for repeated grades or schooling for students who dropout before cycle completion. Economic Rationale for Public Investment in Sustaining Basic Education Enrollment in Sudan 8. The rationale for public sector financing of basic education is well established. Investments under the Project would strengthen efficiency and equity at the basic level overall, likely contributing to improved learning outcomes at the school level. The pressing needs and challenges for both improved efficiency and equity warrant public sector support consistent with Sudan \u2019 s commitment to providing Universal Primary Education of reasonable quality to all children. 9. Investment in basic education in Sudan is justified by the low NER ( 69 percent ) and completion rate ( 55 percent ) and weak learning levels among enrolled students. National Learning Assessment conducted in all 18 states of Sudan found that Grade 3 students performed very poorly. On average, 40 percent of pupil are not able to read a single word. This suggests that there is not only a large proportion of school-age children out of school but even when in school many students are not learning. The Project \u2019 s Development Impact 10. The project is expected to contribute positively to Sudan \u2019 s education system and national economic development. It aims to sustain enrollment in public schools during the economic crises and pandemic. To that end, it is expected that the proposed interventions will affect the probability of a child completing primary education and transitioning to the secondary level. This, in turn, will yield gains in labor earnings measured 3 Authors \u2019 estimation based on 2018 School Census data and reported USD / SDG exchange rate ( Economist ).", + "ner_text": [ + [ + 1090, + 1118, + "named" + ] + ], + "validated": false, + "empirical_context": "Investment in basic education in Sudan is justified by the low NER ( 69 percent ) and completion rate ( 55 percent ) and weak learning levels among enrolled students. National Learning Assessment conducted in all 18 states of Sudan found that Grade 3 students performed very poorly. On average, 40 percent of pupil are not able to read a single word.", + "type": "assessment", + "explanation": "However, it is mentioned as an assessment rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Assessment', which often relates to data collection.", + "contextual_reason_agent": "However, it is mentioned as an assessment rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "Phone based / on-line beneficiary satisfaction Annual MoE MoE will conduct the phone / online based satisfaction survey, analyze the findings and disseminate the results to schools, MoE", + "ner_text": [ + [ + 79, + 119, + "named" + ], + [ + 54, + 57, + "phone / online based satisfaction survey <> publisher" + ], + [ + 58, + 61, + "phone / online based satisfaction survey <> publisher" + ], + [ + 173, + 180, + "phone / online based satisfaction survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Phone based / on-line beneficiary satisfaction Annual MoE MoE will conduct the phone / online based satisfaction survey, analyze the findings and disseminate the results to schools, MoE", + "type": "survey", + "explanation": "This is indeed a dataset as it involves conducting a survey to gather and analyze data on satisfaction levels.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured survey designed to collect data on beneficiary satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it involves conducting a survey to gather and analyze data on satisfaction levels.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 120, + "text": "Environmental and Social Systems MOE Due Date 29-Dec - 2023 Complaints and grievances lodged through the NEMIS. Number of GRM satisfactorily addressed. Assess the outcomes of biogas pilots and analyze existing practices on use of biogas in schools Environmental and Social Systems MoE Due Date 29-Dec - 2023 Costed action plan developed to scale up biogas projects. Whole of Government refugee policy Other Ministry of Interior Recurrent Continuous Relevant coordination structures established.", + "ner_text": [ + [ + 105, + 110, + "named" + ] + ], + "validated": false, + "empirical_context": "Environmental and Social Systems MOE Due Date 29-Dec - 2023 Complaints and grievances lodged through the NEMIS. Number of GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is referred to as a system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of complaints and grievances data.", + "contextual_reason_agent": "NEMIS is referred to as a system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 36, + "text": "29 \uf0b7 C. 4 - MEHE and CERD at the central and regional levels are strengthened to lead and coordinate the planning, implementation, and evaluation of the relevant RACE 2 activities. 22. One of the main challenges faced during the implementation of the first phase of RACE is the unavailability of timely and reliable data, structured data collection systems, and systems-capacity to analyze data that could inform policy decisions or programmatic interventions. There are currently multiple data collection mechanisms working in parallel, in addition to long delays in the ability for MEHE or CERD to collect and use school-level data for decision-making. As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders. An EMIS will be deployed in order to support the timely collection of the data, so that decision-making at both the school and the central MEHE and CERD levels is based on accurate and updated data. 23. For output C. 2, the current Lebanese formal curriculum dates from 1997 and has not seen major changes since that date. It is strongly centered on specific concepts or information that students should know, rather than on competencies and skills that learners should acquire.", + "ner_text": [ + [ + 874, + 878, + "named" + ] + ], + "validated": false, + "empirical_context": "As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders. An EMIS will be deployed in order to support the timely collection of the data, so that decision-making at both the school and the central MEHE and CERD levels is based on accurate and updated data. 23.", + "type": "system", + "explanation": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data management and collection.", + "contextual_reason_agent": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 25, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 21 of 86 ensure transparency, accountability, and digital inclusion in the delivery. Beneficiaries will be selected through a combination of targeting instruments. First, the poorest communes in the country will be selected making use of existing poverty maps, hazard exposure maps and climate change vulnerability maps. The collines29 within these communes, given that their poverty profiles are very similar, will be selected randomly through a lottery. This will be done to avoid any political interference in the selection of the collines. Secondly, all households in the selected collines will be registered in the country \u2019 s social registry and a PMT score will be calculated for all of them. Beneficiary quota will be established for each colline and the project will select the poorest households in each colline according to the PMT until reaching the proposed quota. Finally, a community validation process will be carried out to allow communities to correct inclusion and exclusion errors that might occur in the targeting process. The targeting process might be adapted in urban and refugee areas if necessary. Beneficiaries will receive Burundi Francs ( BIF ) 36, 00030 per month ( approx. US $ 18 ). This amount is equivalent to 20 percent of the household consumption of an average poor household, which is aligned to international standards. 49.", + "ner_text": [ + [ + 301, + 313, + "named" + ], + [ + 4, + 14, + "poverty maps <> publisher" + ], + [ + 229, + 245, + "poverty maps <> reference population" + ], + [ + 612, + 622, + "poverty maps <> reference population" + ], + [ + 1205, + 1212, + "poverty maps <> data geography" + ] + ], + "validated": true, + "empirical_context": "Beneficiaries will be selected through a combination of targeting instruments. First, the poorest communes in the country will be selected making use of existing poverty maps, hazard exposure maps and climate change vulnerability maps. The collines29 within these communes, given that their poverty profiles are very similar, will be selected randomly through a lottery.", + "type": "map", + "explanation": "In this context, 'poverty maps' are used as a source of information to select beneficiaries, indicating they function as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'poverty maps' imply a structured representation of data related to poverty levels.", + "contextual_reason_agent": "In this context, 'poverty maps' are used as a source of information to select beneficiaries, indicating they function as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 4, + "validated": 3, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 11, + "text": "Under the IMF program for 2024-28, 3 the Government of Jordan ( GOJ ) is committed to the \u201c digitalization of all government services by end-2025, and to establishing a single service platform by December 2024 \u201d to \u201c operationalize a system that will enable payment of all government bills and fees under the same platform, as well as options to connect e-wallets to the system. \u201d In addition, the GOJ will digitalize business licensing, increase the digitalization of taxpayers \u2019 services, foster access to credit information through digital channels, and further digitalize data capture and analysis. It is also committed to strengthening controls on wage bill increases through the professionalization of the civil service and by mobilizing an \u201c HR management information system that includes real-time civil service data and performance management functions to support the roles and functions of the newly established \u201d Service and Public Administration Commission ( SPAC ). 5. The Public Sector Modernization Roadmap for 2022-2025, approved in 2022, builds on a sound diagnostic of governance challenges in Jordan, aimed at both analog and digital governance reforms to improve governmental effectiveness.", + "ner_text": [ + [ + 749, + 781, + "named" + ] + ], + "validated": false, + "empirical_context": "\u201d In addition, the GOJ will digitalize business licensing, increase the digitalization of taxpayers \u2019 services, foster access to credit information through digital channels, and further digitalize data capture and analysis. It is also committed to strengthening controls on wage bill increases through the professionalization of the civil service and by mobilizing an \u201c HR management information system that includes real-time civil service data and performance management functions to support the roles and functions of the newly established \u201d Service and Public Administration Commission ( SPAC ). 5.", + "type": "system", + "explanation": "However, it is described as a management information system, which does not function as a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' in its name.", + "contextual_reason_agent": "However, it is described as a management information system, which does not function as a data source in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 39, + "text": "In the case of the CSD Program, for instance, information about approved and rejected proposals will be public, as well as the eligibility and scoring criteria for proposals. ( b ) Building and empowering the human resources of the SDCs: Several capacity building activities have been identified to strengthen SDC staff skills in connection to: ( a ) the new equipment and computerized systems, ( b ) the implementation of the CSD and NPTP programs supported by Component 2 and 3, ( c ) community participation, and ( 4 ) overall outreach and M & E. Specific hands-on instruction will be provided on the use of MIS and databases. As a database of social service providers will be created, SDCs staff will be trained to collect and enter information concerning providers in their catchment area ( see below ). In connection with the implementation of the CSD and NPTP programs, staff will receive training in handling grievances, preventing and detecting corruption and fraud, fiduciary matters, and using case management and participatory approaches, while training on proposal writing will be provided to both CSOs and SDCs. A training needs assessment carried out as soon as the project becomes effective will provide additional detailed information, on the basis of which a training plan will be elaborated.", + "ner_text": [ + [ + 611, + 614, + "named" + ] + ], + "validated": false, + "empirical_context": "( b ) Building and empowering the human resources of the SDCs: Several capacity building activities have been identified to strengthen SDC staff skills in connection to: ( a ) the new equipment and computerized systems, ( b ) the implementation of the CSD and NPTP programs supported by Component 2 and 3, ( c ) community participation, and ( 4 ) overall outreach and M & E. Specific hands-on instruction will be provided on the use of MIS and databases. As a database of social service providers will be created, SDCs staff will be trained to collect and enter information concerning providers in their catchment area ( see below ).", + "type": "system", + "explanation": "However, 'MIS' refers to a management information system, which is not a dataset but a system for managing information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to data management.", + "contextual_reason_agent": "However, 'MIS' refers to a management information system, which is not a dataset but a system for managing information.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "166_304360LK", + "page": 40, + "text": "vulnerability and poverty maps3 '. Variations between districts makes it necessary for each of these factors to be weighted differently in each district31. Subsequent to selection o f villages, grants will be allocated per village based on: ( i ) caseload o f returnees; and ( ii ) extent o f damaged housing stock. The list o f villages to be included in the program, the number o f grants per village, and a justification of each will be approved at the provincial level and submitted with the rationale behind the selection to IDA for its no-objection. It i s estimated that approximately 1000 villages will participate in NEHRP. Unaccompanied Minor / Orphan Total Score NEHRU will select beneficiaries in the following manner: Subsequent to the selection of the villages and the allocation o f grants per village through the process described above, NEHRU will undertake a Housing Damage Assessment and Social Verification Survey in the selected villages. The assessment team will consist o f a divisional technical officer, VRC member and a NGO / CBO representative supported by the village headmadwoman ( Grama Niladari representative ). Breadwinner 1 person 2-3 persons 4 and above A potential beneficiary must meet four eligibility conditions to be considered for the housing assistance to begin with. These are: e proposed.", + "ner_text": [ + [ + 877, + 933, + "named" + ], + [ + 282, + 314, + "Housing Damage Assessment and Social Verification Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "It i s estimated that approximately 1000 villages will participate in NEHRP. Unaccompanied Minor / Orphan Total Score NEHRU will select beneficiaries in the following manner: Subsequent to the selection of the villages and the allocation o f grants per village through the process described above, NEHRU will undertake a Housing Damage Assessment and Social Verification Survey in the selected villages. The assessment team will consist o f a divisional technical officer, VRC member and a NGO / CBO representative supported by the village headmadwoman ( Grama Niladari representative ).", + "type": "survey", + "explanation": "This is a dataset as it refers to a structured survey designed to collect data on housing damage and social verification.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it refers to a structured survey designed to collect data on housing damage and social verification.", + "contextual_signal": "described as a survey that collects data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "166_304360LK", + "page": 38, + "text": "The Grant Description and Transfer Mechanism N E W will finance a housing support cash grant to selected eligible beneficiary families ( the eligbility criteria are described below ). The selected eligible beneficiary families will qualify for one o f two types of grants-i. e. \u201c fully damaged \u201d housing grant or \u201c partly damaged \u201d housing grant?. The assessment o f fully damaged versus partly damaged houses will occur through a Housing Damage Assessment and Social Verification Survev. NEHRP will provide a cash grant o f Rs. 150, 000 to beneficiary families whose house i s assessed as fully damaged. It will provide a grant o f Rs. 70, 000 to beneficiary families whose house i s assessed as partly damaged. It i s estimated that 80 % o f the houses will be classified as fully damaged while 20 % will be categorized as partly damaged. The grant will finance the physical requirement o f at least a permanent foundation o f 400 square feet, four walls and a roof. The pilot has determined that Rs. 150, 000 i s a sufficient grant to provide this structure across all districts o f the North East.", + "ner_text": [ + [ + 431, + 487, + "named" + ], + [ + 105, + 134, + "Housing Damage Assessment and Social Verification Survev <> reference population" + ], + [ + 1090, + 1100, + "Housing Damage Assessment and Social Verification Survev <> data geography" + ] + ], + "validated": true, + "empirical_context": ". The assessment o f fully damaged versus partly damaged houses will occur through a Housing Damage Assessment and Social Verification Survev. NEHRP will provide a cash grant o f Rs.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a survey designed to collect data on housing damage.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Assessment' which often relates to data collection.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a survey designed to collect data on housing damage.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 43, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0. 00 0. 00 5. 00 10. 00 15. 00 20. 00 20. 00 Rapid student assessment mechanism developed and piloted ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Tutoring / accelerated learning program for disadvantaged students developed and implemented ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Students benefiting from direct interventions to 0. 00 0. 00", + "ner_text": [ + [ + 455, + 463, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0.", + "type": "data", + "explanation": "In the context, 'MoF data' is explicitly mentioned as a source of information used for assessments and reports, confirming it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MoF data' is a dataset because it is referenced in the context of providing information for education sector management.", + "contextual_reason_agent": "In the context, 'MoF data' is explicitly mentioned as a source of information used for assessments and reports, confirming it functions as a data source.", + "contextual_signal": "mentioned as a data source for assessments and reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 50, + "text": "The World Bank Public Administration Modernization Project ( P162904 ) Page 47 of 69 Currently, the civil status information is scattered; it should be grouped under a unique authority ( that is, the DGPF ). Biometric procedures should be expressly reflected in the texts. Laws and decrees should also be enacted for data privacy and electronic transactions. 13. Technical assessments of the existing main ID systems ( civil registry, social security, and safety net ) have revealed some good achievements in terms of enrollment, technical, and procedural capabilities that this new ID project should reuse and capitalize on. \u2022 The DGPF, which is in charge of the civil registry, issues national ID cards with biographic and scanned biometric information. The national ID card includes a picture and scanned pictures of 10 fingerprints of the beneficiary. The digitization process started in 2007, and in 2014 important upgrades took place that allowed administrative processes to be treated online. The population registry has more than 300, 000 individuals in its database. The DGPF has also scanned all identity cases and related identity documents to issue an identity card from 1954 to 2011.", + "ner_text": [ + [ + 1004, + 1023, + "named" + ], + [ + 687, + 704, + "population registry <> data type" + ], + [ + 892, + 896, + "population registry <> publication year" + ], + [ + 905, + 909, + "population registry <> publication year" + ], + [ + 1183, + 1195, + "population registry <> reference year" + ] + ], + "validated": true, + "empirical_context": "The digitization process started in 2007, and in 2014 important upgrades took place that allowed administrative processes to be treated online. The population registry has more than 300, 000 individuals in its database. The DGPF has also scanned all identity cases and related identity documents to issue an identity card from 1954 to 2011.", + "type": "registry", + "explanation": "The population registry is explicitly mentioned as having a database of individuals, confirming it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of individuals in a database.", + "contextual_reason_agent": "The population registry is explicitly mentioned as having a database of individuals, confirming it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 46, + "text": "Collection TPM / PMU; Measures subcomponents 1. 1 and 1. 2 Under UNICEF Percentage of women receiving four ANC visits ( Percentage ) Description Percentage of women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Percentage of refugee women receiving four ANC visits ( Percentage ) Description Percentage of refugee women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider.", + "ner_text": [ + [ + 331, + 336, + "named" + ], + [ + 65, + 71, + "DHIS2 <> publisher" + ], + [ + 72, + 117, + "DHIS2 <> data description" + ], + [ + 418, + 424, + "DHIS2 <> publisher" + ], + [ + 459, + 465, + "DHIS2 <> publisher" + ], + [ + 466, + 519, + "DHIS2 <> data description" + ] + ], + "validated": true, + "empirical_context": "2 Under UNICEF Percentage of women receiving four ANC visits ( Percentage ) Description Percentage of women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Percentage of refugee women receiving four ANC visits ( Percentage ) Description Percentage of refugee women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider.", + "type": "system", + "explanation": "DHIS2 is indeed a data source as it is explicitly referenced for data collection in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned as a data source for collecting information on antenatal care visits.", + "contextual_reason_agent": "DHIS2 is indeed a data source as it is explicitly referenced for data collection in the context.", + "contextual_signal": "mentioned as a data source for data collection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "185_multi-page", + "page": 38, + "text": "39 Output from Each Output Indicators: Project Reports: From Outputs to Objective: Component: 1 ) Expansion of HIV / AIDS Increase in condom availability Project data Effective monitoring of prevention programs nationwide targeted coverage Increased percentage of retail Survey data Sustained intersectoral outlets and service delivery collaboration at national and points with condoms in stock local levels Increased percentage of Improved implementation condoms in central stock and capacity of local government, retail outlets that meet WHO conmmunities, and NGOs quality control measures Improved Ministry of Health Increase in percentage of implementation capacity population with correct knowledge of HIV / AIDS Improved effectiveness of IEC transmission and prevention programs methods Increase in percentage of women testing positive at selected antenatal clinics who are provided with a complete course of ARV therapy to prevent PTCT in accordance with national / intemational guidelines Increase in the number of communities with improved prevention services Reduction in number in target group ' s reported non-regular sexual partners", + "ner_text": [ + [ + 271, + 282, + "named" + ], + [ + 601, + 619, + "Survey data <> author" + ], + [ + 819, + 871, + "Survey data <> reference population" + ], + [ + 1161, + 1179, + "Survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "39 Output from Each Output Indicators: Project Reports: From Outputs to Objective: Component: 1 ) Expansion of HIV / AIDS Increase in condom availability Project data Effective monitoring of prevention programs nationwide targeted coverage Increased percentage of retail Survey data Sustained intersectoral outlets and service delivery collaboration at national and points with condoms in stock local levels Increased percentage of Improved implementation condoms in central stock and capacity of local government, retail outlets that meet WHO conmmunities, and NGOs quality control measures Improved Ministry of Health Increase in percentage of implementation capacity population with correct knowledge of HIV / AIDS Improved effectiveness of IEC transmission and prevention programs methods Increase in percentage of women testing positive at selected antenatal clinics who are provided with a complete course of ARV therapy to prevent PTCT in accordance with national / intemational guidelines Increase in the number of communities with improved prevention services Reduction in number in target group ' s reported non-regular sexual partners", + "type": "survey", + "explanation": "In this context, 'Survey data' is explicitly mentioned as part of the project reports, indicating it is used as a data source for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Survey data' suggests a structured collection of information gathered from surveys.", + "contextual_reason_agent": "In this context, 'Survey data' is explicitly mentioned as part of the project reports, indicating it is used as a data source for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 497, + 502, + "named" + ], + [ + 51, + 62, + "DHIS2 <> data geography" + ], + [ + 194, + 197, + "DHIS2 <> publisher" + ], + [ + 532, + 554, + "DHIS2 <> reference population" + ], + [ + 703, + 719, + "DHIS2 <> data type" + ], + [ + 764, + 782, + "DHIS2 <> data type" + ], + [ + 1240, + 1263, + "DHIS2 <> data description" + ] + ], + "validated": true, + "empirical_context": "71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs.", + "type": "data management system", + "explanation": "DHIS2 is explicitly described as a data management system that regularly collects data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "DHIS2 is explicitly described as a data management system that regularly collects data, confirming its role as a dataset.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 40, + "text": "Responsibility for Data Collection PIU IRI 4. 2 Beneficiary satisfaction survey implemented \u2013 Citizen engagement indicator Description The MEP will implement two beneficiary surveys during the lifetime of the project. Feedback from the first survey will be utilized in improving the implementation of Project activities. Frequency Twice during the project ( estimated second and fourth year of project implementation ) Data source PIU Methodology for Data Collection PIU will hire a firm to conduct the surveys and prepare analytical reports Responsibility for Data Collection PIU", + "ner_text": [ + [ + 48, + 79, + "named" + ], + [ + 162, + 181, + "Beneficiary satisfaction survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Responsibility for Data Collection PIU IRI 4. 2 Beneficiary satisfaction survey implemented \u2013 Citizen engagement indicator Description The MEP will implement two beneficiary surveys during the lifetime of the project. Feedback from the first survey will be utilized in improving the implementation of Project activities.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that collects feedback for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey designed to collect data on beneficiary satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that collects feedback for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 16, + "text": "7 22. Moreover, investments in strengthening data systems ( both technical and financial ) need to continue to deepen the use of data for evidence \u2010 based decision making in the sector and further improve resource allocation. The MOE has successfully deployed an education management information system ( EMIS ), which is now hosting data on all schools and students in the system. Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country. Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize. In addition, the collection, analysis, and use of student learning data and disaggregated and gender \u2010 sensitive data are essential for monitoring, targeting pedagogical interventions, and improving teacher practices in the classroom. C. Relationship to the Country Partnership Framework and Rationale for Use of Instrument 23. Relationship to the CPF. The proposed operation is fully aligned with the Jordan Country Partnership Framework ( CPF ) discussed by the World Bank Group Board on July 14, 2016. The CPF covers the period FY17 \u2013 22 and highlights the economic, geopolitical, and social challenges that Jordan has been facing, particularly with the Syrian refugee crisis.", + "ner_text": [ + [ + 410, + 441, + "named" + ] + ], + "validated": false, + "empirical_context": "The MOE has successfully deployed an education management information system ( EMIS ), which is now hosting data on all schools and students in the system. Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country. Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize.", + "type": "system", + "explanation": "However, the context indicates that it is a system for planning and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'information system' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, the context indicates that it is a system for planning and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 60, + "text": "The MoF and BoF are responsible for the FM aspect at the federal and regional levels, respectively. At the woreda and zone levels a pool system is in place, where the WOFs and ZOFs, respectively, are responsible for all FM aspects of WASH sector offices. 25. Financial management manual: The Project will follow the FM manual developed under the WASHP, which largely follows the government \u2019 s accounting manual, depicting all accounting policies, procedures, internal control issues, financial reporting, fund flow arrangements, budgeting, and external audits. Hence, in view of the new developments and activities of the new phase and the lessons learnt under the WASHP, the FM manual will be revised within two months after the Project \u2019 s effectiveness. The Borrower must obtain a \u201c no objection \u201d to the revised FM manual from the World Bank. Training on the FM manual will be carried out within two months of its approval by the World Bank. 26. Accounting system: For normal treasury funds, the GoE uses the Integrated Budget and Expenditure ( IBEX ) or Integrated Financial Management Information System ( IFMIS ). The IBEX accounting system is now operational in almost all public bodies while the IFMIS is under testing at federal ministries.", + "ner_text": [ + [ + 1014, + 1047, + "named" + ] + ], + "validated": false, + "empirical_context": "26. Accounting system: For normal treasury funds, the GoE uses the Integrated Budget and Expenditure ( IBEX ) or Integrated Financial Management Information System ( IFMIS ). The IBEX accounting system is now operational in almost all public bodies while the IFMIS is under testing at federal ministries.", + "type": "system", + "explanation": "However, it is described as an accounting system, not as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Integrated Budget and Expenditure', which sounds like a structured collection of financial data.", + "contextual_reason_agent": "However, it is described as an accounting system, not as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 46, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 31 Proportion of beneficiaries indicating satisfaction with infrastructure subprojects constructed or upgraded by the project ( Percentage ) Description Quantitative indicator counting proportion of beneficiaries indicating satisfaction with infrastructure subprojects constructed or upgraded by the project. Indictor is a composite of beneficiaries responding \u201c satisfied \u201d or \u201c very satisfied \u201d on a Likert scale. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey. Responsibility for Data Collection MINEMA. Kilometers of road upgraded ( Kilometers ) Description Quantitative indicator counting kilometers of roads upgraded ( completed ). Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. RTDA data fed to MINEMA. Responsibility for Data Collection RTDA and MINEMA. Market facilities and Integrated Craft Production Centers constructed or upgraded ( Number ) Description Quantitative indicator counting number of market facilities and ICPCs constructed or upgraded ( completed ). Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA.", + "ner_text": [ + [ + 1000, + 1006, + "named" + ] + ], + "validated": false, + "empirical_context": "RTDA data fed to MINEMA. Responsibility for Data Collection RTDA and MINEMA. Market facilities and Integrated Craft Production Centers constructed or upgraded ( Number ) Description Quantitative indicator counting number of market facilities and ICPCs constructed or upgraded ( completed ).", + "type": "organization", + "explanation": "However, MINEMA is identified as an organization responsible for data collection, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MINEMA is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "However, MINEMA is identified as an organization responsible for data collection, not a structured collection of data itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 34, + "text": "in 25 % of target schools trained Staff in 50 % of target schools trained Staff in 75 % of target schools trained 4. Project Management and Operational Analytical Support Students supported with better education ( Number of people ) Mar / 2025 Mar / 2026 Mar / 2027 Mar / 2028 Mar / 2029 120, 000 600, 000 900, 000 900, 000 \uf0d8 Students supported with better education \u2013 Female ( Number of people ) Mar / 2025 Mar / 2026 Mar / 2027 Mar / 2028 Mar / 2029 60, 000 300, 000 450, 000 450, 000 IRI 4. 2 Beneficiary satisfaction survey implemented \u2013 Citizen engagement indicator ( Text ) Mar / 2024 Mar / 2025 Mar / 2026 Mar / 2027 Mar / 2028 Sep / 2029 N / A Survey 1 prepared Survey 1 implemented Survey 2 prepared Survey 2 implemented Surveys 1 and 2 implemented", + "ner_text": [ + [ + 496, + 527, + "named" + ], + [ + 542, + 570, + "Beneficiary satisfaction survey <> data description" + ], + [ + 586, + 590, + "Beneficiary satisfaction survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Project Management and Operational Analytical Support Students supported with better education ( Number of people ) Mar / 2025 Mar / 2026 Mar / 2027 Mar / 2028 Mar / 2029 120, 000 600, 000 900, 000 900, 000 \uf0d8 Students supported with better education \u2013 Female ( Number of people ) Mar / 2025 Mar / 2026 Mar / 2027 Mar / 2028 Mar / 2029 60, 000 300, 000 450, 000 450, 000 IRI 4. 2 Beneficiary satisfaction survey implemented \u2013 Citizen engagement indicator ( Text ) Mar / 2024 Mar / 2025 Mar / 2026 Mar / 2027 Mar / 2028 Sep / 2029 N / A Survey 1 prepared Survey 1 implemented Survey 2 prepared Survey 2 implemented Surveys 1 and 2 implemented", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that is implemented to gather data on citizen engagement.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on beneficiary satisfaction.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that is implemented to gather data on citizen engagement.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 27, + "text": "Similarly, in Albania, the project highlighted the importance of business process simplification for increased efficiency, piloting of CSC to allow for learning and adaptation, and development of a short message service ( SMS ) - based Citizen Feedback Mechanism ( CFM ) to close the feedback loop. To date, the CFM system has reached over 187, 000 citizens to solicit feedback regarding service delivery. 16 The Djibouti Social Safety Net Project is a World Bank-funded project, with an Additional Financing that also supports the Social Registry and National Social Protection Strategy. The registry currently contains information about 42, 000 households, exceeding the target of 20, 000. The collection of biometric data about these households has been launched and biometric information about 33, 000 beneficiaries has been registered. The Djibouti Public Administration Modernization Project will be collaborating closely with the safety net project team. The Project will build on lessons learned from the enrollment and registration phase conducted by the Social Affairs Department.", + "ner_text": [ + [ + 532, + 547, + "named" + ], + [ + 453, + 458, + "Social Registry <> publisher" + ], + [ + 639, + 657, + "Social Registry <> reference population" + ], + [ + 710, + 724, + "Social Registry <> data type" + ], + [ + 1064, + 1089, + "Social Registry <> author" + ] + ], + "validated": true, + "empirical_context": "To date, the CFM system has reached over 187, 000 citizens to solicit feedback regarding service delivery. 16 The Djibouti Social Safety Net Project is a World Bank-funded project, with an Additional Financing that also supports the Social Registry and National Social Protection Strategy. The registry currently contains information about 42, 000 households, exceeding the target of 20, 000.", + "type": "registry", + "explanation": "The Social Registry is explicitly mentioned as containing structured information about households, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry containing information about households.", + "contextual_reason_agent": "The Social Registry is explicitly mentioned as containing structured information about households, confirming its role as a data source.", + "contextual_signal": "described as a registry that contains information about households", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 32, + "text": "To achieve the first objective, key response activities will include provision of mobile support for health, education, and WASH services to conflict-affected communities. Recovery activities will include local mapping and factfinding of conflict-affected people, households, and communities \u2019 needs, impacts, response services and a local conflict analysis and participatory climate risk assessments; consultation, planning, and implementation of sustainable solutions for communities; and recovery plans ( Percentage ); ( ii ) Neighborhood Relations Committees formed and reinforced and still operational one year after receiving funding ( Percentage ); ( iii ) Beneficiaries that feel project investments reflect their needs ( Percentage ); and ( iv ) People reporting increased awareness of available GBV response services in their community ( Percentage ). 65 These are: ( i ) Number of financed sub-projects that are functioning or delivering services to communities six months after completion ( with disaggregation to education, WASH, health, and other types of sub-projects ); ( ii ) Beneficiaries with rebuilt and improved access to climate - resilient community infrastructure; and ( iii ) Community recovery plans implemented. 66 The average population size of Woredas in the four regions is calculated from the July 2021 projections of the Ethiopia Central Statistics Agency. IDP numbers are calculated using a combination of IOM \u2019 s Site Assessment Survey ( Round 26 ), collected in June-July 2021, and the Emergency Site Assessment ( ESA ) round 8, collected in July-August 2021. The ESA data is still pending confirmation from the government. Data collection could not be completed in parts of the Tigray, Western Oromia, and Benishangul-Gumuz regions due to insecurity and access restrictions.", + "ner_text": [ + [ + 1447, + 1469, + "named" + ], + [ + 1273, + 1280, + "Site Assessment Survey <> reference population" + ], + [ + 1329, + 1333, + "Site Assessment Survey <> publication year" + ], + [ + 1439, + 1442, + "Site Assessment Survey <> publisher" + ], + [ + 1497, + 1511, + "Site Assessment Survey <> reference year" + ], + [ + 1714, + 1720, + "Site Assessment Survey <> data geography" + ], + [ + 1722, + 1736, + "Site Assessment Survey <> data geography" + ], + [ + 1742, + 1759, + "Site Assessment Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "66 The average population size of Woredas in the four regions is calculated from the July 2021 projections of the Ethiopia Central Statistics Agency. IDP numbers are calculated using a combination of IOM \u2019 s Site Assessment Survey ( Round 26 ), collected in June-July 2021, and the Emergency Site Assessment ( ESA ) round 8, collected in July-August 2021. The ESA data is still pending confirmation from the government.", + "type": "survey", + "explanation": "The context confirms it is used in the research as it provides data for calculating IDP numbers.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data.", + "contextual_reason_agent": "The context confirms it is used in the research as it provides data for calculating IDP numbers.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 14, + "text": "2008, the SWF launched and completed an extensive national survey \u2019 to further identify the poor and vulnerable in Yemen and apply improved targeting methods to the data collected in order to expand the program to cover those in need. It is expected that beneficiary application for SWF support and assessments of applicants \u2019 eligibility will be a continuous process. The SWF newly established database is the most comprehensive national record of poor and vulnerable individuals available in Yemen. Such a national database can be used to target and coordinate other funds and benefits across a range of social programs. 9. The implementation of the SWF reforms and program expansion are entering a critical period requiring immediate technical assistance, policy guidance, capacity building and training support. Technical assistance has been provided by the European Commission ( EC ) for several years, and more recently the World Bank has provided initial guidance on CT program design including targeting. A draft core Operations Manual reflecting key reform elements of the program was developed in 2009 through World Bank technical assistance. Further guidance on ( i ) poverty-based targeting; ( ii ) reaching the ultra poor; ( iii ) phasing out the ineligible beneficiaries; ( iv ) assessing benefit levels; and ( v ) CT program fiscal implications and sustainability issues, is needed. 10.", + "ner_text": [ + [ + 50, + 65, + "named" + ], + [ + 0, + 4, + "national survey <> reference year" + ], + [ + 115, + 120, + "national survey <> data geography" + ], + [ + 508, + 525, + "national survey <> data type" + ] + ], + "validated": true, + "empirical_context": "2008, the SWF launched and completed an extensive national survey \u2019 to further identify the poor and vulnerable in Yemen and apply improved targeting methods to the data collected in order to expand the program to cover those in need. It is expected that beneficiary application for SWF support and assessments of applicants \u2019 eligibility will be a continuous process.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as an extensive national survey aimed at identifying the poor and vulnerable, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'national survey' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is described as an extensive national survey aimed at identifying the poor and vulnerable, indicating its use as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 70, + "text": "There is adequate back up arrangements ( on site and off-site back systems system ), maintenance support from SUN office in Nairobi, and security to various users in terms data entry, verification and approvals. The system can also maintain data base information which does not contain accounting data but can be used to enhance the program monitoring system such as M & E, procurement etc. 17. Periodic Reporting for Project Monitoring: Formats of the interim un-audited financial reports ( IFRs ), i. e. periodic financial monitoring reports, are designed to provide quality and timely information to the World Bank and various stakeholders on the project \u2019 s performance. ( Samples of these reports are included in the project financial procedure addendum. Refer to Financial Monitoring Reports for World Bank-Financed Projects: Guidelines for Borrowers Dated November 30, 2002 ). 18. Within 45 days of the end of each quarter, the following reports would be prepared by the IGAD and submitted to the World Bank, and other stakeholders. The contents of these reports should, at a minimum, include the following: ( i ) Financial reports which sets forth sources and uses of funds by project activity / component, and statement of actual and budget expenditures, both cumulatively and for the period covered by said report, showing separately funds provided under the IDA and other financiers, and explains variances between the actual and planned uses of such funds; ( ii ) physical progress / output monitoring report which describes physical progress in Project implementation, both cumulatively and for the period covered by said report, and explains variances between the actual and planned Project implementation; and ( iii ) Procurement report which sets forth the status of procurement under the Project, as at the end of the period covered by said report 66", + "ner_text": [ + [ + 1476, + 1520, + "named" + ] + ], + "validated": false, + "empirical_context": "Within 45 days of the end of each quarter, the following reports would be prepared by the IGAD and submitted to the World Bank, and other stakeholders. The contents of these reports should, at a minimum, include the following: ( i ) Financial reports which sets forth sources and uses of funds by project activity / component, and statement of actual and budget expenditures, both cumulatively and for the period covered by said report, showing separately funds provided under the IDA and other financiers, and explains variances between the actual and planned uses of such funds; ( ii ) physical progress / output monitoring report which describes physical progress in Project implementation, both cumulatively and for the period covered by said report, and explains variances between the actual and planned Project implementation; and ( iii ) Procurement report which sets forth the status of procurement under the Project, as at the end of the period covered by said report 66", + "type": "report", + "explanation": "However, it is not a dataset as it is described as a report detailing physical progress rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'report' which could imply structured information.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a report detailing physical progress rather than a structured collection of data.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 12, + "text": "Comparison of Key Health and Education Indicators in the Province of Balochistan and Pakistan Health Indicatorsa Balochistan Pakistan Infant mortality rate ( per 1, 000 live births ) 66. 0 62. 0 Stunting rate ( % of children under 5 ) 47. 4 37. 6 Skilled birth delivery ( % of pregnant women ) 38. 2 69. 3 Immunization, measles ( % of children ages 12 \u2013 23 months ) 33. 3 73. 0 Gender Gap: Enrollment and transition ratesb Girls Boys Girls Boys Net enrollment at the primary level ( % ) 35 56 71 83 Net enrollment at the secondary level ( % ) 13 20 35 43 Effective transition rate from primary to middle ( % ) 69 71 84 84 Effective transition rate from middle to high schools ( % ) 78 83 88 94 Gender Gap: Students ages 5 \u2013 16 yearsc Girls Boys Girls Boys Could read a grade 2 level story in Urdu ( % ) 20 31 43 47 Could read grade 2 level sentences in English ( % ) 15 26 36 44 Could do subtraction ( % ) 26 34 39 43 Source: a. PDHS 2017 \u2013 18; b. Pakistan Education Statistics 2016 \u2013 17; c. Annual State of Education Report ( ASER ) \u2010 National 2018. 13.", + "ner_text": [ + [ + 929, + 933, + "named" + ], + [ + 69, + 80, + "PDHS <> data geography" + ], + [ + 85, + 93, + "PDHS <> data geography" + ], + [ + 113, + 133, + "PDHS <> data geography" + ], + [ + 934, + 943, + "PDHS <> publication year" + ] + ], + "validated": true, + "empirical_context": "0 Gender Gap: Enrollment and transition ratesb Girls Boys Girls Boys Net enrollment at the primary level ( % ) 35 56 71 83 Net enrollment at the secondary level ( % ) 13 20 35 43 Effective transition rate from primary to middle ( % ) 69 71 84 84 Effective transition rate from middle to high schools ( % ) 78 83 88 94 Gender Gap: Students ages 5 \u2013 16 yearsc Girls Boys Girls Boys Could read a grade 2 level story in Urdu ( % ) 20 31 43 47 Could read grade 2 level sentences in English ( % ) 15 26 36 44 Could do subtraction ( % ) 26 34 39 43 Source: a. PDHS 2017 \u2013 18; b. Pakistan Education Statistics 2016 \u2013 17; c.", + "type": "survey", + "explanation": "'PDHS' is indeed a dataset as it is cited in the context as a source for empirical data on education statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PDHS' is a dataset because it is referenced as a source of enrollment and transition rates data.", + "contextual_reason_agent": "'PDHS' is indeed a dataset as it is cited in the context as a source for empirical data on education statistics.", + "contextual_signal": "'mentioned as a source for data on enrollment and transition rates'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 25, + "text": "The project M & E will leverage and strengthen existing routine information systems, and finance the generation of user \u2010 friendly evidence for efficient service delivery. Routine surveys will be used to collate data from target facilities, which will be triangulated through the existing management information system within the Health and Secondary Education Departments. The remote monitoring system within the SED uses technology \u2010 based data management solutions with a dashboard to display the broader analysis. The project will support the Health Department to adopt a similar system and cater for the monitoring needs of the project. The project will also support an innovative, technology \u2010 based pilot intervention to track the presence of providers at HFs and assess their knowledge to design appropriate trainings. In addition, the project will train district \u2010 and cluster \u2010 level education and health teams in data collection, management, analysis, and timely course correction. Process evaluations will be used to measure the quality of implementation. To tackle the challenges in evidence \u2010 based decision making and improved accountability within the Health Department, the project will support the GoB to ( a ) establish or strengthen an HRH database, a health institutional database that routinely tracks facility 47 During the early phase of implementation, the Governance and Policy Program ( GPP ) PMU will provide back \u2010 up support. Implementation / Monitoring Operational Coordination Oversight PSC Headed by Additional Chief Secretary, to oversee the project implementation and provide stewardship PCC Headed by Secretary, Health and Secretary Education, to coordinate and facilitate project implementation PMU \u2010 Health Existing PMU to manage the project implementation and monitoring District Health Teams Headed by district health officer to plan, implement, and monitor the project activities PMU \u2010 Education Existing PMU to manage the project implementation and monitoring PMU District Teams Headed by District Coordinator to plan, implement, and monitor the project activities", + "ner_text": [ + [ + 289, + 318, + "named" + ] + ], + "validated": false, + "empirical_context": "The project M & E will leverage and strengthen existing routine information systems, and finance the generation of user \u2010 friendly evidence for efficient service delivery. Routine surveys will be used to collate data from target facilities, which will be triangulated through the existing management information system within the Health and Secondary Education Departments. The remote monitoring system within the SED uses technology \u2010 based data management solutions with a dashboard to display the broader analysis.", + "type": "system", + "explanation": "However, it is mentioned as a system that supports data management rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system that supports data management rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 65, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 60 human resources agency. CFS has developed, trained on, and is using a complete set of operational manuals acceptable to the World Bank, including an Administrative and Financial Manual, a Manual on Targeting, a Manual on Cash Transfers, a Manual on Cash for Work and an Environmental and Social Mitigation Framework. Originally produced for the PFS, these manuals are being newly revised for this project. CFS has a main office in Ndjamena and two regional offices, in Logone Occidentale and Bar-el-Ghazel. It has surveyed close to 30, 000 potential beneficiaries, registered and provided cash transfers to approximately 6, 200 households, and is supporting the creation of the Unified Social Registry. 5. The CFS will open at least three additional fully staffed regional offices. Aside from a regional coordinator, each regional office will include a financial management specialist, M & E specialist, safeguards specialist, two technical specialists to manage components 1 and 2, and a communications / community engagement specialist, plus support staff as needed. 6. Given the breadth of the refugee situation and based on the performance of the project, the option to expand the project to the national level will be explored at mid-term review.", + "ner_text": [ + [ + 766, + 789, + "named" + ], + [ + 4, + 14, + "Unified Social Registry <> publisher" + ], + [ + 519, + 527, + "Unified Social Registry <> data geography" + ], + [ + 557, + 575, + "Unified Social Registry <> data geography" + ], + [ + 580, + 593, + "Unified Social Registry <> data geography" + ], + [ + 1392, + 1410, + "Unified Social Registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "CFS has a main office in Ndjamena and two regional offices, in Logone Occidentale and Bar-el-Ghazel. It has surveyed close to 30, 000 potential beneficiaries, registered and provided cash transfers to approximately 6, 200 households, and is supporting the creation of the Unified Social Registry. 5.", + "type": "registry", + "explanation": "The Unified Social Registry is mentioned in the context of supporting beneficiaries, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Registry', which often implies a structured collection of data.", + "contextual_reason_agent": "The Unified Social Registry is mentioned in the context of supporting beneficiaries, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 17, + "text": "Through successful implementation of the two strategies, the government expects to: ( a ) reduce expenditure on social safety net programs for Rwandese and humanitarian aid for refugees; and ( b ) unlock the potential of refugees to support economic development. This will also contribute to key results in Rwanda \u2019 s National Strategy for Transformation 2 ( 2024-2029 ) on creating jobs, improving education and the quality-of service-delivery. Shifting to a development approach to achieve self-reliance is increasingly urgent in light of dwindling humanitarian assistance. For instance, funding for UNHCR in Rwanda fell from US $ 49 million in 2020 ( only 49 percent of needs ) to US $ 37 million in 2023 ( 41 percent of needs ), despite almost no change in refugee numbers. 6. Progress towards achieving refugee self-reliance is hampered by a lack of employment and income - generating opportunities. A tailored Refugee Self-Reliance Index ( RSRI ) prepared by the GoR, the World Bank and UNHCR measures refugee self-reliance in Rwanda. As noted above, the 2022 census and the first round of RSRI data showed that most refugees have good access to basic services, but fare poorly with respect to employment and income. The employment to population ratio for refugees is 15 percent as against 46 percent for the total population.", + "ner_text": [ + [ + 1061, + 1072, + "named" + ] + ], + "validated": true, + "empirical_context": "A tailored Refugee Self-Reliance Index ( RSRI ) prepared by the GoR, the World Bank and UNHCR measures refugee self-reliance in Rwanda. As noted above, the 2022 census and the first round of RSRI data showed that most refugees have good access to basic services, but fare poorly with respect to employment and income. The employment to population ratio for refugees is 15 percent as against 46 percent for the total population.", + "type": "census", + "explanation": "The 2022 census is explicitly mentioned as a source of data that informs the analysis of refugee self-reliance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "The 2022 census is explicitly mentioned as a source of data that informs the analysis of refugee self-reliance.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 11, + "text": "The World Bank Jordan Youth, Technology, and Jobs Project ( P170669 ) Page 7 of 106 Economic Empowerment Action Plan ( supported by the Mashreq Gender Facility ). Furthermore, the Jordan National Commission for Women is developing a new National Women \u2019 s Strategy, encompassing the government \u2019 s vision of women \u2019 s empowerment and the national plans that address various pillars of gender equality. 4. There is a dearth of economic opportunities among Syrian refugees in Jordan. There are 660, 000 registered Syrian refugees in Jordan, according to UNHCR. Only 5 percent of registered Syrian refugee women work while half of registered Syrian refugee men work ( United Nations High Commissioner for Refugees - UNHCR, VAF2017 ). Jordan Labor Market Panel Survey ( JLMPS ) 2016 data indicated that 91 percent of those who work do so informally. Registered Syrian refugees are concentrated in construction ( 27 percent ), manufacturing ( 18 percent ), and the wholesale and retail sectors ( 19 percent ). B. Sectoral and Institutional Context 5. By 2020, one in five jobs in the Arab world will require digital skills that are not widely available today. The future of work for youth, women and refugees in Jordan will be determined by their ability to supply the skills demanded in emerging sectors driven by automation and innovation.", + "ner_text": [ + [ + 731, + 763, + "named" + ], + [ + 4, + 14, + "Jordan Labor Market Panel Survey <> publisher" + ], + [ + 455, + 470, + "Jordan Labor Market Panel Survey <> reference population" + ], + [ + 474, + 480, + "Jordan Labor Market Panel Survey <> data geography" + ], + [ + 731, + 737, + "Jordan Labor Market Panel Survey <> data geography" + ], + [ + 766, + 771, + "Jordan Labor Market Panel Survey <> acronym" + ], + [ + 774, + 778, + "Jordan Labor Market Panel Survey <> publication year" + ], + [ + 1207, + 1213, + "Jordan Labor Market Panel Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Only 5 percent of registered Syrian refugee women work while half of registered Syrian refugee men work ( United Nations High Commissioner for Refugees - UNHCR, VAF2017 ). Jordan Labor Market Panel Survey ( JLMPS ) 2016 data indicated that 91 percent of those who work do so informally. Registered Syrian refugees are concentrated in construction ( 27 percent ), manufacturing ( 18 percent ), and the wholesale and retail sectors ( 19 percent ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned to provide data on the labor market conditions of registered Syrian refugees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data on the labor market.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned to provide data on the labor market conditions of registered Syrian refugees.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "112_IDAR2016-0221-PAD-09012016", + "page": 47, + "text": "The MDLF will be responsible for monitoring and evaluation activities under this proposed project, including the achievement of the PDO, project outcomes, and physical, fiduciary, and safeguard performance. The MoLG will be responsible for providing technical input to the MDLF, namely all of the technical aspects of procurement, achieved results per result indicators to monitor progress toward PDO, and details of the proposed project activities. The monitoring and evaluation activities will also include the results that are not captured in the results framework, such as semi-annual feedback collection from the proposed project participating urban areas and subsequent fine-tuning of the proposed project activities. A structured assessment of capacity developed through this proposed project is also planned at the mid-term and end of the project. MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the FMIS to automate data aggregation, storage, and presentation as part of a results-based M & E system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the MDLF-implemented various projects and programs with the PA \u2019 s NDP strategic objectives. As such, no additional arrangements or staff is required for the M & E of this proposed project.", + "ner_text": [ + [ + 1134, + 1171, + "named" + ] + ], + "validated": false, + "empirical_context": "MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the FMIS to automate data aggregation, storage, and presentation as part of a results-based M & E system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the MDLF-implemented various projects and programs with the PA \u2019 s NDP strategic objectives.", + "type": "system", + "explanation": "However, it is described as a management information system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data aggregation and storage.", + "contextual_reason_agent": "However, it is described as a management information system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 13, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 10 of 89 less than 5, 000 inhabitants. 20 The lack of consolidated and reliable data on distribution of population by water sources; low local capacity for on-site water storage to bridge intermittent water supply; and exposure to prolonged periods of droughts, which are expected to become more frequent and longer because of climate change, make long-term planning of service delivery a challenge, especially in the context of the evident impact of climate change on different water sources. Figure 1. Distribution of Population by Density, 2020 Figure 2. Spatial Use of Improved Water Sources by Regions, Household Survey 2017 Source: TajStat, Population Census Data, 2020 Source: WB, WASH Poverty Diagnostics, 2017 10. Even where WSS infrastructure is available for provision of piped safely managed WSW services, this infrastructure is largely outdated and inefficient. This infrastructure was not designed to ensure resilience to climate change shocks, associated with high operation and maintenance ( O & M ) costs, and the institutions \u2019 capacity to consider alternative WSW collection / treatment options is limited. Maintenance is limited to quick fixes of apparent system breakdowns, affecting operational performance in the long term. The number of incidents in the piped water supply networks is extremely high.", + "ner_text": [ + [ + 737, + 759, + "named" + ], + [ + 4, + 14, + "Population Census Data <> publisher" + ], + [ + 15, + 25, + "Population Census Data <> data geography" + ], + [ + 633, + 637, + "Population Census Data <> publication year" + ], + [ + 728, + 735, + "Population Census Data <> author" + ], + [ + 761, + 765, + "Population Census Data <> publication year" + ], + [ + 774, + 776, + "Population Census Data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Distribution of Population by Density, 2020 Figure 2. Spatial Use of Improved Water Sources by Regions, Household Survey 2017 Source: TajStat, Population Census Data, 2020 Source: WB, WASH Poverty Diagnostics, 2017 10. Even where WSS infrastructure is available for provision of piped safely managed WSW services, this infrastructure is largely outdated and inefficient.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly referenced as a source of information in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Census Data', which typically refers to a structured collection of demographic information.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a source of information in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "102_Kenya-Water-PAD-04072017", + "page": 86, + "text": "The data to track many of the key performance indicators will come from national sources, the implementing agencies, and project-specific data collection efforts. For example, information on the number of people benefiting from the project will come from the national census ( most recently carried out in 2009 ) and projections of the National Bureau of Statistics of expected population growth. Data on operational performance of the WSPs will come from the WSPs themselves, with oversight and confirmation by WASREB. Information on the creation of project-financed infrastructure will come from reports of the independent supervision consultants. 66. Reporting. The MWI PCU is responsible for submitting quarterly project progress reports to the World Bank and to the relevant government officials at the MWI within six weeks of the end of each quarter. The Government and World Bank teams will discuss the findings of reports during each implementation support mission and agree on actions to address issues raised in the reports. 67. Beneficiary assessment. Within six months of the closing of the project, the MWI PCU will recruit an independent firm to conduct a beneficiary assessment. The objective of the beneficiary assessment will be to produce information on the benefits of the infrastructure investments and capacity-building support provided under the project. The assessment will review issues such as the relevance of the infrastructure to people living and working in the places that benefited from the investments, the number of people benefiting ( disaggregated by gender and vulnerable people ), and the performance of the relevant agencies in operating and maintaining the infrastructure. The assessment will also explore the quality and relevance of the studies in informing the design of the overall project interventions and the extent to which the capacity - building support achieved its objectives. 68. Capacity building for M & E. As mentioned above, most of the information on project implementation performance and progress toward results will come from the WSPs. Although", + "ner_text": [ + [ + 259, + 274, + "named" + ], + [ + 306, + 310, + "national census <> publication year" + ] + ], + "validated": true, + "empirical_context": "The data to track many of the key performance indicators will come from national sources, the implementing agencies, and project-specific data collection efforts. For example, information on the number of people benefiting from the project will come from the national census ( most recently carried out in 2009 ) and projections of the National Bureau of Statistics of expected population growth. Data on operational performance of the WSPs will come from the WSPs themselves, with oversight and confirmation by WASREB.", + "type": "census", + "explanation": "The national census is explicitly mentioned as a source of information for tracking key performance indicators, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of demographic data collected at a national level.", + "contextual_reason_agent": "The national census is explicitly mentioned as a source of information for tracking key performance indicators, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 20, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 16 of 40 Figure 4: Heat map of Sudan \u2019 s basic education schools Source: Based on 2018 / 19 School Census using Arcgis software. 39. Component 2 Program coordination and management ( US $ 0. 3 million ). This component will support the Federal Ministry of Educaiton ( MoE ) in overall program coordination, monitoring and evaluation. The PCU will cover functions such as planning, procurement, financial management, environmental and social safeguards and monitoring and evaluation. Technical experts will be mobilized as necessary. The PCU will monitor the progress by collecting and analyzing school-level data under the the Annual School Census. C. Project Beneficiaries 40. Primary beneficiaries are schoolchildren, teachers, and parents. Approximately 5. 4 million students will benefit from the project through provision of school grants. Communities in targeted areas will also benefit from enhance participatory school management.", + "ner_text": [ + [ + 675, + 692, + "named" + ], + [ + 4, + 14, + "school-level data <> publisher" + ], + [ + 15, + 20, + "school-level data <> data geography" + ], + [ + 111, + 116, + "school-level data <> data geography" + ], + [ + 162, + 185, + "school-level data <> reference year" + ], + [ + 784, + 798, + "school-level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Technical experts will be mobilized as necessary. The PCU will monitor the progress by collecting and analyzing school-level data under the the Annual School Census. C.", + "type": "data", + "explanation": "In this context, 'school-level data' is explicitly mentioned as being collected and analyzed, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'school-level data' is a dataset because it refers to data collected from schools.", + "contextual_reason_agent": "In this context, 'school-level data' is explicitly mentioned as being collected and analyzed, indicating it functions as a data source.", + "contextual_signal": "follows 'collecting and analyzing' indicating it is used as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "190_multi-page", + "page": 10, + "text": "Prior to the certification, and including the pre-implementation period during which the PPF will be used, disbursements will be based on SOEs subject to the thresholds described in Annex 6, as well as on supporting documentation for advances made to the SA for expenditures above the threshold and for direct payments. Each disbursement application will be signed by two authorized signatories whose names and corresponding signature specimens will be subrnitted to the Administrator through MOH. Counterpart Funding. The project will open a separate bank account for the counterpart funding from the PA for the Value Added Tax ( VAT ) compensation, as well as for the counterpart contributions financed by the PA, which represent 85 % of the incremental operating costs. Prior to Board presentation, the PIU will open an account for counterpart funding in a conmmercial bank, based on a formal agreement reached between the Ministry of Finance ( MOF ) and MOH regarding the procedure for VAT reimbursement. Monitoring and Evaluation ( M & E ). The PIU will be responsible for monitoring progress against agreed-upon performance indicators ( specified in Annex 1 ). For this purpose, it will develop and maintain a project information system which will generate annual progress reports for the PA and relevant donors, including the Administrator. Under Component 3, an in-depth Client Access, Utilization, and Satisfaction Survey ( CAUS ) will be conducted at the start and end of the project imnplementation period in order to measure the impact of the project on the quality and efficiency of the PHC services in the selected clinics. The MOH ' s technical units responsible for implementing each project component will provide the PIU with quarterly progress reports summarizing the current status of project implementation, including financial - 7 -", + "ner_text": [ + [ + 1379, + 1430, + "named" + ] + ], + "validated": true, + "empirical_context": "For this purpose, it will develop and maintain a project information system which will generate annual progress reports for the PA and relevant donors, including the Administrator. Under Component 3, an in-depth Client Access, Utilization, and Satisfaction Survey ( CAUS ) will be conducted at the start and end of the project imnplementation period in order to measure the impact of the project on the quality and efficiency of the PHC services in the selected clinics. The MOH ' s technical units responsible for implementing each project component will provide the PIU with quarterly progress reports summarizing the current status of project implementation, including financial - 7 -", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly described as a survey conducted to measure specific impacts, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly described as a survey conducted to measure specific impacts, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 67, + "text": "A robust and reliable water resources monitoring network is key to the operationalization of the IWRM framework and to building Uganda \u2019 s resilience to climatic variation. A review of the water monitoring network carried out by the DWRM in 2005 showed that there is insufficient baseline data on groundwater as well as monitoring of boreholes to represent the full range of hydrogeological and climatic conditions in Uganda. For surface water, the review revealed that the operational gauges are inadequate and that the digitized data are not transmitted to a central processing center due to lack of a data transmission interface with an automated sensor. To address these issues, the WMDP supported the strengthening and expansion of the existing hydrological, hydrometeorological, water quality monitoring network in the Kyoga and Upper Nile WMZs as well as the design of a comprehensive WIS that integrates the hydrological information system, spatial data system, information management system, knowledge management system, and data management system. The WMDP supported the development of the detailed design of the National WIS, which will now be implemented at the national and regional level in all WMZs through this project. 34. In addition, the Project will further improve water resources monitoring by providing additional monitoring stations and equipment to monitor surface water, groundwater, water quality, and climate variations. The Project will also support the use of online remote sensing equipment to enable real-time monitoring and improve the quality and timeliness of data collection for the WIS. The Project will also support the rehabilitation of the National Water Quality Reference Laboratory. Component 4: Project Implementation and Institutional Strengthening ( US $ 5. 5 million of which national IDA US $ 5. 0 million and counterpart funds US $ 0. 5 million ) This component will finance activities designed to ensure effective and efficient Project implementation and coordination as well as institutional strengthening to support WSS service delivery reforms. Project management activities will include ( a ) coordination of planning, monitoring, reporting and supervision of the Project; ( b ) training of MWE and NWSC staff on World Bank procedures related to procurement,", + "ner_text": [ + [ + 1123, + 1135, + "named" + ] + ], + "validated": false, + "empirical_context": "To address these issues, the WMDP supported the strengthening and expansion of the existing hydrological, hydrometeorological, water quality monitoring network in the Kyoga and Upper Nile WMZs as well as the design of a comprehensive WIS that integrates the hydrological information system, spatial data system, information management system, knowledge management system, and data management system. The WMDP supported the development of the detailed design of the National WIS, which will now be implemented at the national and regional level in all WMZs through this project. 34.", + "type": "system", + "explanation": "However, 'National WIS' is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'National WIS' is a dataset because it includes 'information system' in its name.", + "contextual_reason_agent": "However, 'National WIS' is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 21, + "text": "Therefore, MoNE has acquired the experience and capacity to implement the project and scale up its infrastructure capacity. ( b ) Turkey \u2019 s regulations and codes for structural design and seismic safety are well developed and applying those would avoid the creation of new risks and improve resilience in education facilities. ( c ) Project design and location selection is based on thorough analysis of quality education needs of SuTP at the provincial and community levels. Increasing education facilities \u2019 capacity is a major expressed need, as well as the support mechanisms to encourage SuTP children and youth to remain in school. ( d ) The prioritization of education facilities to be constructed was done by MoNE during project preparation and is in line with the findings of the World Bank-Government \u2019 s joint Muhtar survey ( formally titled \u2018 Mapping Location and Living Conditions of Syrians under Temporary Protection in Turkey: Results from National Muhtar Survey \u2019 ), which identifies localities where access to education is most stringent. ( e ) As learned from other country examples ( for example, the Syrian response in Jordan and Lebanon ), the geographical proximity of construction sites to communities experiencing fast growth due to intake of displaced populations provides useful criteria for education access expansion. Thus, the design and prioritization of investment packages seek to maximize efficient usage by the careful selection of locations to construct new or expanded education facilities.", + "ner_text": [ + [ + 957, + 979, + "named" + ], + [ + 130, + 136, + "National Muhtar Survey <> data geography" + ], + [ + 790, + 811, + "National Muhtar Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Increasing education facilities \u2019 capacity is a major expressed need, as well as the support mechanisms to encourage SuTP children and youth to remain in school. ( d ) The prioritization of education facilities to be constructed was done by MoNE during project preparation and is in line with the findings of the World Bank-Government \u2019 s joint Muhtar survey ( formally titled \u2018 Mapping Location and Living Conditions of Syrians under Temporary Protection in Turkey: Results from National Muhtar Survey \u2019 ), which identifies localities where access to education is most stringent. ( e ) As learned from other country examples ( for example, the Syrian response in Jordan and Lebanon ), the geographical proximity of construction sites to communities experiencing fast growth due to intake of displaced populations provides useful criteria for education access expansion.", + "type": "survey", + "explanation": "The National Muhtar Survey is explicitly mentioned as a source of findings that inform project preparation, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides findings relevant to education access.", + "contextual_reason_agent": "The National Muhtar Survey is explicitly mentioned as a source of findings that inform project preparation, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "190_multi-page", + "page": 25, + "text": "The project will support two workshops related to the production of the Data Dictionary and Data Model: ( i ) a preparatory workshop to define the objectives and pri. pare a detailed work program and strategies for the technical working group; and ( ii ) an eva luation workshop to review the Data Dictionary and Data Model that have been developed, and p [ lan for their dissemnination and use. ( b ) Preparation of the Health Data Dictionary anad Data Model. The Data Dictionary will contain the specific \" semantics \" of each term needed to define the MIS associated with the GHI and: linical information systems being developed under the project. The Data Model, which is the computer analog of the Data Dictionary, will define how each field is formatted within computer databases and how data elements are related to one another. The project will finance technical assista: ace for preparing the Health Data Dictionary and Data M: odel. * 2. 3 Creating a Health Insurance Information System ( US $ 0. 40 million ): The objective of this activity is to create the basis for a modern Health Insurance System that has a single registry Co: r all beneficiaries, and one that provides consistent health insurance services for all Palestinians in WBG. The new systemn would be a successor to today ' s existing systems - - based largely on their res-rective designs - - but it will also introduce a more updated capacity than exists in the current system. TVie new system will be housed in the new HIC in Gaza and linked to the West Bank and other areas in Gaza. At this stage, the system will include enhanced beneficiary registration, premium collectio; n, and eligibility retrieval functionality. Later projects would continue to build on this foundation,. dding payer functionality for claims adjudication and reimbursement, capitation, contracting, andl other advanced functions that are essential in expanding the choice of services available to the beneficiaries, while maintaining efficiency and control in terms of cost and quality. - 22 -", + "ner_text": [ + [ + 92, + 102, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will support two workshops related to the production of the Data Dictionary and Data Model: ( i ) a preparatory workshop to define the objectives and pri. pare a detailed work program and strategies for the technical working group; and ( ii ) an eva luation workshop to review the Data Dictionary and Data Model that have been developed, and p [ lan for their dissemnination and use.", + "type": "model", + "explanation": "'Data Model' is not a dataset as it refers to a conceptual framework rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Data Model' is a dataset because it includes the term 'data'.", + "contextual_reason_agent": "'Data Model' is not a dataset as it refers to a conceptual framework rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 31, + "text": "MINUSMA and UNOPS will ensure to have a well-knowledgeable Social Scientist whose role will be to ( i ) ensure project is in compliance with UNOPS applicable safeguards policies; and ( ii ) project is socially sound and inclusive ( gender, youth and disable persons, etc. have equal access to the benefits and are therefore not being victimized or segregated ). The Social Scientist of UNOPS will work closely with the Social Development Specialist of the World Bank to monitor the soundness of the overall project activities. 77. Gender-sensitive and informed project. A 2016 study commissioned by the World Bank TDRP, recent study findings, information from various national documents and reports in Mali, as well as field mission observations indicate that pre-existing gender inequalities in Mali are being aggravated and deepened by the continuing conflict situation. Before the crisis, there were glaring gender inequalities in most sectors of the social, political and economic life. About 5. 42 percent are female ex-combatants according to the recent survey conducted on a sample basis. The framing of the women, peace and security agenda by the Government Ministries and MINUSMA seeks to achieve better outcomes for both youth and women. Violence and crime exacerbated with fatal sporadic actions continue to undermine the personal and collective security of both Malian and foreigner girls and women. 78. Citizen engagement and participation.", + "ner_text": [ + [ + 719, + 745, + "named" + ] + ], + "validated": false, + "empirical_context": "Gender-sensitive and informed project. A 2016 study commissioned by the World Bank TDRP, recent study findings, information from various national documents and reports in Mali, as well as field mission observations indicate that pre-existing gender inequalities in Mali are being aggravated and deepened by the continuing conflict situation. Before the crisis, there were glaring gender inequalities in most sectors of the social, political and economic life.", + "type": "observations", + "explanation": "However, 'field mission observations' refers to qualitative insights rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'observations' can imply collected data.", + "contextual_reason_agent": "However, 'field mission observations' refers to qualitative insights rather than a structured collection of data.", + "contextual_signal": "mentioned only as observations, not as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 60, + "text": "The design of the Program is informed by extensive analytical background work, policy dialogue and operational experience with governance reforms, including digital reforms, in Jordan. The World Bank has commissioned a range of assessment reports regarding transparency and accountability systems ( such as the 2007 Access to Information Law and the government \u2019 s Grievance Redress platform called At Your Service ) and service delivery ( including e-service public value assessments, and the effectiveness of the judiciary system ). Under its ongoing projects, the World Bank is also financing the commissioning by the GOJ of critical evaluations for the purpose of the operation, such as a taxpayer journey mapping by the Income & Sales Tax Department and e-services public value evaluations by MODEE. In 2022, the World Bank has also applied the Country-Level Institutional Assessment to Jordan. It has engaged in policy dialogue with the governmental committee, which drafted the Public Sector Modernization Roadmap. It also provided it with several on-demand policy briefs ( for example, on performance management, the governance of public-private partnerships, the governance of accountability institutions and mechanisms, the planning function, the role and functions of the center of government, policy making, and change management ). In addition, the Bank advised the Committee on Digital Transformation.", + "ner_text": [ + [ + 693, + 717, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank has commissioned a range of assessment reports regarding transparency and accountability systems ( such as the 2007 Access to Information Law and the government \u2019 s Grievance Redress platform called At Your Service ) and service delivery ( including e-service public value assessments, and the effectiveness of the judiciary system ). Under its ongoing projects, the World Bank is also financing the commissioning by the GOJ of critical evaluations for the purpose of the operation, such as a taxpayer journey mapping by the Income & Sales Tax Department and e-services public value evaluations by MODEE. In 2022, the World Bank has also applied the Country-Level Institutional Assessment to Jordan.", + "type": "program", + "explanation": "However, it is described as a process or initiative rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'mapping' suggests a structured approach to data collection.", + "contextual_reason_agent": "However, it is described as a process or initiative rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 86, + "text": "In addition, the Third-Party Technical Review will focus on specific internal control reviews in respect of cash transfers in particular: ( i ) analysis and control of the database of beneficiaries and list of payments; ( ii ) review and monitoring selected payment agents / volunteers; ( iii ) cash transfers financial management arrangements, funds mechanisms and related financial reports for payment, and; ( iv ) grievance redress mechanism. The recruitment for the TPTR shall be guided by robust TOR and Risk based data Analytic Tools. 60. Conclusion: Internal controls risks are assessed as high. The risk rating is largely due to inherent risks associated with conditional and non-conditional cash transfers as well distribution of inputs / supplies ( through redeemable vouchers ) to the mass target population. Financial Reporting 61. FAO and ICRC have adequate FM systems and arrangements to provide quality and timely financial management reports. FAO and ICRC Heads of Finance at the Country offices in consultation with the technical teams and their Head Office counterpart staff shall each prepare and submit to the WB Six - Monthly Interim Unaudited Financial Reports ( IFRs ) to the World Bank no later than 45 days after the end of the reporting period. The IFRs, which shall form basis for funds flow draw down shall be prepared in content and format as shall be agreed between the WB and FAO and ICRC. During the FM assessments, it was confirmed FAO, FPMIS has the capability to be configured to support generation of the project financial reports. ICRC Sun system tracks and reports on expenditures occurred by assigned general objective code. In case a general objective is financed by several partners, the system supports customization of the project", + "ner_text": [ + [ + 1470, + 1475, + "named" + ] + ], + "validated": false, + "empirical_context": "The IFRs, which shall form basis for funds flow draw down shall be prepared in content and format as shall be agreed between the WB and FAO and ICRC. During the FM assessments, it was confirmed FAO, FPMIS has the capability to be configured to support generation of the project financial reports. ICRC Sun system tracks and reports on expenditures occurred by assigned general objective code.", + "type": "system", + "explanation": "FPMIS is described as a system that supports report generation, but it is not explicitly identified as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed FPMIS is a dataset because it is mentioned in the context of generating financial reports.", + "contextual_reason_agent": "FPMIS is described as a system that supports report generation, but it is not explicitly identified as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 80, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 75 of 117 survey designed and implemented for interventions such as the roll out of the CBC, scholarships provision and implementation of the school grants. Survey results are disseminated. implementing entities and other key stakeholders. Refugee learners, including in camp - based refugee schools and host communities are included in the surveys Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed. Phone based / on-line beneficiary satisfaction survey designed and implemented for interventions such as the roll out of the CBC, scholarships provision and implementation of the school grants. Survey results are disseminated. Annual MOE MoE will conduct the phone / online based satisfaction survey, analyze the findings and disseminate the results to schools, implementing entities and other key stakeholders. MOE.", + "ner_text": [ + [ + 517, + 570, + "named" + ], + [ + 317, + 333, + "Phone based / on-line beneficiary satisfaction survey <> reference population" + ], + [ + 348, + 376, + "Phone based / on-line beneficiary satisfaction survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Refugee learners, including in camp - based refugee schools and host communities are included in the surveys Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed. Phone based / on-line beneficiary satisfaction survey designed and implemented for interventions such as the roll out of the CBC, scholarships provision and implementation of the school grants. Survey results are disseminated.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data gathered through a survey for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on beneficiary satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data gathered through a survey for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "006_BOSIB-2c22668f-f4ba-42f5-a0d1-7949b7b8fe34", + "page": 11, + "text": "The participation rate in the labor force is low, and only 31 percent of the working-age population is employed. 5 The capital-intensive mining sector, the prime export earner ( securing over 70 percent of export receipts ), contributed 17. 5 percent of GDP in 2021 but only 2 percent of employment. 6 Lusaka and the mineral-rich Copperbelt Province contribute over half of GDP, while the other eight provinces combined contribute the rest. By contrast, agriculture is high in labor intensity ( 24 percent of total employment ) but with low productivity ( the contribution to GDP was 3. 4 percent in 2021 ). 7 4. Zambia \u2019 s economy and population are highly vulnerable to climate change. The country \u2019 s dependence on rainfed agriculture, which employs two-thirds of the workforce, and on hydropower makes it particularly vulnerable to climate shocks, threatening food production, electricity supply, and economic growth. While the country experiences low exposure to natural disaster risks in general, resilience is hindered by social vulnerability. Adverse impacts of climate change include increased frequency and severity of seasonal droughts, higher temperatures, flash floods, occasional dry spells, and changes in the growing season. In agriculture, the key risk stemming from climate change is the projected lower maize yields, as this is the country \u2019 s staple crop. Zambia is currently experiencing drought conditions in 84 of its 116 1 Zamstats. gov. zm. 2 Zambia Statistics Agency. 2023. Highlights of the 2022 Poverty Assessment in Zambia. World Bank. 2023. Zambia Gender Assessment. 3 Zambia: Selected Issues, IMF, 2023. 4 2022 Census of Population and Housing, GRZ 2023. 5 According to the 2021 Labour Force Survey Report by Zamstat, the working-age population ( 15 and older ) is 10 million, of which only 3. 1 million are employed. 6 Zambia: Selected Issues, IMF, 2023. 7 Zambia: Selected Issues, IMF, 2023.", + "ner_text": [ + [ + 1710, + 1736, + "named" + ], + [ + 77, + 99, + "Labour Force Survey Report <> reference population" + ], + [ + 261, + 265, + "Labour Force Survey Report <> publication year" + ], + [ + 302, + 308, + "Labour Force Survey Report <> data geography" + ], + [ + 330, + 349, + "Labour Force Survey Report <> data geography" + ], + [ + 600, + 604, + "Labour Force Survey Report <> reference year" + ], + [ + 613, + 619, + "Labour Force Survey Report <> data geography" + ], + [ + 1376, + 1382, + "Labour Force Survey Report <> data geography" + ], + [ + 1545, + 1551, + "Labour Force Survey Report <> data geography" + ], + [ + 1571, + 1577, + "Labour Force Survey Report <> data geography" + ], + [ + 1705, + 1709, + "Labour Force Survey Report <> publication year" + ], + [ + 1740, + 1747, + "Labour Force Survey Report <> publisher" + ], + [ + 1753, + 1775, + "Labour Force Survey Report <> reference population" + ] + ], + "validated": true, + "empirical_context": "4 2022 Census of Population and Housing, GRZ 2023. 5 According to the 2021 Labour Force Survey Report by Zamstat, the working-age population ( 15 and older ) is 10 million, of which only 3. 1 million are employed.", + "type": "survey", + "explanation": "This is indeed a dataset as it presents structured data regarding the working-age population and employment statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a report that provides statistical information about the labor force.", + "contextual_reason_agent": "This is indeed a dataset as it presents structured data regarding the working-age population and employment statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 107, + "text": "The updated UBOS / MTF Survey and access assessment will include the following datasets: a ) Ongoing geo-referencing of customers by the MEMD will establish the platform to keep track of gender-disaggregated data on customers. b ) Data collected by the UECCC would provide information on male and female access to efficient appliances, productive uses technologies, and clean cooking solutions. 7. Productive uses market assessment update. Women and men use energy differently. Women in Uganda work in micro and small-scale enterprises, particularly in the informal sector. Women frequently run income-generating activities from home since it enables them to combine productive tasks with activities in the household, such as cooking and childcare60. With the support of the World Bank, an assessment of the solar off-grid market for productive uses was conducted in 2019. 61 The assessment, a first of its kind, would be updated to include an assessment of women \u2019 s access to productive uses technologies, affordability, willingness to pay, and financial inclusion. The assessment will be conducted in close collaboration with the \u2018 Financial profiling of beneficiaries for customized debt instruments \u2019 study under the UECCC under Component 4. The ongoing geo-referencing of customers would further inform improved gender-disaggregated consumption trends and demand estimation. The activity will be implemented by the UECCC in close collaboration with the MEMD and the SPs. 8.", + "ner_text": [ + [ + 12, + 29, + "named" + ], + [ + 187, + 212, + "UBOS / MTF Survey <> data type" + ], + [ + 440, + 445, + "UBOS / MTF Survey <> reference population" + ], + [ + 478, + 483, + "UBOS / MTF Survey <> reference population" + ], + [ + 487, + 493, + "UBOS / MTF Survey <> data geography" + ], + [ + 775, + 785, + "UBOS / MTF Survey <> publisher" + ], + [ + 867, + 871, + "UBOS / MTF Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "The updated UBOS / MTF Survey and access assessment will include the following datasets: a ) Ongoing geo-referencing of customers by the MEMD will establish the platform to keep track of gender-disaggregated data on customers. b ) Data collected by the UECCC would provide information on male and female access to efficient appliances, productive uses technologies, and clean cooking solutions.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is mentioned alongside other datasets and is part of a data collection effort.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is mentioned alongside other datasets and is part of a data collection effort.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 83, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 79 of 103 the lack of logistics, getting real-time data on service delivery indicators such as functionality will continue to be a challenge. In response to this challenge, the CWA will provide support to explore options for community-based WASH data collection. The support will include: ( i ) assessment of the different community-based data collection tools that could complement and integrate with the WASH MIS; ( ii ) designing ( including the selection of frequently needed indicators ), piloting, and rolling out the selected community-based data collection tool; ( iii ) institutionalizing the data collection responsibility at the WASHCOM level ( inclusion of this responsibility as part of the WASHCOM legalization document ); and ( iv ) continuous capacity building of WASHCOM members for regular reporting. Information collected from community-level monitoring will be integrated into sector MIS. c. Introducing the rural water and sanitation information system ( SIASAR ) in Ethiopia: The WASH sector in Ethiopia can draw lessons from the SIASAR model implemented in different countries of Latin America, Africa, and Central Asia. The Project will build on the ongoing WASH MIS initiative for Ethiopia and will link to SIASAR to allow a compressive and broader range of data analysis for decision making.", + "ner_text": [ + [ + 1057, + 1102, + "named" + ] + ], + "validated": false, + "empirical_context": "c. Introducing the rural water and sanitation information system ( SIASAR ) in Ethiopia: The WASH sector in Ethiopia can draw lessons from the SIASAR model implemented in different countries of Latin America, Africa, and Central Asia. The Project will build on the ongoing WASH MIS initiative for Ethiopia and will link to SIASAR to allow a compressive and broader range of data analysis for decision making.", + "type": "system", + "explanation": "However, the context describes it as a system linked to a project, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'information system' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, the context describes it as a system linked to a project, not explicitly as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "159_38147core", + "page": 33, + "text": "The UNHCR supervised a census o f refugee camps in Puttalam in April, 2006. The survey provided detailed information on IDPs living in refugee camps. This included the number o f refugee camps, number o f families in each camp, their level o f education, vulnerability, employment status, obstacles to return, extent o f land ownership and title to such land, type o f houses and other relevant socio-economic information. ii. The PHP sponsored a Social Assessment ( SA ) o f IDPs in Puttalam which mapped the type o f housing, land ownership, possession o f title, livelihood opportunity, socio-economic profile, social capital, and the relationship between IDPs and non-IDPs. It did this through focus group discussions and beneficiary interviews using participatory rural appraisal methods in 11 1 refugee camps. iii. The Environmental Assessment ( EA ) reviewed the suitability o f 111 refugee camps for housing construction. The PPU used a questionnaire - Environmental Checklist - to obtain basic data. This was followed by an in-depth examination o f environmental suitability in each camp. The report provided important information such as drainage conditions, solid waste disposal, sanitation facilities and the threat o f flooding. This was a deciding factor for the sequencing o f refugee camps for housing assistance. 28", + "ner_text": [ + [ + 23, + 59, + "named" + ], + [ + 4, + 9, + "census o f refugee camps in Puttalam <> publisher" + ], + [ + 51, + 59, + "census o f refugee camps in Puttalam <> data geography" + ], + [ + 63, + 74, + "census o f refugee camps in Puttalam <> publication year" + ], + [ + 120, + 148, + "census o f refugee camps in Puttalam <> reference population" + ], + [ + 168, + 192, + "census o f refugee camps in Puttalam <> data description" + ], + [ + 194, + 226, + "census o f refugee camps in Puttalam <> data description" + ], + [ + 310, + 358, + "census o f refugee camps in Puttalam <> data description" + ], + [ + 360, + 375, + "census o f refugee camps in Puttalam <> data description" + ] + ], + "validated": true, + "empirical_context": "The UNHCR supervised a census o f refugee camps in Puttalam in April, 2006. The survey provided detailed information on IDPs living in refugee camps.", + "type": "census", + "explanation": "This is indeed a dataset as it is a structured collection of data regarding refugees in camps, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data regarding refugees in camps, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 9, + "text": "Despite some moderation, the risk of debt distress remains high, with the public debt accounting for 43 percent of the GDP. An increasing number of households reported reduced expenditure, including on food ( 33 percent in August 2021 versus 28 percent a year earlier ), and poverty rates increased, reversing the previous trend. 5 A sharp increase in consumer prices and falling wage incomes reduced food security, particularly for vulnerable households without remittance income. Disposable incomes have fallen, and recovery is expected to be slow. The Tajik economy continues to be vulnerable to economic shocks, including the unfolding crisis in Ukraine, 1 World Bank. World Development Indicators. 2 Word Bank. 2021. Tajikistan Macroeconomic and Poverty Outlook. 3 Republic of Tajikistan. 2020. Tajikistan COVID-19 Country Preparedness and Response Plan. Dushanbe, Tajikistan. 4 International Finance Corporation. 2020. COVID-19 Impact Assessment Survey. Tajikistan and Kyrgyz Republic. 5 World Bank. 2020. Tajikistan: Economic Slowdown Amid the Pandemic. Tajikistan Country Economic Update ( Fall 2021 ).", + "ner_text": [ + [ + 673, + 701, + "named" + ], + [ + 230, + 234, + "World Development Indicators <> publication year" + ], + [ + 661, + 671, + "World Development Indicators <> publisher" + ], + [ + 705, + 714, + "World Development Indicators <> publisher" + ], + [ + 722, + 732, + "World Development Indicators <> data geography" + ], + [ + 770, + 792, + "World Development Indicators <> data geography" + ], + [ + 794, + 798, + "World Development Indicators <> publication year" + ], + [ + 800, + 810, + "World Development Indicators <> data geography" + ], + [ + 919, + 923, + "World Development Indicators <> publication year" + ], + [ + 960, + 970, + "World Development Indicators <> data geography" + ], + [ + 994, + 1004, + "World Development Indicators <> publisher" + ], + [ + 1103, + 1107, + "World Development Indicators <> publication year" + ] + ], + "validated": true, + "empirical_context": "The Tajik economy continues to be vulnerable to economic shocks, including the unfolding crisis in Ukraine, 1 World Bank. World Development Indicators. 2 Word Bank.", + "type": "dataset", + "explanation": "It is indeed a dataset as it is a recognized source of data used for empirical analysis of economic indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in the context of economic analysis.", + "contextual_reason_agent": "It is indeed a dataset as it is a recognized source of data used for empirical analysis of economic indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 40, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 29 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Direct Project Beneficiaries Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name of which female Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source MWE / DWD / DWRM progress reports, annual water & environments sector performance report, Water Supply Atlas ( database of water supply, functionality and distribution of all Uganda districts ), NWSC annual report and refugee database.", + "ner_text": [ + [ + 413, + 429, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 29 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Direct Project Beneficiaries Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a monitoring and evaluation system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a monitoring and evaluation system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 155, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 148 of 174 data provided by the UNHCR67 on refugee camps as well as host communities and ( b ) the list of priority areas under the PRA. While this exercise has been finalized for Component 1 where a preliminary list of localities for grid densification and extension is defined, the same exercise will be carried out for other components as part of ongoing studies. 5. In figure 4. 1, the first map provides the number and locations of refugees and IDPs ( according to the UNHCR data ). There are 47 PARCA communes that host refugees and IDPs. The second map of Niger shows the PRA priority communes and refugee zones and the existing national electricity grid, isolated mini grids electrified localities, and new areas to be electrified through the grid under Component 1. The three other maps ( in the bottom ) show the locations of refugees in Tillab\u00e9ry and Tahoua Regions, Maradi Region, and Diffa Region from left to right.", + "ner_text": [ + [ + 562, + 572, + "named" + ], + [ + 15, + 20, + "UNHCR data <> data geography" + ], + [ + 131, + 144, + "UNHCR data <> reference population" + ], + [ + 538, + 542, + "UNHCR data <> reference population" + ], + [ + 936, + 945, + "UNHCR data <> data geography" + ], + [ + 950, + 964, + "UNHCR data <> data geography" + ], + [ + 966, + 979, + "UNHCR data <> data geography" + ], + [ + 985, + 997, + "UNHCR data <> data geography" + ] + ], + "validated": true, + "empirical_context": "In figure 4. 1, the first map provides the number and locations of refugees and IDPs ( according to the UNHCR data ). There are 47 PARCA communes that host refugees and IDPs.", + "type": "data", + "explanation": "In this context, 'UNHCR data' is indeed a dataset as it provides structured information used to identify the locations and numbers of refugees and IDPs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'UNHCR data' is a dataset because it is referenced in relation to the number and locations of refugees and IDPs.", + "contextual_reason_agent": "In this context, 'UNHCR data' is indeed a dataset as it provides structured information used to identify the locations and numbers of refugees and IDPs.", + "contextual_signal": "follows 'according to' indicating it is a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 64, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 60 of 68 of short cycle courses. IRI # 18: Modernizing Education and Skills Governance Sub-component 2. 1 Annually Annual Work Plan and Budgets DGETFP will gather data from AWPBs DGETFP IRI # 19 Share of TVET graduates in project supported training programs who report that they are satisfied with their acquisition of employability skills. ( disaggregated ) Indicator will be disaggregated by economic sector, gender, refugee status and disability status. The findings compiled through the beneficiary surveys will be used to plan and implement time and bound actions or action plans to address this feedback. The results of the beneficiary survey will inform the development and implementation of the CEPs. Annually Survey conducted by PMU Survey using technology DGETFP ME IO Table SPACE", + "ner_text": [ + [ + 574, + 593, + "named" + ], + [ + 4, + 14, + "beneficiary surveys <> publisher" + ], + [ + 15, + 23, + "beneficiary surveys <> data geography" + ], + [ + 287, + 301, + "beneficiary surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "( disaggregated ) Indicator will be disaggregated by economic sector, gender, refugee status and disability status. The findings compiled through the beneficiary surveys will be used to plan and implement time and bound actions or action plans to address this feedback. The results of the beneficiary survey will inform the development and implementation of the CEPs.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' are explicitly mentioned as a source of findings that will inform actions and plans, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' implies a structured collection of data gathered from beneficiaries.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' are explicitly mentioned as a source of findings that will inform actions and plans, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "034_Lebanon-Beirut-Housing-Rehabilitation-and-Cultural-and-Creative-Industries-Recovery", + "page": 46, + "text": "The World Bank Beirut Housing Rehabilitation and Cultural and Creative Industries Recovery ( P176577 ) Page 41 of 66 project will undertake proactive efforts to communicate the service standard to address and respond to feedback that will be received. mechanism Beneficiaries reporting satisfaction with project activities Percentage of beneficiaries in component 1 satisfied with project application, grant disbursement, implementation, and technical support. Beneficiaries in component 2 reporting improved community cohesion, enhanced social inclusion, and neighborhood revitalization. The findings of these surveys will be published and / or that the survey findings will be used by the implementing entity to generate an action plan to address the feedback acquired through the surveys. At mid - point of project and project closure The scope of the GRM will include complaints and other types of feedback such as suggestions, queries ( e. g. Quality of Life Survey ) and compliments A survey will be carried out with direct beneficiaries of the project. The survey will be administered halfway through implementation and at the culmination of project activities.", + "ner_text": [ + [ + 948, + 970, + "named" + ], + [ + 15, + 21, + "Quality of Life Survey <> data geography" + ], + [ + 262, + 275, + "Quality of Life Survey <> reference population" + ], + [ + 991, + 997, + "Quality of Life Survey <> data type" + ], + [ + 1023, + 1043, + "Quality of Life Survey <> reference population" + ], + [ + 1184, + 1202, + "Quality of Life Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "g. Quality of Life Survey ) and compliments A survey will be carried out with direct beneficiaries of the project. The survey will be administered halfway through implementation and at the culmination of project activities.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned that a survey will be carried out to collect data from beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned that a survey will be carried out to collect data from beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 61, + "text": "The World Bank ENHANCING CONNECTIVITY AND RESILIENCE IN THE FAR NORTH OF CAMEROON FOR INCLUSIVENESS PROJECT ( P178207 ) Page 62 of 82 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Average travel time between Mora and Kouss\u00e9ri This indicator measures vehicles travel time between Mora and Kousseri. Annually Traffic Survey and M & E Reports. Open source from Geospatial Data ( e. g., Open Street Map ) Traffic Survey Project Implementation Unit Length of MDK road vulnerable to identified climate change hazards ( floods and heat ). Measures the length of the MDK road corridor that is at risk of flooding and extreme heat, which are projected to increase in frequency and intensity with climate change. Annually Implementati on reports. Verification of implementation of proposed climate resilience features. Project implementation Unit and MINTP. The share of people with access to an all - weather passable road within five kilometers of the MDK road section ( modified road access index ). This indicator measures the number of beneficiaries with improved access to an all-weather passable road within five kilometers of the MDK road section.", + "ner_text": [ + [ + 525, + 540, + "named" + ], + [ + 60, + 81, + "Open Street Map <> data geography" + ], + [ + 322, + 367, + "Open Street Map <> data description" + ] + ], + "validated": true, + "empirical_context": "g. , Open Street Map ) Traffic Survey Project Implementation Unit Length of MDK road vulnerable to identified climate change hazards ( floods and heat ). Measures the length of the MDK road corridor that is at risk of flooding and extreme heat, which are projected to increase in frequency and intensity with climate change.", + "type": "database", + "explanation": "In this context, Open Street Map is used as a source of geographic data relevant to the traffic survey project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because Open Street Map is known for providing geographic data.", + "contextual_reason_agent": "In this context, Open Street Map is used as a source of geographic data relevant to the traffic survey project.", + "contextual_signal": "mentioned as a data source for the project", + "tags": [] + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 19, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 15 of 66 23. Functional mechanisms to support evidence-based decision-making and management of the sector are needed. MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators. Quality, reliable data is fundamental to inform education decisions and policies and enhance transparency. Accordingly, strengthening the EMIS and the annual school survey process to provide quality education information, in a timely manner, at every administrative level is essential to support the functions of the education ministries. 24. The education governing ministries lack the policy tools and implementation capacity needed for education sector management. The General Education system is characterized by inequitable allocation of teachers across the states, unclear deployment drivers, and outdated salary structure.", + "ner_text": [ + [ + 670, + 709, + "named" + ] + ], + "validated": false, + "empirical_context": "MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it could contain data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 69, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 65 of 86 Beneficiaries of job-focused interventions - host communities, disaggregated by gender Beneficiaries of job-focused interventions that live in communities hosting refugees Annual Project Management Information System The enrolment module of the MIS contains information on the locality of residence of the beneficiaries PIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of provinces implementing cash transfers Number of provinces of the country where cash transfers activities financed by the project are implemented Annual Implementati on Progress Reports The Project Implementation Unit will inform of the provinces where the project is being implemented Project Implementation Unit Beneficiaries receiving payments on the schedule set-up in the operational manual Share of cash transfers beneficiaries that receive the transfers within the time period stipulated in the project operational manual Every 6 months Payment agent reports The payment agent reports include the date of payments and this will be compared with the date initially planned Proportion of beneficiary households in the beneficiary collines participating in behavior change promotion activities Share of households that attend sessions on human capital development Every six months Attendance sheets from NGOs carrying out the sessions NGOs implementing accompanying measures on human capital development will report on the attendance of Project Implementation Unit", + "ner_text": [ + [ + 1149, + 1170, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Cash for Jobs Project ( P175327 ) Page 65 of 86 Beneficiaries of job-focused interventions - host communities, disaggregated by gender Beneficiaries of job-focused interventions that live in communities hosting refugees Annual Project Management Information System The enrolment module of the MIS contains information on the locality of residence of the beneficiaries PIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of provinces implementing cash transfers Number of provinces of the country where cash transfers activities financed by the project are implemented Annual Implementati on Progress Reports The Project Implementation Unit will inform of the provinces where the project is being implemented Project Implementation Unit Beneficiaries receiving payments on the schedule set-up in the operational manual Share of cash transfers beneficiaries that receive the transfers within the time period stipulated in the project operational manual Every 6 months Payment agent reports The payment agent reports include the date of payments and this will be compared with the date initially planned Proportion of beneficiary households in the beneficiary collines participating in behavior change promotion activities Share of households that attend sessions on human capital development Every six months Attendance sheets from NGOs carrying out the sessions NGOs implementing accompanying measures on human capital development will report on the attendance of Project Implementation Unit", + "type": "report", + "explanation": "However, 'Payment agent reports' are mentioned as a source of information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured information.", + "contextual_reason_agent": "However, 'Payment agent reports' are mentioned as a source of information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 79, + "text": "Initially the survey verifies and ensures the beneficiaries identified in the selection process meet the selection criteria ( male, female, age, IDP, HH heads, etc. ) of the project. In a second stage, the survey focuses on the work performed ( site, number of days, tools used, type of infrastructure, supervision, payments received from the money vendor, etc. ). In a third stage the Call Centre will focus on further work performed and verifies that the payments or benefits that were to be received by beneficiaries from earlier stages of the project, were in fact received. Based on validation and clearances of the Call Centre data, the contractual payments will be made to the Service Provider ( SP ) and the beneficiaries. Any diversions or lack of compliance with contractual obligations will result that a payment requested by the SP or due to the beneficiaries will be put on hold until an analysis / evaluation of the results found by the Call Centre is clarified or found to be justified. The payments due to the SP or the beneficiaries will be adjusted in cases where compliance with contractual obligation ( s ) is weak or lacking. 36.", + "ner_text": [ + [ + 14, + 20, + "named" + ], + [ + 46, + 59, + "survey <> reference population" + ], + [ + 506, + 519, + "survey <> reference population" + ], + [ + 716, + 729, + "survey <> reference population" + ], + [ + 858, + 871, + "survey <> reference population" + ], + [ + 1036, + 1049, + "survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Initially the survey verifies and ensures the beneficiaries identified in the selection process meet the selection criteria ( male, female, age, IDP, HH heads, etc. ) of the project. In a second stage, the survey focuses on the work performed ( site, number of days, tools used, type of infrastructure, supervision, payments received from the money vendor, etc. ).", + "type": "survey", + "explanation": "In this context, the survey is explicitly described as verifying and collecting data about beneficiaries and their work, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because surveys are often structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, the survey is explicitly described as verifying and collecting data about beneficiaries and their work, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 44, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 39 of 61 Households with complete information in the Social Registry - Refugee Number of refugee households registred within the Social Registry database and for which the socio - economic data has been collected and recorded. Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years. Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "ner_text": [ + [ + 132, + 147, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 88, + 98, + "Social Registry <> reference population" + ], + [ + 168, + 186, + "Social Registry <> reference population" + ], + [ + 251, + 272, + "Social Registry <> data type" + ], + [ + 412, + 422, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 39 of 61 Households with complete information in the Social Registry - Refugee Number of refugee households registred within the Social Registry database and for which the socio - economic data has been collected and recorded. Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years.", + "type": "database", + "explanation": "The Social Registry is indeed a dataset as it is described as a database that collects and records socio-economic data for households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'database' containing socio-economic data.", + "contextual_reason_agent": "The Social Registry is indeed a dataset as it is described as a database that collects and records socio-economic data for households.", + "contextual_signal": "described as a database that collects and records data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 14, + "text": "In FY16 / 17, 64 percent of boys and 78 percent of girls ( between the ages of 5 and 16 ) were not enrolled in primary and secondary schools in Balochistan, compared to 40 percent of boys and 49 percent of girls at the national level. 22 The overall net enrollment and effective transition rates, from primary to middle school and middle to high school, were low compared to national rates, especially among girls. When it comes to student learning metrics, children in Balochistan also perform poorly in comparison to the same age groups in rural Pakistan communities. For example, approximately 60 percent of children in grade 5 could not perform a two \u2010 digit division problem. The 2018 ASER report also highlighted a wide gender gap in student learning, with 31 percent of boys and 20 percent of girls ( ages 5 to 16 years ) being able to read second \u2010 grade level sentences 19 Expanded Program on Immunization ( EPI ), Tuberculosis, Malaria and Vector Borne Diseases Control Program, Maternal, Newborn, and Child Health ( MNCH ), Lady Health Workers ( LHW ) Program, District Health Information System ( DHIS ), Nutrition Program, HIV \u2010 AIDS, Prime Minister \u2019 s Initiative for Hepatitis Control Program, Leprosy Control Program, National Program for Prevention and Control of Blindness, and Provincial Disaster and Surveillance", + "ner_text": [ + [ + 432, + 456, + "named" + ] + ], + "validated": false, + "empirical_context": "22 The overall net enrollment and effective transition rates, from primary to middle school and middle to high school, were low compared to national rates, especially among girls. When it comes to student learning metrics, children in Balochistan also perform poorly in comparison to the same age groups in rural Pakistan communities. For example, approximately 60 percent of children in grade 5 could not perform a two \u2010 digit division problem.", + "type": "concept", + "explanation": "'Student learning metrics' is mentioned as a concept describing performance rather than a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'student learning metrics' refers to a structured collection of data related to student performance.", + "contextual_reason_agent": "'Student learning metrics' is mentioned as a concept describing performance rather than a specific dataset or data source.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 18, + "text": "The Government of Ethiopia \u2019 s 2016 Ethiopia Demographic and Health Survey indicates that 26 percent of all women aged 15 to 49 reported having experienced physical or sexual violence in their lifetime, with 23 percent reporting physical violence and 10 percent experiencing sexual violence. 30 These data, however, likely belie the full extent of the challenge. A survey conducted by the UN Population Fund ( UNFPA ) in 2010 among youths between the ages 12 and 24 in select regions indicates that GBV prevalence may be even higher31, with 15 percent of young women reporting having experienced sexual violence in their lifetime. 32 Experience of intimate partner violence is particularly pronounced; more than one third of women ( 34 percent ) have experienced some form of spousal violence \u2013 physical, sexual, or emotional. The survey found that acceptability of use of violence at home was high, with 63 percent of women and 27. 6 percent of men believing that wife beating was justified for at least one specified reason. 33 Help seeking behavior of GBV survivors was found to be limited \u2013 only 23 percent of women who experienced physical and / or sexual violence sought help, while 66 percent of women never sought help nor told anyone about their experience.", + "ner_text": [ + [ + 36, + 74, + "named" + ], + [ + 31, + 35, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 36, + 44, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 108, + 127, + "Ethiopia Demographic and Health Survey <> reference population" + ], + [ + 365, + 371, + "Ethiopia Demographic and Health Survey <> data type" + ] + ], + "validated": true, + "empirical_context": "The Government of Ethiopia \u2019 s 2016 Ethiopia Demographic and Health Survey indicates that 26 percent of all women aged 15 to 49 reported having experienced physical or sexual violence in their lifetime, with 23 percent reporting physical violence and 10 percent experiencing sexual violence. 30 These data, however, likely belie the full extent of the challenge.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data used to report on demographic and health statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on women's experiences with violence.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used to report on demographic and health statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 118, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 113 of 117 Action Description DLI Respon sible Timing Completion measurement 2. Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender. 3. Complete the ongoing process to institutionalize the process and procedures for administration and management of scholarships by Jomo Kenyatta Foundation ( JKF ) Integrated in DLI # 4 MoE Year 2 Scholarship administration and management processes and procedures established at JKF. 4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A. MoE By Year 2, 2023 Complaints and grievances lodged through the NEMIS. Number of grievances satisfactorily addressed 5. Assess the outcomes of biogas pilots and analyze existing practices on use of biogas in schools N / A MoE Year 2 Costed action plan developed to scale up biogas projects.", + "ner_text": [ + [ + 176, + 181, + "named" + ], + [ + 251, + 293, + "NEMIS <> reference population" + ], + [ + 452, + 474, + "NEMIS <> reference population" + ], + [ + 956, + 960, + "NEMIS <> publication year" + ] + ], + "validated": true, + "empirical_context": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 113 of 117 Action Description DLI Respon sible Timing Completion measurement 2. Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender.", + "type": "system", + "explanation": "NEMIS is indeed a data source as it is described as having capabilities for collecting and reporting data on learners with special needs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of collecting and reporting data.", + "contextual_reason_agent": "NEMIS is indeed a data source as it is described as having capabilities for collecting and reporting data on learners with special needs.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "172_multi0page", + "page": 8, + "text": "Rehabilitation of the School System ( i ) 50 % of primary and JSS schools in the target districts reach the BOL; ( ii ) 70 % completion rate in primary schools in target districts ( iii ) 20 % increase in girls ' enrollment rate in primary schools in the target districts of northern and eastern regions ( iv ) 20 % increase in enrollment in Junior Secondary Schooling ( JSS ) in the target districts ( v ) 20 % increase in passing rate of students taking the Basic Education Certificate Exammnation ( BECE ) Sector management ( i ) A Project Coordination Unit ( PCU ) exists within the MEST capable of conducting: the procurement and financial management of the project; technical review of proposals to develop Component I of the Project; and monitoring and supervision of all project activities; ( ii ) An enhanced capacity of the MEST to conduct: planning, monitoring and evaluation ( M & E ) of the provision of school services, and stakeholder coordination; ( iii ) Existence of an operational Education Management Information System ( EMIS ) fully integrated - 3 -", + "ner_text": [ + [ + 1000, + 1039, + "named" + ] + ], + "validated": false, + "empirical_context": "Rehabilitation of the School System ( i ) 50 % of primary and JSS schools in the target districts reach the BOL; ( ii ) 70 % completion rate in primary schools in target districts ( iii ) 20 % increase in girls ' enrollment rate in primary schools in the target districts of northern and eastern regions ( iv ) 20 % increase in enrollment in Junior Secondary Schooling ( JSS ) in the target districts ( v ) 20 % increase in passing rate of students taking the Basic Education Certificate Exammnation ( BECE ) Sector management ( i ) A Project Coordination Unit ( PCU ) exists within the MEST capable of conducting: the procurement and financial management of the project; technical review of proposals to develop Component I of the Project; and monitoring and supervision of all project activities; ( ii ) An enhanced capacity of the MEST to conduct: planning, monitoring and evaluation ( M & E ) of the provision of school services, and stakeholder coordination; ( iii ) Existence of an operational Education Management Information System ( EMIS ) fully integrated - 3 -", + "type": "system", + "explanation": "However, it is mentioned as a system that supports management and coordination, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system that supports management and coordination, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 35, + "text": "One could argue that all Palestinian students are currently experiencing a diversity of needs, educational and / or psychosocial, and that these should be recognized and catered for. This requires a change of focus from providing access to providing quality education relevant to the diverse needs of all students \u2013 a paradigm shift from a special education and disability focus to inclusive education. The limited ability of the school system to provide psychosocial counseling and other services for affected children, and especially for students with special needs, has become evident. Although the EDSP calls for disability disaggregated monitoring of results, the EMIS and the M & E system are currently not capable of producing reliable data and indicators for evidence-based decision-making related to disabled learners. 118. The EDSP reports an alarming decline in pre-school education. Poor and special needs children stand to benefit most of preschool programs. In regular classrooms the current academic and overloaded school curriculum presents disproportionate challenges to learners with special needs. Meeting the needs of conflict-affected children, as well as the needs of their teachers and parents, deserves special attention especially in Gaza but also in the West Bank. Providing access and improving support services for students with special educational needs is among the key challenges of the EDSP. Supervision 119. Current Situation. Supervision is the essential link between the school and the planner.", + "ner_text": [ + [ + 669, + 673, + "named" + ] + ], + "validated": false, + "empirical_context": "The limited ability of the school system to provide psychosocial counseling and other services for affected children, and especially for students with special needs, has become evident. Although the EDSP calls for disability disaggregated monitoring of results, the EMIS and the M & E system are currently not capable of producing reliable data and indicators for evidence-based decision-making related to disabled learners. 118.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data production.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 14, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 12 of 34 16. The project is targeting 520 vulnerable IDP participants with the aim of benefiting their households, so a broader group of 2, 100 total beneficiaries. The average household size in Azerbaijan is 4. 1 persons. C. PDO-Level Results Indicators 17. Achievement of the proposed Project Development Objective, will be measured through the following indicators: \u2022 Percentage of participants self-employed or employed by firms \u2022 Increase in income of households with individuals participating in the project \u2022 Percentage of registered participants completing training and receiving certificates \u2022 Beneficiaries of job-focused interventions, of which female ( core World Bank indicator ) \u2022 Percentage of beneficiaries taking a more active role in their communities disaggregated by gender and persons with disability 18. Baseline data on indicators will be collected to facilitate the measurement of project impact. Upon registration of participants for project support, data will be gathered to establish baseline conditions for each beneficiary and their household. Follow-up surveys will be conducted to compare baseline conditions to those after the completion of project activities. III. PROJECT DESCRIPTION A. Project Components 19. Component 1: Skills development.", + "ner_text": [ + [ + 931, + 944, + "named" + ], + [ + 4, + 14, + "Baseline data <> publisher" + ], + [ + 77, + 87, + "Baseline data <> data geography" + ], + [ + 143, + 174, + "Baseline data <> reference population" + ], + [ + 476, + 537, + "Baseline data <> data description" + ], + [ + 540, + 618, + "Baseline data <> data description" + ], + [ + 621, + 705, + "Baseline data <> data description" + ], + [ + 800, + 926, + "Baseline data <> data description" + ] + ], + "validated": true, + "empirical_context": "Achievement of the proposed Project Development Objective, will be measured through the following indicators: \u2022 Percentage of participants self-employed or employed by firms \u2022 Increase in income of households with individuals participating in the project \u2022 Percentage of registered participants completing training and receiving certificates \u2022 Beneficiaries of job-focused interventions, of which female ( core World Bank indicator ) \u2022 Percentage of beneficiaries taking a more active role in their communities disaggregated by gender and persons with disability 18. Baseline data on indicators will be collected to facilitate the measurement of project impact. Upon registration of participants for project support, data will be gathered to establish baseline conditions for each beneficiary and their household.", + "type": "data", + "explanation": "In this context, 'baseline data' is explicitly mentioned as being collected to measure project impact, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' refers to a dataset because it implies a collection of data points used for measurement.", + "contextual_reason_agent": "In this context, 'baseline data' is explicitly mentioned as being collected to measure project impact, indicating it functions as a data source.", + "contextual_signal": "mentioned as data to be collected for measurement purposes", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 13, + "text": "Since 2014, the repeated terrorist attacks of Boko Haram in the Far North region led to over 244, 000 IDPs, and the region also took in over 308, 000 refugees from Nigeria in 2019. Cameroon \u2019 s Eastern, Northern, and Adamawa regions received a surge of refugees from the Central African Republic. Education outcomes and level of education attainment was previously higher in anglophone regions than in some other regions. However, the persistent sociopolitical crisis in the North West and South West anglophone regions resulted in approximately 950, 000 IDPs, 51 percent of whom were children, in 2019. 13 These political crises have led to the destruction of 7 Cameroon Household Survey 2007 and 2014. 8 Staff calculations using the Fourth Cameroon Household Survey ( ECAM4 ) 2014, National Institute of Statistics ( NIS ). 9 The narrow unemployment rate among the labor force ages 15 \u2013 64 averaged 3. 3 in 2007 and 4. 0 percent in 2014 and 5. 4 and 5. 8 percent among youth in 2007 and 2014, respectively. The broader measure of unemployment, which includes workers who are inactive but would accept a job if offered one, averaged 3 percentage points higher. 10 The HCI measures the amount of human capital that a child born today can expect to attain by age 18.", + "ner_text": [ + [ + 735, + 767, + "named" + ], + [ + 6, + 10, + "Fourth Cameroon Household Survey <> reference year" + ], + [ + 64, + 80, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 181, + 189, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 217, + 232, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 689, + 693, + "Fourth Cameroon Household Survey <> reference year" + ], + [ + 698, + 702, + "Fourth Cameroon Household Survey <> publication year" + ], + [ + 778, + 782, + "Fourth Cameroon Household Survey <> publication year" + ], + [ + 784, + 816, + "Fourth Cameroon Household Survey <> author" + ], + [ + 839, + 856, + "Fourth Cameroon Household Survey <> data description" + ], + [ + 934, + 938, + "Fourth Cameroon Household Survey <> reference year" + ], + [ + 989, + 993, + "Fourth Cameroon Household Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "13 These political crises have led to the destruction of 7 Cameroon Household Survey 2007 and 2014. 8 Staff calculations using the Fourth Cameroon Household Survey ( ECAM4 ) 2014, National Institute of Statistics ( NIS ). 9 The narrow unemployment rate among the labor force ages 15 \u2013 64 averaged 3.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly referenced in relation to staff calculations and is part of the empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Household Survey', which typically indicates a structured collection of data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly referenced in relation to staff calculations and is part of the empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 61, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 57 of 74 and have received cash transfers, at least for one payment cycle. on a quarterly basis Payment data will be regularly updated in the SNSOP MIS Percentage of Labor Intensive Public Works subprojects that mitigate climate - induced shocks Percentage of LIPW subprojects that are designed to mitigate the impacts of climate-induced shocks in beneficiary communities, as identified under the Project Operations Manual. This indicator will be measured at least on a quarterly basis through missions and ISRs LIPW monitoring forms The implementing partner will periodically monitor LIPW as part of quality control and monitoring and evaluation activities carried out by dedicated field staff including field-based engineers Implementing partner Number of LIPW work days created Number of total LIPW work days generated by SNSOP This indicator will be measured at minimum on a quarterly basis including through missions and ISR ' s SNSOP MIS which hosts information on LIPW work days used to generate payment schedules. The number of LIPW work days will be documented at LIPW work sites and collected by field-based staff.", + "ner_text": [ + [ + 236, + 245, + "named" + ], + [ + 4, + 14, + "SNSOP MIS <> publisher" + ], + [ + 190, + 202, + "SNSOP MIS <> data type" + ], + [ + 842, + 866, + "SNSOP MIS <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 57 of 74 and have received cash transfers, at least for one payment cycle. on a quarterly basis Payment data will be regularly updated in the SNSOP MIS Percentage of Labor Intensive Public Works subprojects that mitigate climate - induced shocks Percentage of LIPW subprojects that are designed to mitigate the impacts of climate-induced shocks in beneficiary communities, as identified under the Project Operations Manual. This indicator will be measured at least on a quarterly basis through missions and ISRs LIPW monitoring forms The implementing partner will periodically monitor LIPW as part of quality control and monitoring and evaluation activities carried out by dedicated field staff including field-based engineers Implementing partner Number of LIPW work days created Number of total LIPW work days generated by SNSOP This indicator will be measured at minimum on a quarterly basis including through missions and ISR ' s SNSOP MIS which hosts information on LIPW work days used to generate payment schedules.", + "type": "management information system", + "explanation": "It is indeed a dataset as it is described as a management information system that stores records related to LIPW work days.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that hosts information.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a management information system that stores records related to LIPW work days.", + "contextual_signal": "described as a management information system that stores records", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 27, + "text": "Agriculture, gender, education, decentralization departments all have specific roles and responsibilities for a proper management of the sectors. The project will provide: ( a ) training and equipment for relevant departments including those in charge of agriculture, gender, children, decentralization, and education to promote an inclusive and climate resilient management of land and mining including awareness of climate change risks and impacts; ( b ) consultative coordination of inter-ministerial group of experts for land and mining matters supported by external facilitators and coaches to assist in establishing more efficient communication channels to properly use the Information Decision Support System tool and other related modules which have a wide inter-ministerial reach; ( c ) support for addressing drivers of conflict and risks to gender and children, and to stimulate income generating activities; and ( d ) security protocols with police and gendarmes in the areas of operations of BUMIGEB, ANEEMAS, and land agencies to mitigate security risks for project activities. COMPONENT 2: STRENGTHENING OF LAND AND MINING INFORMATION MANAGEMENT AND IMPROVING LAND TENURE SECURITY ( US $ 72 MILLION EQUIVALENT ) 37. The objective of this component is to modernize information management systems in land and mining and strengthen land tenure security through capacity building and systematic registration of land rights. This component will support the production of cadastral base maps, the development of a new Land Information System ( LIS ), the modernization of the mining cadastre, the collection of geo-data, and the provision of capacity building to support land registration and the systematic registration of land rights. The delineation of classified forests and their exclusion from mining licensing and climate friendly revegetation of closed mine land sites will", + "ner_text": [ + [ + 1527, + 1550, + "named" + ] + ], + "validated": false, + "empirical_context": "The objective of this component is to modernize information management systems in land and mining and strengthen land tenure security through capacity building and systematic registration of land rights. This component will support the production of cadastral base maps, the development of a new Land Information System ( LIS ), the modernization of the mining cadastre, the collection of geo-data, and the provision of capacity building to support land registration and the systematic registration of land rights. The delineation of classified forests and their exclusion from mining licensing and climate friendly revegetation of closed mine land sites will", + "type": "system", + "explanation": "However, the context describes it as a system for managing information rather than a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, the context describes it as a system for managing information rather than a specific dataset or data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 39, + "text": "These services are offered by health facility staff that received GBV counselling and messaging as part of their regular on-the-job training to support and direct vulnerable women to specific support channels and resources. MOHE, supported by UN agencies, developed a remote and face-to-face GBV counseling flowchart targeting primary health care workers to clarify management methods and referral pathways. These service adaptations were informed by a rapid assessment of available health care options for survivors of GBV during the COVID-19 outbreak. The survey included health care workers from primary health care centers, hospitals, and mobile medical clinics from 16 districts in Iraq. Of those surveyed, 69 percent of health facilities reported that their staff have already been trained on GBV. Following the COVID-19 outbreak, 81 percent of health facilities surveyed have already updated their referral pathways. Among those health facilities, 95 percent included GBV services in their updates. These interventions will be monitored and measured through the project \u2019 s results framework, TPMA reports, and through ESF instruments. 13 Sex-disaggregated data by priority group on vaccination uptake is not available, however, the gender gap in uptake among these groups is likely to be similar to the overall trend. This project will contribute to collection of sex-disaggregated data across priority groups whenever possible.", + "ner_text": [ + [ + 1146, + 1168, + "named" + ], + [ + 687, + 691, + "Sex-disaggregated data <> data geography" + ], + [ + 1452, + 1470, + "Sex-disaggregated data <> usage context" + ] + ], + "validated": true, + "empirical_context": "These interventions will be monitored and measured through the project \u2019 s results framework, TPMA reports, and through ESF instruments. 13 Sex-disaggregated data by priority group on vaccination uptake is not available, however, the gender gap in uptake among these groups is likely to be similar to the overall trend. This project will contribute to collection of sex-disaggregated data across priority groups whenever possible.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific data collected on vaccination uptake disaggregated by sex.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific data collected on vaccination uptake disaggregated by sex.", + "contextual_signal": "mentioned as part of data collection efforts", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 33, + "text": "Much of the data collected will be disaggregated by gender and status as refugee or member of the host community member. The overall M & E system is guided by the Project \u2019 s results framework. It will use smart-phone technology and a web-based Management Information System ( MIS ) to support efficient data collection, aggregation and analysis to enhance transparency and allow for real-time monitoring of project progress and to facilitate rapid corrective actions. The Bank will augment government M & E by piloting Iterative Beneficiary Monitoring ( IBM ) in the target areas. IBM is a method to generate cost-efficient regular and timely monitoring of project activities and results on a quarterly basis. It will strengthen management oversight and accountability while enhancing citizen engagement. 85. Management Information System ( MIS ). The project \u2019 s MIS will provide data on key project inputs, outputs and progress. This will include the tracking of financial and physical progress in project implementation. The PIU will regularly review and analyze these data to report on progress, assess performance and identify issues for follow-up action. A central MIS unit within the PIU will support the roll-out and maintenance of the system, data analysis and regular reporting to the ITC, PSC and the World Bank.", + "ner_text": [ + [ + 245, + 274, + "named" + ] + ], + "validated": false, + "empirical_context": "The overall M & E system is guided by the Project \u2019 s results framework. It will use smart-phone technology and a web-based Management Information System ( MIS ) to support efficient data collection, aggregation and analysis to enhance transparency and allow for real-time monitoring of project progress and to facilitate rapid corrective actions. The Bank will augment government M & E by piloting Iterative Beneficiary Monitoring ( IBM ) in the target areas.", + "type": "system", + "explanation": "However, it is described as a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection and analysis.", + "contextual_reason_agent": "However, it is described as a system for managing information rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "173_multi0page", + "page": 1, + "text": "CURRENCY EQUIVALENTS ( Exchange Rate Effective March 21, 2002 ) Currency Unit = GNF GNF 100 = US $. 05 ( or 5 cents ) US $ 1 = 1978 GNF FISCAL YEAR ABBREVIATIONS AND ACRONYMS ACTafrica AIDS Campaign Team for Africa AGBEF Guinean Association for Family Well-Being ARV ( T ) Anti-retroviral ( Therapy ) CAS Country Assistance Strategy CBO Community-based Organization CCC Communication for Changing Behavior CDF Comprehensive Development Framework CIDA Canadian International Development Agency CNLS National Comittee for the fight against AIDS CPLS Prefectoral Committee for the fight against AIDS CRLS Regional Committee for the fight againt AIDS CRD Communaute Rurale de Developpement ( rural development communities ) DHS Demographic and Health Survey EA Environmental Assessment EDS Enquete Demographie et Sante EMP Environment Management Plan FMA Financial Management Agent GNF Guinean Francs GTZ German Technical Assistance Organization HCR Haut Commissariat aux RefugiJs HIPC Highly Indebted Poor Country HIV-OI / STI Human Immunodeficiency Virus-Opportunistic Infection / Sexually transmitted infection LkPSO Inter-Agency Procurement Services Office IDP Internally displaced persons KAP Knowledge, Attitudes and Practices MAP Multi-sectoral AIDS Project PACV Projet d ' Appui aux Communautes Villageoises PMTCT Prevention of Mother-to-Child Transmission PLWHA / PWS Person living with HIVIAIDS / Personnes vivant avec VIHISIDA PNLS Plan National de lutte contre le SIDA PNPCSP Programme national de la prise en charge sanitaire et de la prevention PPSG Projet Population et Sante Gdndsique PRSP Poverty Reduction Strategy Paper PSI Population Services International, NGO UNIPAC United Nations Children '", + "ner_text": [ + [ + 720, + 753, + "named" + ] + ], + "validated": true, + "empirical_context": "CURRENCY EQUIVALENTS ( Exchange Rate Effective March 21, 2002 ) Currency Unit = GNF GNF 100 = US $. 05 ( or 5 cents ) US $ 1 = 1978 GNF FISCAL YEAR ABBREVIATIONS AND ACRONYMS ACTafrica AIDS Campaign Team for Africa AGBEF Guinean Association for Family Well-Being ARV ( T ) Anti-retroviral ( Therapy ) CAS Country Assistance Strategy CBO Community-based Organization CCC Communication for Changing Behavior CDF Comprehensive Development Framework CIDA Canadian International Development Agency CNLS National Comittee for the fight against AIDS CPLS Prefectoral Committee for the fight against AIDS CRLS Regional Committee for the fight againt AIDS CRD Communaute Rurale de Developpement ( rural development communities ) DHS Demographic and Health Survey EA Environmental Assessment EDS Enquete Demographie et Sante EMP Environment Management Plan FMA Financial Management Agent GNF Guinean Francs GTZ German Technical Assistance Organization HCR Haut Commissariat aux RefugiJs HIPC Highly Indebted Poor Country HIV-OI / STI Human Immunodeficiency Virus-Opportunistic Infection / Sexually transmitted infection LkPSO Inter-Agency Procurement Services Office IDP Internally displaced persons KAP Knowledge, Attitudes and Practices MAP Multi-sectoral AIDS Project PACV Projet d ' Appui aux Communautes Villageoises PMTCT Prevention of Mother-to-Child Transmission PLWHA / PWS Person living with HIVIAIDS / Personnes vivant avec VIHISIDA PNLS Plan National de lutte contre le SIDA PNPCSP Programme national de la prise en charge sanitaire et de la prevention PPSG Projet Population et Sante Gdndsique PRSP Poverty Reduction Strategy Paper PSI Population Services International, NGO UNIPAC United Nations Children '", + "type": "survey", + "explanation": "In the context, 'DHS' is explicitly defined as 'Demographic and Health Survey', which is a recognized data source used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Demographic and Health Survey' suggests a structured collection of data related to health demographics.", + "contextual_reason_agent": "In the context, 'DHS' is explicitly defined as 'Demographic and Health Survey', which is a recognized data source used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 50, + "text": "They are able to generate real-time statistics on their production capability, as well as utilization of services based on these IDs. \u2022 In the past year, the Safety Net Program at the Ministry of Social Affairs has started an initiative to register poor people using biometric information of beneficiaries, which includes pictures and fingerprints of 10 fingers. The ID printed is a simple card with biographic information and a picture. A total of 33, 000 beneficiaries have been enrolled and 7, 000 families have been registered. Key program challenges include ( a ) the availability of the whole family to be able to enroll them; ( b ) difficulties in taking fingerprints for labor workers due to extended hand usage; and ( c ) difficulties in taking superior quality pictures during rural enrollment, as the environment is very dusty. At present, biometric information is not used except for the picture for authentication because it is difficult to use biometrics in a rural setting where communication and card readers are not available. 14. The support for the development of the foundation e-ID system is a major priority for the Government, which has to deal with a large inflow of migrants fleeing conflict situations and social unrest", + "ner_text": [ + [ + 1098, + 1109, + "named" + ] + ], + "validated": false, + "empirical_context": "14. The support for the development of the foundation e-ID system is a major priority for the Government, which has to deal with a large inflow of migrants fleeing conflict situations and social unrest", + "type": "system", + "explanation": "However, the context indicates that it is a system for managing identification rather than a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'e-ID system' suggests a structured collection of information.", + "contextual_reason_agent": "However, the context indicates that it is a system for managing identification rather than a data source.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": ") establish a quality of care system through development of guidelines, tools, and standards, training of trainers on quality of care, piloting quality of care teams and supporting national scale up, and support for National and State level quality improvement supervision. 38. Subcomponent 2. 4: Health Management Information Systems ( WHO; US $ 2. 50 million: US $ 0. 93 million equivalent IDA [ including US $ 0. 63 million WHR ] and US $ 1. 57 million Trust Funds [ US $ 0. 17 million SDTF and US $ 1. 40 million MDTF ] ). This subcomponent will focus on developing systems and procedures for the national HMIS, with an emphasis on supporting the collection of routine data through DHIS2, to standardize data collection, entry and cleaning, as well as instituting data quality improvement practices. This will enhance targeting and data tracking for refugees and provide regularly updated information to understand the evolving needs on the ground that will aid further in the decision-making process.", + "ner_text": [ + [ + 297, + 334, + "named" + ] + ], + "validated": false, + "empirical_context": "Subcomponent 2. 4: Health Management Information Systems ( WHO; US $ 2. 50 million: US $ 0.", + "type": "system", + "explanation": "However, it is mentioned as a system and not as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information Systems', which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 27, + "text": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 440, + 470, + "named" + ], + [ + 227, + 243, + "complementary household survey <> data geography" + ], + [ + 701, + 711, + "complementary household survey <> publisher" + ], + [ + 764, + 774, + "complementary household survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as a survey that involves data collection and analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as a survey that involves data collection and analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 12, + "text": "The situation is worse in rural areas; for instance, only 47 percent of the rural population has access to basic drinking water ( versus 83 percent in urban areas ), and only 19 percent of the rural population has access to safely managed sanitation ( versus 41 percent in urban areas ). The 2017-18 household survey, nquete Djiboutienne Aupres des Menages pour les indicateurs sociaux ( EDAM4-IS ) 3, shows correlation between poor water and sanitation and increased occurrences of ill-health episodes. 6. Significant gender inequalities persist in Djibouti, throughout the lifecycle. Rural women with low levels of education and poor socio-economic status as well as female refugees suffer disproportionately from the multisectoral effects of gender inequalities. The persistence of harmful gender norms affects access to productive resources and limits progress towards gender equality. School enrollment in general and in technical education is growing, but the gender gaps in female-to-male enrollment ratios ( 82 percent in primary, 72 percent in secondary, and 61 percent in higher education ) increase with each level. Unequal access to quality health services particularly affects the sexual and reproductive health of women. The multiplicity of measures taken to economically empower women did not necessarily promote their equitable access to economic opportunities.", + "ner_text": [ + [ + 292, + 316, + "named" + ] + ], + "validated": true, + "empirical_context": "The situation is worse in rural areas; for instance, only 47 percent of the rural population has access to basic drinking water ( versus 83 percent in urban areas ), and only 19 percent of the rural population has access to safely managed sanitation ( versus 41 percent in urban areas ). The 2017-18 household survey, nquete Djiboutienne Aupres des Menages pour les indicateurs sociaux ( EDAM4-IS ) 3, shows correlation between poor water and sanitation and increased occurrences of ill-health episodes. 6.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a household survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "161_28046", + "page": 19, + "text": "Table: Classification of Regions by Poverty Levels Lower Central Higher Forest Total Urban Rural Guinea Guinea Guinea Guinea Conakry Percent urban Dooulation 30 100 0 23 9 16 22 100 Population density 29 n. a. n. a 31 31 14 34 2429 Access to safe water 52 49 45 43 45 47 48 80 Percent poor 40 n. a. n. a 24 36 40 25 11 Percent with less than U S 3 0 0 40 24 52 42 51 62 33 7 Depth o f poverty ( gap between 13 7 18 14 17 23 9 1 Share o f National Poverty ) ( % ) 100 9 88 22 28 32 15 3 Per capita expenditure ( \u2018 000 469. 5 n. a. n. a 401. 3 377. 6 316. 0 484. 4 n. a. Share of food in total spending 50 43 61 5 1 61 55 57 36 Source: UNPOP 1998, RGPH, General Population and Housing Census 1996, Comprehensive Consumption Budget Survey 199411995, Household Consumption Survey. Taken from the \u2018 Guinea Country Status Report \u201d, In the same vein, health indicators also indicate inequity problems as the table below shows: Table: Household Health Behaviors: Comparison by Place of Residence and by Region Lower Central Higher Forest Indicators Country Urban Rural Guinea Guinea Guinea Guinea Conakry Child health % of newborns < 6 months old on exclusive Median duration of breast feeding 22 4 - 224 242 1 9 9 22 1 22 1 breastfeeding 95. 2 - - 987 9 3 6 9 6 0 9 2 6 95 9 14", + "ner_text": [ + [ + 696, + 735, + "named" + ], + [ + 634, + 639, + "Comprehensive Consumption Budget Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "a. Share of food in total spending 50 43 61 5 1 61 55 57 36 Source: UNPOP 1998, RGPH, General Population and Housing Census 1996, Comprehensive Consumption Budget Survey 199411995, Household Consumption Survey. Taken from the \u2018 Guinea Country Status Report \u201d, In the same vein, health indicators also indicate inequity problems as the table below shows: Table: Household Health Behaviors: Comparison by Place of Residence and by Region Lower Central Higher Forest Indicators Country Urban Rural Guinea Guinea Guinea Guinea Conakry Child health % of newborns < 6 months old on exclusive Median duration of breast feeding 22 4 - 224 242 1 9 9 22 1 22 1 breastfeeding 95.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data in the context.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 57, + "text": "47 32. The sub-component will support the background analysis for the implementation of the database and its implementation. These include: ( a ) Design and implementation of key tools: updated poverty and malnutrition maps, development of community-based targeting criteria and processes, development of registration questionnaire, construction of proxy-means test score, the organization of the different committees involved in the registration, the implementation of the PMT survey, storing and analysis of the data as well as the preparation of beneficiary lists for the cash transfer program and its complementary activities. It will also support the acquisition of key equipment ( hardware, software, back-up equipment ). ( b ) Implementation of the targeting and registration in selected areas including the organization and support of the targeting committees, the implementation and processing of questionnaires and the preparation of the list of registered households as well as eligible households for the cash transfer program. ( c ) Information campaigns to explain the purposes and processes of the targeting and registration to commune and colline administration and local government staff and to households in the selected collines, potential additional activities to ensure that beneficiaries are aware of the requirements and supporting documents for national ID cards, ( d ) Quality controls: Spot checks, process evaluations, analysis of targeting efficiency to identify necessary adjustments. Sub-component 2. 2.", + "ner_text": [ + [ + 474, + 484, + "named" + ] + ], + "validated": false, + "empirical_context": "The sub-component will support the background analysis for the implementation of the database and its implementation. These include: ( a ) Design and implementation of key tools: updated poverty and malnutrition maps, development of community-based targeting criteria and processes, development of registration questionnaire, construction of proxy-means test score, the organization of the different committees involved in the registration, the implementation of the PMT survey, storing and analysis of the data as well as the preparation of beneficiary lists for the cash transfer program and its complementary activities. It will also support the acquisition of key equipment ( hardware, software, back-up equipment ).", + "type": "survey", + "explanation": "However, 'PMT survey' is mentioned as a survey process rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PMT survey' is a dataset because it involves data collection related to poverty and malnutrition.", + "contextual_reason_agent": "However, 'PMT survey' is mentioned as a survey process rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 83, + "text": "70 Table 5. 1: Investment and Length of Major Roads Municipality Road Length ( km ) Investment ( CFAF, billion ) Yaound\u00e9 Yaound\u00e9 5 6. 505 10, 509, 679, 081 Yaound\u00e9 7 5. 631 7, 256, 986, 078 Douala Douala 3 5. 210 7, 357, 746, 238 Douala 5 3. 320 4, 718, 125, 392 Kumba Kumba 2 5. 772 4, 624, 733, 278 Ngaound\u00e9r\u00e9 Ngaound\u00e9r\u00e9 2 6. 342 5, 188, 111, 850 Batouri Batouri 2. 100 1, 435, 421, 012 Total 42. 885 41, 090, 802, 929 8. Total investment cost for option 1 is estimated at CFAF 41. 1 billion ( US $ 68, 484, 672 ) 21. 9. Benefits were estimated based on Vehicle Operational Costs ( VOC ) savings owing to the construction of the road, building on traffic data and operation costs of vehicles using the road work: ( i ) The Average Daily Traffic ( ADT ) and Average Travel Time ( ATT ) were estimated based on field surveys conducted at the different points along the axis of the selected road sections presented earlier ( early 2017 ).", + "ner_text": [ + [ + 811, + 824, + "named" + ], + [ + 113, + 120, + "field surveys <> data geography" + ], + [ + 121, + 128, + "field surveys <> data geography" + ], + [ + 156, + 163, + "field surveys <> data geography" + ], + [ + 197, + 203, + "field surveys <> data geography" + ], + [ + 301, + 311, + "field surveys <> data geography" + ], + [ + 312, + 322, + "field surveys <> data geography" + ], + [ + 649, + 661, + "field surveys <> data type" + ], + [ + 725, + 746, + "field surveys <> data description" + ], + [ + 759, + 778, + "field surveys <> data description" + ], + [ + 930, + 934, + "field surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "9. Benefits were estimated based on Vehicle Operational Costs ( VOC ) savings owing to the construction of the road, building on traffic data and operation costs of vehicles using the road work: ( i ) The Average Daily Traffic ( ADT ) and Average Travel Time ( ATT ) were estimated based on field surveys conducted at the different points along the axis of the selected road sections presented earlier ( early 2017 ).", + "type": "survey", + "explanation": "In this context, 'field surveys' are explicitly mentioned as a method for estimating traffic data, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'field surveys' is a dataset because it implies a structured collection of data gathered from observations.", + "contextual_reason_agent": "In this context, 'field surveys' are explicitly mentioned as a method for estimating traffic data, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 42, + "text": "While the elements discussed earlier support the overall sustainability of the project \u2019 s objective, the limited fiscal space and the volatility of the security context may pose challenges for the sustainability of some of the activities in the long term. The aim of the project is to improve the quality of teaching and learning and strengthen education planning and management. The majority of the project \u2019 s funding is allocated to quality improvement and capacity building, which has minor implications on the fiscal space of the Government. Some of the activities, however, may imply incremental costs whose management by the government budget remains uncertain, such as the financing of school grants. It is expected that being able to demonstrate measurable impact of these activities on the improvement of teaching practices and learning outcomes will help mobilize additional domestic resources and attract further external financing needed to improve the education system in Niger. IV. PROJECT APPRAISAL SUMMARY A. Technical, Economic and Financial Analysis 85. The design of project activities was informed by international and national evidence of good practices, which ensures its technical viability. The technical design of the project took into consideration capacity constraints and lessons learned from previous operations in Niger, particularly the GPE-PAEQ, and was supported by analytical work, such as the World Development Report 2018 and the Ending Learning Poverty Report, as well as relevant TA such as the Advisory Services and Analytics on Makaranta schools ( P168795 ), whose findings informed a project subcomponent. 86. The proposed project uses the 2014 household survey National Survey on Household Living Conditions and Agriculture ( Enqu\u00eate nationale sur les Conditions de Vie des M\u00e9nages et l \u2019 Agriculture, ECVMA ) and recent administrative data to identify target beneficiaries ( both in terms of poverty profile and geographic areas ) and estimate the economic and social benefits of the project. Econometric methods", + "ner_text": [ + [ + 1865, + 1884, + "named" + ], + [ + 987, + 992, + "administrative data <> data geography" + ], + [ + 1346, + 1351, + "administrative data <> data geography" + ], + [ + 1683, + 1687, + "administrative data <> reference year" + ], + [ + 1897, + 1917, + "administrative data <> reference population" + ], + [ + 1937, + 1952, + "administrative data <> data description" + ], + [ + 2073, + 2091, + "administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "86. The proposed project uses the 2014 household survey National Survey on Household Living Conditions and Agriculture ( Enqu\u00eate nationale sur les Conditions de Vie des M\u00e9nages et l \u2019 Agriculture, ECVMA ) and recent administrative data to identify target beneficiaries ( both in terms of poverty profile and geographic areas ) and estimate the economic and social benefits of the project. Econometric methods", + "type": "data", + "explanation": "In this context, 'administrative data' is explicitly mentioned as being used to identify target beneficiaries and estimate benefits, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'administrative data' is a dataset because it is often used as a source of information in empirical research.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as being used to identify target beneficiaries and estimate benefits, confirming its role as a data source.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31. Subcomponent 1. 2: Boma Health Initiative ( implemented by UNICEF; US $ 12. 41 million: US $ 3. 41 million equivalent IDA [ WHR IDA ] and US $ 9. 0 million Trust Funds [ US $ 0. 95 million SDTF and US $ 8. 05 million MDTF ] ).", + "ner_text": [ + [ + 246, + 251, + "named" + ], + [ + 178, + 187, + "DHIS2 <> data type" + ], + [ + 202, + 211, + "DHIS2 <> data type" + ], + [ + 322, + 328, + "DHIS2 <> publisher" + ], + [ + 488, + 492, + "DHIS2 <> reference population" + ], + [ + 800, + 806, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31.", + "type": "system", + "explanation": "DHIS2 is indeed a data system used for health information management, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data entry and use.", + "contextual_reason_agent": "DHIS2 is indeed a data system used for health information management, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 18, + "text": "The Directorate of HPTs, MoH will work closely with KEMSA to ensure maximum efficiency in implementation of this sub-component. 16. Sub-component 1. 2: Health financing and quality of care reforms ( US $ 15 million ): This sub-component will support the recently introduced Government UHC reforms, including but not limited to the transition from the NHIF to the Social Health Authority ( SHA ). Areas of support include development of regulations and implementation roadmaps, design and rationalization of a benefit package, developing a framework for review of the benefit package including strengthening capacity for the health technology assessment, design of business processes and claims processing, stakeholder engagement among others. Additionally, the project will support the MoH to establish / strengthen regulatory bodies and operationalize quality of care reforms for improved strategic purchasing. 17. Sub-component 1. 3: Improve availability and use of quality data for decision making ( US $ 10 million ): This sub - component will support the Government to improve generation and use of strategic information for decision making, specifically through conducting relevant cross-sectional surveys including, but not limited to, the WHO STEPwise approach to non-communicable diseases ( NCD ) risk factor surveillance ( STEPS ) survey, and the Household Health Expenditure and Utilization Survey. Climate sensitive planning for surveys will be used and questions on climate and health impacts will be included in the survey to generate relevant data to inform decision making. Support will also be provided towards dissemination of findings to the lowest level.", + "ner_text": [ + [ + 1357, + 1408, + "named" + ], + [ + 1188, + 1211, + "Household Health Expenditure and Utilization Survey <> data type" + ], + [ + 1466, + 1505, + "Household Health Expenditure and Utilization Survey <> data description" + ], + [ + 1690, + 1708, + "Household Health Expenditure and Utilization Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Sub-component 1. 3: Improve availability and use of quality data for decision making ( US $ 10 million ): This sub - component will support the Government to improve generation and use of strategic information for decision making, specifically through conducting relevant cross-sectional surveys including, but not limited to, the WHO STEPwise approach to non-communicable diseases ( NCD ) risk factor surveillance ( STEPS ) survey, and the Household Health Expenditure and Utilization Survey. Climate sensitive planning for surveys will be used and questions on climate and health impacts will be included in the survey to generate relevant data to inform decision making.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that generates relevant data for decision making.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a specific survey that collects data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that generates relevant data for decision making.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 58 of 74 working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are refugees and host communities. Refugees are defined as forcibly displaced HHs originating from a country other than South Sudan and registered as refugees in South Sudan by the UNHCR. Host communities are defined as local population groups living in counties with a high concentration of refugees. measured at a minimum on a quarterly basis SNSOP MIS updated over the course of the project. Payment data will also be periodically updated in the MIS Number of beneficiary households receiving Direct Income Support who have a female primary beneficiary ( Number ) Total number of beneficiary households under comopnent 1. 2 that have a primary beneficiary, as registered in the SNSOP MIS, who is a woman.", + "ner_text": [ + [ + 579, + 591, + "named" + ], + [ + 4, + 14, + "Payment data <> publisher" + ], + [ + 201, + 217, + "Payment data <> reference population" + ], + [ + 304, + 315, + "Payment data <> data geography" + ], + [ + 346, + 357, + "Payment data <> data geography" + ], + [ + 637, + 701, + "Payment data <> data description" + ] + ], + "validated": true, + "empirical_context": "measured at a minimum on a quarterly basis SNSOP MIS updated over the course of the project. Payment data will also be periodically updated in the MIS Number of beneficiary households receiving Direct Income Support who have a female primary beneficiary ( Number ) Total number of beneficiary households under comopnent 1. 2 that have a primary beneficiary, as registered in the SNSOP MIS, who is a woman.", + "type": "data", + "explanation": "In this context, 'Payment data' is indeed used as a source of information that is periodically updated in the MIS.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Payment data' is a dataset because it refers to a specific type of information collected and updated in the context of the project.", + "contextual_reason_agent": "In this context, 'Payment data' is indeed used as a source of information that is periodically updated in the MIS.", + "contextual_signal": "mentioned as data that will be periodically updated in the MIS", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 5, + "validated": 4, + "not_validated": 1 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 46, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 31 Proportion of beneficiaries indicating satisfaction with infrastructure subprojects constructed or upgraded by the project ( Percentage ) Description Quantitative indicator counting proportion of beneficiaries indicating satisfaction with infrastructure subprojects constructed or upgraded by the project. Indictor is a composite of beneficiaries responding \u201c satisfied \u201d or \u201c very satisfied \u201d on a Likert scale. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey. Responsibility for Data Collection MINEMA. Kilometers of road upgraded ( Kilometers ) Description Quantitative indicator counting kilometers of roads upgraded ( completed ). Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. RTDA data fed to MINEMA. Responsibility for Data Collection RTDA and MINEMA. Market facilities and Integrated Craft Production Centers constructed or upgraded ( Number ) Description Quantitative indicator counting number of market facilities and ICPCs constructed or upgraded ( completed ). Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA.", + "ner_text": [ + [ + 948, + 954, + "named" + ] + ], + "validated": false, + "empirical_context": "Methodology for Data Collection Monitoring project implementation. RTDA data fed to MINEMA. Responsibility for Data Collection RTDA and MINEMA.", + "type": "organization", + "explanation": "However, MINEMA is mentioned as an organization responsible for data collection, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MINEMA is a dataset due to its involvement in data collection.", + "contextual_reason_agent": "However, MINEMA is mentioned as an organization responsible for data collection, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least M & E Specialist", + "ner_text": [ + [ + 64, + 83, + "named" + ], + [ + 4, + 15, + "pre-training survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least M & E Specialist", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data gathered from individuals through a survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data from individuals.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data gathered from individuals through a survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 30, + "text": "The NWSC also has an Internal Audit Department comprising qualified and experienced auditors and an audit committee of the board is in place. The Auditor General will audit the Project \u2019 s financial statements in accordance with statutory requirements, auditing standards, and suitable terms of reference ( ToR ). 68. The financial implementation arrangements, which are currently in place under the WMDP, are considered acceptable. Both the MWE and the NWSC have sound internal control procedures in place. However, the following key risks have been identified based on the ongoing WMDP: ( a ) ministry internal audit review reports are not shared regularly with the World Bank; this is risky given the number of ongoing activities and spread of operations where such reviews give management needed assurance of fiduciary status; ( b ) the project module of the IFMS has not been fully operationalized at the MWE, which increases the risk of errors and inaccuracy of financial reports; and ( c ) turnover of qualified and experienced staff is noticed. To ensure that the Project is effectively implemented, the MWE and NWSC", + "ner_text": [ + [ + 863, + 867, + "named" + ] + ], + "validated": false, + "empirical_context": "Both the MWE and the NWSC have sound internal control procedures in place. However, the following key risks have been identified based on the ongoing WMDP: ( a ) ministry internal audit review reports are not shared regularly with the World Bank; this is risky given the number of ongoing activities and spread of operations where such reviews give management needed assurance of fiduciary status; ( b ) the project module of the IFMS has not been fully operationalized at the MWE, which increases the risk of errors and inaccuracy of financial reports; and ( c ) turnover of qualified and experienced staff is noticed. To ensure that the Project is effectively implemented, the MWE and NWSC", + "type": "system", + "explanation": "However, IFMS is described as a project module and not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMS is a dataset because it is mentioned in the context of financial reporting.", + "contextual_reason_agent": "However, IFMS is described as a project module and not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "116_PAD1510-PAD-P152821-IDA-R2016-0078-1-Box394886B-OUO-9", + "page": 21, + "text": "Two of these wards include the two resettlement areas of Meheba and Mayukwayukwa, the other 12 wards surround these areas. To date, these wards are located respectively in Kaoma district ( 179, 326 people ), Lukulu district ( 37, 231 people ), and Solwezi district ( 107, 794 people ). 12 The total target population in the three districts and 14 wards is 357, 95113 persons, of which 16, 800 former refugees. Beneficiaries also include up to 16, 800 eligible former refugees moving to the resettlement areas, and a matching number of Zambians provided access to land. It is expected that some of the investments, in particular the connective infrastructure, will benefit people in the wider Districts and even Province. C. PDO Level Results Indicators \uf0b7 Direct project beneficiaries ( number ) ( of which percentage female, former refugee, host community ) \uf0b7 Beneficiaries ( number ) with improved access to connective and socio-economic infrastructure ( of which percentage female, former refugees, host community ) \uf0b7 Percentage of beneficiaries of livelihood subprojects who report improved food security / income / welfare ( of which percentage female, most vulnerable ) 12 Given ongoing revisions of district boundaries resulting in the split of Solwezi and Kaoma districts, the targeted wards might eventually fall under different districts. 13 Zambia, 2010, Census of Population and Housing, Zambia Central Statistical Office", + "ner_text": [ + [ + 1365, + 1397, + "named" + ], + [ + 57, + 63, + "Census of Population and Housing <> data geography" + ], + [ + 68, + 80, + "Census of Population and Housing <> data geography" + ], + [ + 172, + 186, + "Census of Population and Housing <> data geography" + ], + [ + 208, + 223, + "Census of Population and Housing <> data geography" + ], + [ + 248, + 264, + "Census of Population and Housing <> data geography" + ], + [ + 535, + 543, + "Census of Population and Housing <> reference population" + ], + [ + 1020, + 1126, + "Census of Population and Housing <> data description" + ], + [ + 1351, + 1357, + "Census of Population and Housing <> data geography" + ], + [ + 1359, + 1363, + "Census of Population and Housing <> publication year" + ], + [ + 1399, + 1432, + "Census of Population and Housing <> publisher" + ] + ], + "validated": true, + "empirical_context": "PDO Level Results Indicators \uf0b7 Direct project beneficiaries ( number ) ( of which percentage female, former refugee, host community ) \uf0b7 Beneficiaries ( number ) with improved access to connective and socio-economic infrastructure ( of which percentage female, former refugees, host community ) \uf0b7 Percentage of beneficiaries of livelihood subprojects who report improved food security / income / welfare ( of which percentage female, most vulnerable ) 12 Given ongoing revisions of district boundaries resulting in the split of Solwezi and Kaoma districts, the targeted wards might eventually fall under different districts. 13 Zambia, 2010, Census of Population and Housing, Zambia Central Statistical Office", + "type": "census", + "explanation": "This is a dataset as it is a structured collection of data used for empirical analysis regarding population and housing.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific census that collects demographic data.", + "contextual_reason_agent": "This is a dataset as it is a structured collection of data used for empirical analysis regarding population and housing.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1119, + 1123, + "named" + ], + [ + 4, + 14, + "HMIS <> publisher" + ], + [ + 15, + 26, + "HMIS <> data geography" + ], + [ + 481, + 492, + "HMIS <> data geography" + ], + [ + 616, + 627, + "HMIS <> data geography" + ], + [ + 657, + 665, + "HMIS <> reference population" + ], + [ + 690, + 701, + "HMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "HMIS is indeed a dataset as it refers to a Health Management Information System that collects and manages health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is mentioned in the context of health data management and integration.", + "contextual_reason_agent": "HMIS is indeed a dataset as it refers to a Health Management Information System that collects and manages health data.", + "contextual_signal": "mentioned as a data source in the context of health data management", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 56, + "text": "The World Bank Afghanistan: Eshteghal Zaiee - Karmondena ( EZ-Kar ) ( P166127 ) Page 49 of 85 value may be set by the mid \u2010 term review by when there will be more information from implementation. Of which through Priority Projects in Kabul Number of businesses that are provided with access to ICT services through Priority Projects in Kabul Municipality. Target value for this indicator is currently set at \u2018 0 \u2019, since this Project takes on a demand \u2010 driven approach and the types and numbers of subprojects to be implemented under the Project cannot / should not be predetermined, as it will not be accurate. The target value may be set by the mid \u2010 term review by when there will be more information from implementation. Semi \u2010 annually Project MIS ( KMDP MIS ) Nahia level administrative data, quarterly progress reports, evaluation KM PIU Of which through Priority Projects in the four cities of Herat, Kandahar, Khost, and Jalalabad Number of businesses that are provided with access to ICT services through Priority Projects in Herat, Kandahar, Khost, and Jalalabad.", + "ner_text": [ + [ + 767, + 798, + "named" + ], + [ + 336, + 354, + "Nahia level administrative data <> data geography" + ], + [ + 800, + 826, + "Nahia level administrative data <> data description" + ], + [ + 903, + 908, + "Nahia level administrative data <> data geography" + ], + [ + 910, + 918, + "Nahia level administrative data <> data geography" + ], + [ + 920, + 925, + "Nahia level administrative data <> data geography" + ], + [ + 931, + 940, + "Nahia level administrative data <> data geography" + ], + [ + 1037, + 1042, + "Nahia level administrative data <> data geography" + ], + [ + 1108, + 1126, + "Nahia level administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The target value may be set by the mid \u2010 term review by when there will be more information from implementation. Semi \u2010 annually Project MIS ( KMDP MIS ) Nahia level administrative data, quarterly progress reports, evaluation KM PIU Of which through Priority Projects in the four cities of Herat, Kandahar, Khost, and Jalalabad Number of businesses that are provided with access to ICT services through Priority Projects in Herat, Kandahar, Khost, and Jalalabad.", + "type": "administrative data", + "explanation": "This is indeed a dataset as it refers to structured administrative data used for project monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected at the Nahia level.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured administrative data used for project monitoring and evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 62, + "text": "There will also be regular monitoring to ensure that the cash transfer and beneficiary development processes are being implemented in a manner that ensures targets in the Results Framework ( Annex 4 ) are met. Specifically, it will be the responsibility of the branch ( i. e. Governorate ) offices to compile monthly statistics on the number of beneficiaries it has had contact with, the number of new applications received and processes annually, the number of appeals received and responded to annually, and also the number of beneficiaries who receive various BDP services ( including health and education services, skills training, and access to microcredit ). The statistics will be provided by the district offices to the branch offices, which will then report the statistics to the Monitoring and Evaluation Department at the SWF Head Office. The data will be used for measuring whether process performance meets the targets specified in the Results Framework ( Annex 4 ). Separately, a consultant will also be hired to ensure that the targeting policy paper is finalized by the agreed upon year of the project. 165. For component 2, an impact evaluation of the BDP services will be carried out, there will be baseline and follow-up data collection ( the exact time of the baseline and follow-up to be determined ) from a sample of beneficiaries who do and do not receive BDP services.", + "ner_text": [ + [ + 309, + 327, + "named" + ] + ], + "validated": false, + "empirical_context": "e. Governorate ) offices to compile monthly statistics on the number of beneficiaries it has had contact with, the number of new applications received and processes annually, the number of appeals received and responded to annually, and also the number of beneficiaries who receive various BDP services ( including health and education services, skills training, and access to microcredit ). The statistics will be provided by the district offices to the branch offices, which will then report the statistics to the Monitoring and Evaluation Department at the SWF Head Office.", + "type": "statistics", + "explanation": "'Monthly statistics' are not a structured collection of data but rather a summary of data points collected over time.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'monthly statistics' refers to a dataset because it involves numerical data collection.", + "contextual_reason_agent": "'Monthly statistics' are not a structured collection of data but rather a summary of data points collected over time.", + "contextual_signal": "mentioned only as a summary of collected data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 39, + "text": "The World Bank Formal Employment Creation Project ( P171766 ) Page 35 of 86 participatory needs assessments with loan beneficiary firms to establish their trainings needs at the application stage. ( ii ) Post-training assessments. The TKYB will conduct a post-training assessment to measure the satisfaction and impact of the trainings, with the results being used by training providers to revise and improve the process in subsequent assessments. ( iii ) Satisfaction surveys. The TKYB, in collaboration with PFIs, will conduct satisfaction surveys in the midterm and end term with the loan beneficiary firms regarding the subfinance received in terms of their needs. 30 ( iv ) Biannual beneficiary workshops, roundtables, and focus group discussions ( targeting different beneficiaries: employers, civil society and end beneficiaries ) will be held to discuss the survey results with a view to developing measures that improve the project design ( such as the selection criteria of loans beneficiary firms, loan utilization, and choice of training activities ). This activity will draw from the World Bank team \u2019 s recent experience in carrying out validation workshops in the context of the FRIT I - Strengthening Economic Opportunities for Syrians under Temporary Protection and Turkish Citizens in Selected Localities Project ( P165687 ).", + "ner_text": [ + [ + 456, + 476, + "named" + ], + [ + 4, + 14, + "Satisfaction surveys <> publisher" + ], + [ + 113, + 135, + "Satisfaction surveys <> reference population" + ], + [ + 529, + 549, + "Satisfaction surveys <> data type" + ], + [ + 587, + 609, + "Satisfaction surveys <> reference population" + ], + [ + 1097, + 1107, + "Satisfaction surveys <> author" + ] + ], + "validated": true, + "empirical_context": "The TKYB will conduct a post-training assessment to measure the satisfaction and impact of the trainings, with the results being used by training providers to revise and improve the process in subsequent assessments. ( iii ) Satisfaction surveys. The TKYB, in collaboration with PFIs, will conduct satisfaction surveys in the midterm and end term with the loan beneficiary firms regarding the subfinance received in terms of their needs.", + "type": "survey", + "explanation": "In this context, it is indeed a dataset as it refers to structured surveys conducted to gather data on satisfaction levels.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satisfaction surveys' imply a structured collection of responses from participants.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it refers to structured surveys conducted to gather data on satisfaction levels.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 55, + "text": "48 DLI # Definition / Description of achievement Scalability of Disbursements Protocol to evaluate achievement of the DLRs and data / results verification Assessment can be national or international, sample or census-based. The results of the assessment should be made public on MEHE and CERD ' s websites. document detailing assessment results ( e. g. OECD document for PISA ) Party the results of the application of one of the assessments. DLI # 9 Internal audit function operational as specified in the Program Operations Manual ( POM ). Internal audit reports should be produced quarterly according to the agreed standards which are defined in the Program Operations Manual ( POM ). No Internal Audit Reports Third Party Reviewing internal audit reports for compliance with quality standards set forth in the POM.", + "ner_text": [ + [ + 371, + 375, + "named" + ] + ], + "validated": false, + "empirical_context": "g. OECD document for PISA ) Party the results of the application of one of the assessments. DLI # 9 Internal audit function operational as specified in the Program Operations Manual ( POM ).", + "type": "document", + "explanation": "'PISA' is mentioned in the context of an OECD document and an internal audit function, indicating it is a program or assessment rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is associated with assessments and data collection.", + "contextual_reason_agent": "'PISA' is mentioned in the context of an OECD document and an internal audit function, indicating it is a program or assessment rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 50, + "text": "This indicator does not include terrestrial or aquatic areas managed as offsets for projectrelated biodiversity impacts ( public or private sector ). The methodology for the indicator is currently under development. Frequency Quaterly Data source PCU Methodology for Data Collection Responsibility for Data Collection PCU Component 3: Access to Market, Finance, and Value Addition Climate-smart market infrastructure ( sale counters ) built ( Number ) Description This indicator measures the number of climate-resilient marketing infrastructures constructed under the project. Frequency Quaterly Data source PCU Methodology for Data Collection Responsibility for Data Collection PCU Agribusiness market information system operational ( Yes / No ) Description This indicator measures the effectiveness and operationality of the integrated MIS established under the project Frequency Annually Data source PCU Methodology for Data Collection Responsibility for Data PCU", + "ner_text": [ + [ + 683, + 721, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Quaterly Data source PCU Methodology for Data Collection Responsibility for Data Collection PCU Component 3: Access to Market, Finance, and Value Addition Climate-smart market infrastructure ( sale counters ) built ( Number ) Description This indicator measures the number of climate-resilient marketing infrastructures constructed under the project. Frequency Quaterly Data source PCU Methodology for Data Collection Responsibility for Data Collection PCU Agribusiness market information system operational ( Yes / No ) Description This indicator measures the effectiveness and operationality of the integrated MIS established under the project Frequency Annually Data source PCU Methodology for Data Collection Responsibility for Data PCU", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 15, + "text": "According to data from the International Labor Organization, a lack of safe transport reduces the probability of women accessing economic opportunities by 16. 5 percent. 15 In 2020, the regional rate was 18. 7 fatalities per 100, 000 inhabitants, which is higher than the national rate ( 15. 5 ) ( DATASUS data ). 16 Looking at the formal jobs of the Region, 69 percent of the people that receive more than ten minimum wages, on average, are men. Meanwhile, 94 percent of indigenous or black women receive less than 3 minimum wages. Data Source: RAIS, 2019. 17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents. 18 Data derived from PNAD-Cont\u00ednua ( 2019 ) State of Santa Catarina. 19 Hayd\u00e9e Svab, Marina Kohler Harkot, and Beatriz Moura Dos Santos, A Baseline Study of Gender and Transport in Sao Paulo, Brazil: Present Initiatives to Improve Women \u2019 s Mobility ( English ) ( Washington, DC: World Bank, 2021 ). 20 Brazil - Improving Mobility and Urban Inclusion in the Amazonas Corridor in Belo Horizonte Project ( Washington, DC: World Bank ). 21 Data reflect cases of harassment in public spaces without disaggregation. However, given the trend in Brazil and the Region, it can be inferred that in the Santa Catarina Region, more women are also survivors of sexual harassment.", + "ner_text": [ + [ + 561, + 595, + "named" + ], + [ + 552, + 556, + "Gender-disaggregated mobility data <> reference year" + ], + [ + 600, + 617, + "Gender-disaggregated mobility data <> data geography" + ], + [ + 772, + 795, + "Gender-disaggregated mobility data <> data geography" + ], + [ + 800, + 811, + "Gender-disaggregated mobility data <> author" + ], + [ + 813, + 833, + "Gender-disaggregated mobility data <> author" + ], + [ + 839, + 863, + "Gender-disaggregated mobility data <> author" + ], + [ + 1008, + 1018, + "Gender-disaggregated mobility data <> publisher" + ], + [ + 1020, + 1024, + "Gender-disaggregated mobility data <> publication year" + ], + [ + 1148, + 1158, + "Gender-disaggregated mobility data <> publisher" + ], + [ + 1321, + 1342, + "Gender-disaggregated mobility data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Data Source: RAIS, 2019. 17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents. 18 Data derived from PNAD-Cont\u00ednua ( 2019 ) State of Santa Catarina.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a structured collection of gender-disaggregated mobility data used in the research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of gender-disaggregated mobility data used in the research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 21, + "text": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 18 of 93 26. However, the periodicity of basic surveys and censuses is not observed, and quality could be improved. The general population censuses were conducted in 1976, 1987, and then 2005, while the international recommendation is to conduct a census survey every 10 years. The GoC is planning the next population census end of 2017 - early 2018, and preparatory work already started in 2016. Full financing for 2017 data collection and analysis is nearly secured. 20 The Cameroon household surveys ( Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages, ECAM ) were conducted in 1996, 2001, 2007, and 2014, with a complementing survey in 2016. 21 The last three ECAMs had similar methodologies enabling solid trend analysis. However, ECAMs are supposed to be conducted at least every five years according to INS standards, and the World Bank recommends a survey every three or four years to monitor more closely the impact of public policies. Also, the ECAMs could be improved ( a ) upstream in the design to integrate specific issues related to poverty in rural areas or northern regions where poverty is the highest, including more frequent data production, and ( b ) downstream with more in-depth analysis. Also, the system of labor statistics is weak.", + "ner_text": [ + [ + 581, + 607, + "named" + ], + [ + 4, + 14, + "Cameroon household surveys <> publisher" + ], + [ + 496, + 500, + "Cameroon household surveys <> publication year" + ], + [ + 581, + 589, + "Cameroon household surveys <> data geography" + ], + [ + 676, + 680, + "Cameroon household surveys <> reference year" + ], + [ + 682, + 686, + "Cameroon household surveys <> reference year" + ], + [ + 688, + 692, + "Cameroon household surveys <> reference year" + ], + [ + 698, + 702, + "Cameroon household surveys <> reference year" + ], + [ + 735, + 739, + "Cameroon household surveys <> publication year" + ], + [ + 928, + 938, + "Cameroon household surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "Full financing for 2017 data collection and analysis is nearly secured. 20 The Cameroon household surveys ( Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages, ECAM ) were conducted in 1996, 2001, 2007, and 2014, with a complementing survey in 2016. 21 The last three ECAMs had similar methodologies enabling solid trend analysis.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it describes the Cameroon household surveys as structured collections of data collected over multiple years for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a series of surveys conducted in Cameroon, which typically collect structured data.", + "contextual_reason_agent": "The context confirms it is a dataset as it describes the Cameroon household surveys as structured collections of data collected over multiple years for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 12, + "text": "South Sudan \u2019 s GDP is estimated to have contracted by 0. 4 percent in FY 2022 / 23, 1 weighed down by a fourth consecutive year of flooding, lingering impacts of the COVID-19 pandemic, violence flareups, and higher food inflation due to global crises. 3 The 2022 Household Budget Survey estimates that poverty levels in South Sudan remain persistently high \u2013 at around 80 percent of the population, with 6 in 10 South Sudanese living in extreme poverty ( below the food poverty line ). Nearly 80 percent of South Sudan \u2019 s population lives in rural areas where infrastructure is limited, complicating service delivery, particularly during the rainy season. 3. South Sudan has the highest level of vulnerability and lowest level of climate adaptation capacity globally, based on the European Union \u2019 s 2022 INFORM Risk Index. 2 South Sudan, composed entirely of river basins, ranks as the seventh most vulnerable country to riverine flood in the world in an average year. 3 Between 2019 and 2022, the flooding reached record levels with climate change affecting weather patterns, destroying already scarce infrastructure, displacing populations, and decreasing movement throughout the country. It is estimated that 1 million people were affected by flooding and 300, 000 people were displaced in 2021. South Sudan also experiences an intense annual hot season and cyclical drought.", + "ner_text": [ + [ + 259, + 287, + "named" + ] + ], + "validated": true, + "empirical_context": "4 percent in FY 2022 / 23, 1 weighed down by a fourth consecutive year of flooding, lingering impacts of the COVID-19 pandemic, violence flareups, and higher food inflation due to global crises. 3 The 2022 Household Budget Survey estimates that poverty levels in South Sudan remain persistently high \u2013 at around 80 percent of the population, with 6 in 10 South Sudanese living in extreme poverty ( below the food poverty line ). Nearly 80 percent of South Sudan \u2019 s population lives in rural areas where infrastructure is limited, complicating service delivery, particularly during the rainy season.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides estimates on poverty levels in South Sudan.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Household Budget Survey,' which suggests a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides estimates on poverty levels in South Sudan.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 48, + "text": "Methodology for Data Collection Indicator values will be collected from the administrative data of MODEE and public - and private-sector relying parties on the use of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improving trusted, people-centric data sharing Description Availability of trusted, people-centric data sharing [ Yes / No ]. Frequency Annually Data source ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports. Methodology for Data Collection Indicator values will be collected from ( a ) DPI implementation reports released by MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation, and testing reports, and ( c ) Official reports submitted by third - party assessment bodies recruited to carry out the Privacy Impact Assessments. All indicator values will be cross - checked by the IVA. Responsibility for Data Collection MODEE. Enhanced management of medical records Description Promoting digital transformation in health servcie delivery by scaling up the coverage of an existing EMR platform to all MOH facility sites, prioritizing facility sites in districts with a high concentration of Syrian refugees. Frequency Biannualy. Data source Administrative data from the EHS. Methodology for Data Collection Data collected from the EHS to measure how many MOH facility sites had installed and operationalized the EMR platform. Responsibility for Data Collection MOH and EHS.", + "ner_text": [ + [ + 1305, + 1317, + "named" + ] + ], + "validated": false, + "empirical_context": "Responsibility for Data Collection MODEE. Enhanced management of medical records Description Promoting digital transformation in health servcie delivery by scaling up the coverage of an existing EMR platform to all MOH facility sites, prioritizing facility sites in districts with a high concentration of Syrian refugees. Frequency Biannualy.", + "type": "system", + "explanation": "However, the context indicates it is described as a platform for managing records, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMR platform' suggests a system that handles electronic medical records.", + "contextual_reason_agent": "However, the context indicates it is described as a platform for managing records, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 90, + "text": "The World Bank Jordan Youth, Technology, and Jobs Project ( P170669 ) Page 86 of 106 high school ( relative to the other students ). Given that a digital curriculum requires updating periodically, we count the estimated the benefits for the first 4 cohorts of students that are exposed to the curriculum. Thus, we take the number of students enrolled in grades 6, 7, 8, and 9 at 2 years after program starts and calculate the proportion of each that would be going directly into the job market after high school, after 2-year technical schools, and after 4-year universities. Since someone at grade 7 graduates high school 5 years later, the benefit streams from the curriculum begins seven years after beginning of project and, following the literature, counted for the next roughly 45 years. 12. Assumptions and parameters in the analysis: Below in table 1 we present the set of values used when to estimate the costs and benefits to each of the above listed approaches and components. The assumptions are mainly based on evidence from the literature, labor force survey data ( JLMPS 2016 ), or our expectations of outputs for the program from different components, which are described in other section of this PAD.", + "ner_text": [ + [ + 1080, + 1085, + "named" + ], + [ + 4, + 14, + "JLMPS <> publisher" + ], + [ + 1054, + 1077, + "JLMPS <> data type" + ], + [ + 1086, + 1090, + "JLMPS <> publication year" + ] + ], + "validated": true, + "empirical_context": "Assumptions and parameters in the analysis: Below in table 1 we present the set of values used when to estimate the costs and benefits to each of the above listed approaches and components. The assumptions are mainly based on evidence from the literature, labor force survey data ( JLMPS 2016 ), or our expectations of outputs for the program from different components, which are described in other section of this PAD.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned as part of the evidence used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced alongside labor force survey data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned as part of the evidence used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 13, + "text": "These measures would allow the MEP to optimize its management and fulfil its mandate to ensure that resources are used effectively to support the learning and development of students throughout the country. 17 Inclusion: Gender, Refugees and Migrants, and Indigenous Peoples 11. Gender parity between boys and girls to access STEAM related jobs is a challenge on which Costa Rica has made progress, but more policy action is needed. PISA 2022 mean scores for Costa Rica in Reading are above LAC \u2019 s average ( 415 vs. 400 ), as well as mean scores in Mathematics ( 385 vs. 374 ) and Sciences ( 411 vs. 400 ). Figure 1a shows that there is a statistically significant gender gap in Mathematics but not in Reading and Science as observed in PISA data. Looking at graduates of pre-university technical education ( mainly technical stream secondary education ) one finds an interesting pattern comparing three specializations ( Figure 1b ). Gender disparities are high in Electronics and Automation, less in Software Development and were recently eliminated in Network and Database Design. The final example of Network and Database Design in Figure 1b shows that gender disparity can be overcome in a short period of time.", + "ner_text": [ + [ + 433, + 437, + "named" + ], + [ + 369, + 379, + "PISA <> data geography" + ], + [ + 438, + 442, + "PISA <> publication year" + ], + [ + 459, + 469, + "PISA <> data geography" + ], + [ + 738, + 747, + "PISA <> data type" + ], + [ + 1233, + 1251, + "PISA <> usage context" + ] + ], + "validated": true, + "empirical_context": "Gender parity between boys and girls to access STEAM related jobs is a challenge on which Costa Rica has made progress, but more policy action is needed. PISA 2022 mean scores for Costa Rica in Reading are above LAC \u2019 s average ( 415 vs. 400 ), as well as mean scores in Mathematics ( 385 vs.", + "type": "dataset", + "explanation": "In this context, 'PISA' refers to a dataset as it is used to present mean scores for Costa Rica in comparison to LAC averages.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is associated with mean scores that provide quantitative data for analysis.", + "contextual_reason_agent": "In this context, 'PISA' refers to a dataset as it is used to present mean scores for Costa Rica in comparison to LAC averages.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 60, + "text": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 56 of 104 Beneficiaries with rebuilt and improved access to basic services Basic services refers to education, health, WASH, agriculture, and other public services, mobile and / or permanent. Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data. Beneficiaries are the people of the Kebeles where the new or improved services are provided MoF FPCU Beneficiaries with rebuilt and improved access to basic services - female Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data on female beneficiaries as per the relevant Kebele \u2019 s demographics MoF FPCU Beneficiaries with rebuilt and improved access to basic services - displaced ' Displaced ' are persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of or in order to avoid the effects of armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters.", + "ner_text": [ + [ + 699, + 716, + "named" + ], + [ + 4, + 14, + "Woreda-level data <> publisher" + ], + [ + 85, + 93, + "Woreda-level data <> data geography" + ], + [ + 380, + 388, + "Woreda-level data <> data geography" + ], + [ + 459, + 466, + "Woreda-level data <> reference population" + ], + [ + 675, + 683, + "Woreda-level data <> data geography" + ], + [ + 720, + 740, + "Woreda-level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data. Beneficiaries are the people of the Kebeles where the new or improved services are provided MoF FPCU Beneficiaries with rebuilt and improved access to basic services - female Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data on female beneficiaries as per the relevant Kebele \u2019 s demographics MoF FPCU Beneficiaries with rebuilt and improved access to basic services - displaced ' Displaced ' are persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of or in order to avoid the effects of armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to aggregated data collected from specific administrative divisions (Woredas) for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected at the Woreda level.", + "contextual_reason_agent": "This is indeed a dataset as it refers to aggregated data collected from specific administrative divisions (Woredas) for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 88, + "text": "There was a marked change in the 1999 Third International Mathematics and Science Study ( TIMSS ), where out of 38 countries, Jordan finished ahead of six ( Iran, Indonesia, Chile, Philippines, Morocco and South Africa ) in mathematics and ahead of eight ( Iran, Indonesia, Turkey, Tunisia, Chile, Philippines, Morocco and South Africa ) in science \u2013 although TIMSS is not strictly comparable to IEA. Jordan demonstrated continued improvements in quality as evidenced in 2003, where its TIMSS science scores improved to 475 from 450 in 1999, an increase of 25 points, or 0. 25 standard deviations, which is a significant increase, equivalent to about a whole year of learning. In 2007, Jordan continued to improve, surpassing several countries which had a similar or slightly higher performance in 1999. In fact, between 1999 and 2007, no other country improved as much in science as did Jordan ( Figure 3 ). Zimbabwe Zambia Yemen, Rep. Vietnam Venezuela, RB Vanuatu Uzbekistan Uruguay United States Uganda Trinidad and Tobago Tonga Thailand Tajikistan Switzerland Sweden St. Lucia Spain Slovenia Sierra Leone Saudi Arabia Russian Federation Philippines Peru Paraguay Panama Oman Norway Niger Nicaragua New Zealand Netherlands Nepal Morocco Mongolia Moldova Mexico Mali Malaysia Malawi Macao, China Luxembourg Lesotho Latvia Lao PDR Kyrgyz Republic Korea, Rep.", + "ner_text": [ + [ + 38, + 87, + "named" + ], + [ + 33, + 37, + "Third International Mathematics and Science Study <> reference year" + ], + [ + 90, + 95, + "Third International Mathematics and Science Study <> acronym" + ], + [ + 126, + 132, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 181, + 192, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 263, + 272, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 311, + 318, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 323, + 335, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 401, + 407, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 487, + 492, + "Third International Mathematics and Science Study <> acronym" + ], + [ + 493, + 507, + "Third International Mathematics and Science Study <> data description" + ], + [ + 536, + 540, + "Third International Mathematics and Science Study <> reference year" + ], + [ + 686, + 692, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 830, + 834, + "Third International Mathematics and Science Study <> reference year" + ], + [ + 888, + 894, + "Third International Mathematics and Science Study <> data geography" + ], + [ + 1233, + 1240, + "Third International Mathematics and Science Study <> data geography" + ] + ], + "validated": true, + "empirical_context": "There was a marked change in the 1999 Third International Mathematics and Science Study ( TIMSS ), where out of 38 countries, Jordan finished ahead of six ( Iran, Indonesia, Chile, Philippines, Morocco and South Africa ) in mathematics and ahead of eight ( Iran, Indonesia, Turkey, Tunisia, Chile, Philippines, Morocco and South Africa ) in science \u2013 although TIMSS is not strictly comparable to IEA. Jordan demonstrated continued improvements in quality as evidenced in 2003, where its TIMSS science scores improved to 475 from 450 in 1999, an increase of 25 points, or 0.", + "type": "study", + "explanation": "This is a dataset as it is a structured collection of data used for empirical analysis of mathematics and science education across countries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a study that collects and analyzes educational data across multiple countries.", + "contextual_reason_agent": "This is a dataset as it is a structured collection of data used for empirical analysis of mathematics and science education across countries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 89, + "text": "In what way is this a \u2018 stretch \u2019 to achieve? A coherent and systematic revision of national learning assessments in primary and lower secondary is a significant task to accomplish; to then provide schools with their results so that they may then identify appropriate responses is ambitious, yet realizable. 185. Response: The response in Component 3 of the project is to ( a ) develop and have approved basic education assessment levels in math and at least one language ( French or Arabic ); ( b ) have learning assessments revised and administered; and ( c ) ensure that primary schools receive school reports on their results in math and languages by Challenge \u2022 Weak participation of girls in lower secondary school Activities \u2022 Sub-regional plan approved to fight against weak participation and drop out \u2022 Local solutions piloted and assessed \u2022 Scale-up Outputs \u2022 Local solutions respond to local constraints to female participation, including cutural prefernces, distance to school, safety and security. Outcome \u2022 Greater participation of girls in lower secondary and therefore in general", + "ner_text": [ + [ + 84, + 113, + "named" + ] + ], + "validated": false, + "empirical_context": "In what way is this a \u2018 stretch \u2019 to achieve? A coherent and systematic revision of national learning assessments in primary and lower secondary is a significant task to accomplish; to then provide schools with their results so that they may then identify appropriate responses is ambitious, yet realizable. 185.", + "type": "assessment", + "explanation": "However, it is not a dataset as it is described as a task or process rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'national learning assessments' refers to a dataset because it involves evaluations of student learning outcomes.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a task or process rather than a structured collection of data.", + "contextual_signal": "mentioned only as a task, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 31, + "text": "32 Indicator Description Project Development Objective Indicators Indicator Name Description ( indicator definition etc. ) Frequency Data Source / Methodology Responsibility for Data Collection Direct project beneficiaries Eligible NPTP beneficiaries who enroll with contracted providers and are eligible for the essential healthcare services package. Bi-annually NPTP Database PMU Female beneficiaries Percentage of direct project beneficiaries that are female. Bi-annually NPTP Database PMU User Satisfaction ( percent ) Share of users satisfied by the received health care services. Bi-annually User Satisfaction Survey External Technical Audit Utilization of services: average no of visits per beneficiary per year ( number ) Utilization of services provided by health care service providers that will be calculated as a weighted average of the number of visits per beneficiary per year. Bi-annually HIS PMU", + "ner_text": [ + [ + 598, + 622, + "named" + ], + [ + 194, + 222, + "User Satisfaction Survey <> reference population" + ], + [ + 673, + 718, + "User Satisfaction Survey <> data description" + ], + [ + 927, + 945, + "User Satisfaction Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Bi-annually NPTP Database PMU User Satisfaction ( percent ) Share of users satisfied by the received health care services. Bi-annually User Satisfaction Survey External Technical Audit Utilization of services: average no of visits per beneficiary per year ( number ) Utilization of services provided by health care service providers that will be calculated as a weighted average of the number of visits per beneficiary per year. Bi-annually HIS PMU", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as a 'User Satisfaction Survey' that collects data on user satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as a 'User Satisfaction Survey' that collects data on user satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "141_760530PAD0P127010Box377322B00OUO090", + "page": 16, + "text": "Nevertheless, the country ' s medium growth prospects have improved considerably and a level of political stability has been achieved. This is evidenced by the return of the country to constitutional order following the democratic national elections in 2009, and by the projected expansion of the mining sector, manufacturing, and expected improvements to the commercial and fishing sectors. In the context of the service - dependent mining sector and the growing secondary and tertiary sectors, the role of the urban and rural centers in leading economic growth as service providers and employment generators becomes particularly important. B. Sectoral and Institutional Context 3. Mauritania has experienced particularly rapid urbanization in recent years and over the past decade has been transformed from one of the most rural countries in sub-Saharan Africa ( SSA ) to one of the most urbanized, with over 62 percent1 of the population living in cities2. Moreover, the process of urbanization has been highly asymmetric, as evident both in the concentration of the urban population in three cities, with the majority ( about 50 % ) living in Nouakchott alone, as well as in the starkly differing economic characteristics of the towns, as a consequence of their specific location and regional context. The rural areas are characterized by extremely dispersed settlement patterns and very low densities, largely the result of water scarcity, small overall populations, and vast distances to service centers. The asymmetries of urbanization and development of growth centers in Mauritania are accentuated by public and private sector 1 This figure is based on the last national census in 2002, adjusted based on annual projections officially recognized by Government. 2 Mauritania has experienced rapid urbanization with an urban population growth estimated around 400 % for the past 50 years ( from a 4 % level of urbanization in 1960 to 62 % in 2011 ). Current urbanization rate is estimated at 2. 5 %.", + "ner_text": [ + [ + 1671, + 1686, + "named" + ], + [ + 683, + 693, + "national census <> data geography" + ], + [ + 1070, + 1086, + "national census <> reference population" + ], + [ + 1690, + 1694, + "national census <> publication year" + ], + [ + 1758, + 1768, + "national census <> publisher" + ] + ], + "validated": true, + "empirical_context": "The rural areas are characterized by extremely dispersed settlement patterns and very low densities, largely the result of water scarcity, small overall populations, and vast distances to service centers. The asymmetries of urbanization and development of growth centers in Mauritania are accentuated by public and private sector 1 This figure is based on the last national census in 2002, adjusted based on annual projections officially recognized by Government. 2 Mauritania has experienced rapid urbanization with an urban population growth estimated around 400 % for the past 50 years ( from a 4 % level of urbanization in 1960 to 62 % in 2011 ).", + "type": "census", + "explanation": "In this context, it is confirmed as a dataset since it is explicitly mentioned as the basis for the figure and is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'national census' typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is explicitly mentioned as the basis for the figure and is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 56, + "text": "A household will be considered as a beneficiary household if it is both enrolled in the project and have received a cash transfer, at least for one payment cycle This indicator will be measured on a quarterly basis during missions and ISRs SNSOP MIS which will track beneficiary registration and payment data The implementing agency will collect registration and payment data over the course of the project Implementing Partner Beneficiary and non-beneficiary households reporting satisfaction with community assets created through LIPW The total number of surveyed beneficiary and non-beneficiary households This indicator will be measured on SNSOP Management Information Data on satisfaction will be collected during Post Distribution Monitoring Implementing Partner", + "ner_text": [ + [ + 240, + 249, + "named" + ], + [ + 36, + 57, + "SNSOP MIS <> reference population" + ], + [ + 267, + 308, + "SNSOP MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "A household will be considered as a beneficiary household if it is both enrolled in the project and have received a cash transfer, at least for one payment cycle This indicator will be measured on a quarterly basis during missions and ISRs SNSOP MIS which will track beneficiary registration and payment data The implementing agency will collect registration and payment data over the course of the project Implementing Partner Beneficiary and non-beneficiary households reporting satisfaction with community assets created through LIPW The total number of surveyed beneficiary and non-beneficiary households This indicator will be measured on SNSOP Management Information Data on satisfaction will be collected during Post Distribution Monitoring Implementing Partner", + "type": "management information system", + "explanation": "It is indeed a dataset as it is explicitly mentioned to track beneficiary registration and payment data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often stands for Management Information System, typically associated with data collection.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly mentioned to track beneficiary registration and payment data.", + "contextual_signal": "mentioned as a data source tracking beneficiary registration and payment data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 19, + "text": "Teacher shortages, inadequate instructional materials and school running costs ( including for assessment and national exams ), as well as lack of basic school infrastructure have been identified as key priority issues in the refugee camp-based and host community schools in Kakuma, Dadaab and Kalobeyei. 16 Additional support to these schools in refugee hosting counties would contribute to the raising of education outcomes in some of the most economically and educationally disadvantaged counties. In urban areas, refugees \u2019 main barrier to access education is the cost of transport, books, uniforms, and other indirect costs. Other key limitations include different educational experiences and linguistic competencies which can result in students falling behind or dropping out, lack of information and resources to support the processes for recognition for prior learning, and lack of birth certificates and differences in registration documents, required to be registered in the National Education Management Information System ( NEMIS ) and for national examinations. C. Relationship to the CPS / CPF and Rationale for Use of Instrument 21. The proposed PforR is aligned with a draft World Bank Group Country Partnership Framework for Kenya ( CPF, FY22 \u2013 - FY27 ), which identifies three High-Level Outcomes ( HLO ): ( i ) faster labor productivity growth; ( ii ) inclusion and equality institutionalized; and ( iii ) greater productivity and preservation of Kenya \u2019 s 14 Ibid. 15 UNHCR and World Bank, ( 2021 ), Understanding the Socio-economic Conditions of Refugees in Kenya: Volume C: Results from the 2020-21 Urban Socioeconomic Survey. 16 Baseline assessment conducted by UNHCR and MoE in light of schools reopening", + "ner_text": [ + [ + 1613, + 1647, + "named" + ] + ], + "validated": true, + "empirical_context": "The proposed PforR is aligned with a draft World Bank Group Country Partnership Framework for Kenya ( CPF, FY22 \u2013 - FY27 ), which identifies three High-Level Outcomes ( HLO ): ( i ) faster labor productivity growth; ( ii ) inclusion and equality institutionalized; and ( iii ) greater productivity and preservation of Kenya \u2019 s 14 Ibid. 15 UNHCR and World Bank, ( 2021 ), Understanding the Socio-economic Conditions of Refugees in Kenya: Volume C: Results from the 2020-21 Urban Socioeconomic Survey. 16 Baseline assessment conducted by UNHCR and MoE in light of schools reopening", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a survey that provides empirical data on the socio-economic conditions of refugees in Kenya.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes a specific title of a survey that likely contains structured data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a survey that provides empirical data on the socio-economic conditions of refugees in Kenya.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "059_Uganda-Investing-in-Forests-and-Protected-Areas-for-Climate-Smart-Development-Project", + "page": 64, + "text": "The World Bank Uganda Investing in Forests and Protected Areas for Climate-Smart Development Project ( P170466 ) Page 61 of 83 15. In addition, the RF includes indicators matching the definition of the Predictive Proxy Indicators ( PPIs ) concept to evaluate the effectiveness of forest-related interventions, as suggested in the Forest Action Plan. 66 While the project does not pursue the complete formation of specific PPI clusters, its indicators directly relate to the poverty, biodiversity, and climate change themes in such clusters as sustainable income, positive environmental impacts related to biodiversity and to climate change, and increased carbon stocks. Such indicators include \u2018 land area under sustainable land management practices \u2019, \u2018 people employed in production and processing of forest products \u2019, \u2018 refugee households benefitting from the woodfuel scheme for refugee settlements \u2019, and \u2018 smallholder plantations established \u2019. 16. Data will be collected and managed by the MWE, UWA, and NFA, with overall responsibility for data quality assessment, consolidation, analysis, and reporting resting with the MWE.", + "ner_text": [ + [ + 202, + 229, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Uganda Investing in Forests and Protected Areas for Climate-Smart Development Project ( P170466 ) Page 61 of 83 15. In addition, the RF includes indicators matching the definition of the Predictive Proxy Indicators ( PPIs ) concept to evaluate the effectiveness of forest-related interventions, as suggested in the Forest Action Plan. 66 While the project does not pursue the complete formation of specific PPI clusters, its indicators directly relate to the poverty, biodiversity, and climate change themes in such clusters as sustainable income, positive environmental impacts related to biodiversity and to climate change, and increased carbon stocks.", + "type": "concept", + "explanation": "However, 'Predictive Proxy Indicators' is described as a concept for evaluating interventions, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'indicators' which often relates to data metrics.", + "contextual_reason_agent": "However, 'Predictive Proxy Indicators' is described as a concept for evaluating interventions, not as a structured collection of data.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "159_38147core", + "page": 34, + "text": "The PPU would publicize the criteria for the selection o f refugee camps in each phase, the identification of beneficiaries, details on the cash grant scheme and disbursement procedure. The communication campaign, to be monitored by UNHCR, would ensure transparency, accountability and allow IDPs to avail o f the grievance redressal mechanism. Housing Assessment Survey Since 25 % o f the housing caseload falls under the partly-completed category, it was necessary to verify the precise physical improvement required. This was done through a Housing Assessment Survey conducted by the PPU. While a permanent housing unit was defined in keeping with the core housing concept used by the Government for the conflict and tsunami housing programs, any unit not falling within the minimum physical specification o f a permanent house and definition o f a temporary thatched house was considered a partly-completed unit. The Housing Assessment Survey determined the extent of physical improvement required for the partly-completed houses be it a permanent roof, additional room ( s ), toilet, permanent flooring / plastering, etc. Houses with poor foundation that require reconstruction were categorized as a temporary unit. The Housing Assessment Survey provided a template for project implementation with basic information such as name and identification number o f the beneficiary, bank account, type o f land ownership, type o f house and type o f physical construction required in the case o f partly-completed houses. It would be the baseline document in each beneficiary file. Project Implementation Physical Construction Requirement. A permanent housing unit i s defined as one with:.. One safe ( closed ) room,. Kitchen ( internal or external ),. Veranda,. Permanent roof,. Permanent floor / internal plastering, and. Independent toilet. A minimum plinth area o f 500 square ft, 29", + "ner_text": [ + [ + 345, + 370, + "named" + ], + [ + 4, + 7, + "Housing Assessment Survey <> author" + ], + [ + 292, + 296, + "Housing Assessment Survey <> reference population" + ], + [ + 587, + 590, + "Housing Assessment Survey <> author" + ], + [ + 1329, + 1379, + "Housing Assessment Survey <> data description" + ], + [ + 1381, + 1393, + "Housing Assessment Survey <> data description" + ], + [ + 1395, + 1418, + "Housing Assessment Survey <> data description" + ], + [ + 1420, + 1434, + "Housing Assessment Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "The communication campaign, to be monitored by UNHCR, would ensure transparency, accountability and allow IDPs to avail o f the grievance redressal mechanism. Housing Assessment Survey Since 25 % o f the housing caseload falls under the partly-completed category, it was necessary to verify the precise physical improvement required. This was done through a Housing Assessment Survey conducted by the PPU.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a 'Housing Assessment Survey' conducted to gather data on housing conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a 'Housing Assessment Survey' conducted to gather data on housing conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 13, + "text": "Poverty in five of the six districts hosting refugees is higher than the national average. Gisagara ( which hosts the Mugombwa camp ) and Karongi ( which hosts the Kiziba camp ) are two of the four poorest districts in the country, with poverty levels of 56 and 53 percent respectively. Host communities suffer from the same development constraints as refugees \u2013 limited employment opportunities, poor quality education and a dependence on low-income agriculture for livelihood. While relations are generally good between refugees and hosts, the project will promote continued peaceful co-existence by mitigating the negative impacts of refugee presence, by providing equitable access to project benefits to both groups and by promoting joint economic activity. 12. Rwanda is exerting efforts to improve access to finance, but more can be done, including for refugees. In 2017, 50 percent of Rwandans held an account at a financial institution, declining to 48 percent for women and 38. 7 percent in rural areas. 9 Only 8. 1 percent of adults have access to credit nation-wide. 10 Umurenge Savings and Credit Co - Operatives ( U-SACCOs, referred to as SACCOs in this document ) and Microfinance Institutions ( MFIs ) play an important role in improving financial inclusion. There are 416 SACCOs, one in each administrative sector, covering all of Rwanda \u2019 s 30 Districts. From 2008 to 2012, SACCOs increased access to formal financial institutions from 21 to 42 percent. 7 World Bank. Poverty and Equity Data Portal. Data as of October 2018. See http: / / povertydata. worldbank. org / poverty / country / RWA 8 Taylor, J. Edward, Mateusz J. Filipski, Mohamad Alloush, Anubhab Gupta, Ruben Irvin Rojas Valdes, and Ernesto Gonzalez-Estrada. 2016. \u201c Economic Impact of refugees. \u201d PNAS 113 ( 27 ): 7449-53. 9 Findex 2017. 10 Ibid.", + "ner_text": [ + [ + 1485, + 1515, + "named" + ], + [ + 91, + 99, + "Poverty and Equity Data Portal <> data geography" + ], + [ + 138, + 145, + "Poverty and Equity Data Portal <> data geography" + ], + [ + 766, + 772, + "Poverty and Equity Data Portal <> data geography" + ], + [ + 872, + 876, + "Poverty and Equity Data Portal <> publication year" + ], + [ + 892, + 900, + "Poverty and Equity Data Portal <> reference population" + ], + [ + 1347, + 1353, + "Poverty and Equity Data Portal <> data geography" + ], + [ + 1377, + 1389, + "Poverty and Equity Data Portal <> reference year" + ], + [ + 1473, + 1483, + "Poverty and Equity Data Portal <> publisher" + ], + [ + 1528, + 1540, + "Poverty and Equity Data Portal <> reference year" + ], + [ + 1612, + 1629, + "Poverty and Equity Data Portal <> author" + ], + [ + 1631, + 1650, + "Poverty and Equity Data Portal <> author" + ], + [ + 1652, + 1667, + "Poverty and Equity Data Portal <> author" + ], + [ + 1669, + 1682, + "Poverty and Equity Data Portal <> author" + ], + [ + 1684, + 1708, + "Poverty and Equity Data Portal <> author" + ], + [ + 1714, + 1738, + "Poverty and Equity Data Portal <> author" + ] + ], + "validated": true, + "empirical_context": "7 World Bank. Poverty and Equity Data Portal. Data as of October 2018.", + "type": "data portal", + "explanation": "The term refers to a data portal, which typically serves as a structured collection of data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in its title, suggesting a collection of information.", + "contextual_reason_agent": "The term refers to a data portal, which typically serves as a structured collection of data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "182_multi0page", + "page": 20, + "text": "Sub-project level monitoring will be carried during the sub-project cycle and be more closely defined once the operations manual has been developed. To monitor the indicators that have been developed in the project LogFrame ( see Annex 1 ), each of the indicators will be associated with baseline and target values, frequency of collection, responsibility and methods of measurement. The indicators in the LogFrame will be further developed to include input, output, and outcome. In some cases those would be associated with relevant risk and sustainability indicators. The monitoring system through the use of pre-formatted reports will require the recipient of the report to provide comments and feedback on the information submitted. This will insure the presence of a feedback mechanism. In addition, the annual work plan will be required to have monitoring activities as part of component activities to insure that such activities are not set aside and that they become an integral part of project implementation. Using the established baselines from the VNICA study and other studies to be carried out during the project lifetime the monitoring will be carried out starting with project effectiveness. Monitoring reports will be required on a periodic basis in addition to various reports from special studies and research activities. As part of the Bank monitoring, semi-annual supervision missions and a mid-term review will be undertaken. D. Project Rationale 1. Project alternatives considered and reasons for rejection: I. Project alternatives considered and reasons for rejection: * The project initially considered up-front financing of a large package of community services. However, it was determined that systemic reform would best be accomplished through a two-stage process, with the first stage of the project supporting a smaller package of services and intensive policy and institutional strengthening, and the second stage financing a larger package of services once a set of policy triggers had been achieved. * The possibility of adding a component for social services delivery in the Albania Development Fund - 17 -", + "ner_text": [ + [ + 406, + 414, + "named" + ] + ], + "validated": false, + "empirical_context": "To monitor the indicators that have been developed in the project LogFrame ( see Annex 1 ), each of the indicators will be associated with baseline and target values, frequency of collection, responsibility and methods of measurement. The indicators in the LogFrame will be further developed to include input, output, and outcome. In some cases those would be associated with relevant risk and sustainability indicators.", + "type": "framework", + "explanation": "However, the LogFrame is described as a project framework rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is associated with indicators and values.", + "contextual_reason_agent": "However, the LogFrame is described as a project framework rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 42, + "text": "The budget monitoring system will be at the transaction, system, and reporting levels. The budget control in the Integrated Financial Management Information System ( IFMIS ) will be applied based on the Government budget code. The accounting system to be used would enable budget controls and monitoring, budget tracking, and periodic reporting. Expenditures will also be compared to the budgets regularly, explanations will be sought for significant variances, and remedial actions will be taken as appropriate. IFRs would include a variance report along with explanations of material variances. Management will take midway corrective measures based on the reports and explanations. 3. Accounting and staffing arrangement for the project. The GoE \u2019 s accounting policies ( modified cash basis ) and procedures will apply to the project. Separate accounts for the project will be maintained at the PMO. NIDP will develop a project specific FMM, which follows the government procedures and addresses the peculiarities of the project. Preparation of the FMM will be completed within three months of effectiveness. The chart of accounts of the PMO will be updated to accommodate the project. The project is expected to use an accounting system that captures project records at the component, subcomponent, and activity levels. In addition, to comply with government reporting requirements, the project will have to maintain records through IFMIS.", + "ner_text": [ + [ + 1437, + 1442, + "named" + ] + ], + "validated": false, + "empirical_context": "The project is expected to use an accounting system that captures project records at the component, subcomponent, and activity levels. In addition, to comply with government reporting requirements, the project will have to maintain records through IFMIS.", + "type": "system", + "explanation": "However, IFMIS is mentioned as a system for maintaining records, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMIS is a dataset because it is related to data management in the context of project records.", + "contextual_reason_agent": "However, IFMIS is mentioned as a system for maintaining records, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 24, + "text": "The World Bank project has been focusing on capacity building, trainings and operational manuals. The envisaged OBA pilot intends to build on this initiative. Complementary technical assistance activities, such as assistance in developing a MIS, evaluating fee collection mechanisms and conducting awareness raising workshops and education campaigns will be undertaken to increase the chances of success. A key component of this assistance will include the development of a MIS to track performance across municipalities, thereby providing data to enable better management. Proper financial management, including revenue mobilization, planned expenditure and maintenance of financial discipline, is critical to effective delivery of urban services, including SWM. Therefore, the MIS will track OBA Targets and indicators, as well as other data useful to system managers in understanding the nature of continuing areas for improvement. The scope of work and costs of the technical assistance are provided in Annex 11. \uf0b7 Stakeholder buy-in is key to successful project implementation. Several preparatory activities have been carried out to gauge stakeholders \u2019 concerns and ensure their early engagement. Separate focus group meetings were held with both end-users and municipal SWM staff to discuss SWM issues and concerns. Community meetings were held in rural and urban areas within both Hebron and Bethlehem governorates ( five in Hebron and four in Bethlehem ), during which discussions were held and participants were asked to fill out questionnaires. The questionnaires generally covered demographics, waste management services, and waste management fees. Moreover, stakeholder meetings held during project preparation revealed that users \u2019 reluctance to pay stems from dissatisfaction with the current service level, and indicated a higher WTP for satisfactory level of service. Targets for fee increase were estimated taking into account this information. Consultations with key regional SWM management ( JSC-H & B, JSCs ) were also held to understand practical concerns and challenges in implementing the Project. A", + "ner_text": [ + [ + 779, + 782, + "named" + ] + ], + "validated": false, + "empirical_context": "Proper financial management, including revenue mobilization, planned expenditure and maintenance of financial discipline, is critical to effective delivery of urban services, including SWM. Therefore, the MIS will track OBA Targets and indicators, as well as other data useful to system managers in understanding the nature of continuing areas for improvement. The scope of work and costs of the technical assistance are provided in Annex 11.", + "type": "system", + "explanation": "'MIS' is not a dataset; it is described as a management information system that tracks data but is not itself a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to tracking targets and indicators.", + "contextual_reason_agent": "'MIS' is not a dataset; it is described as a management information system that tracks data but is not itself a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 125, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 120 of 130 4. The design of the component supports the implementation of Uganda \u2019 s COVID-19 priorities and Strategy Note on Support for Refugee and Host Communities, as well as UNHCR \u2019 s Uganda 2020 \u2013 21 Refugee Response Plan, the directions outlined in the CRRF Strategic Direction and National Plan of Action, and the SERP. More specifically, the project follows the overall request of the GoU to development partners to switch from in-kind contributions to cash-based support and overall market stimulation. This new approach also builds on the recent literature on Minimum Expenditure Baskets, which highlights that establishing markets in refugee areas supports the graduation from humanitarian to development aid. Overall, experience world - wide also indicates that entirely free electricity services ( grid and off-grid ) do not translate into adequate quality of services and sustainability of access efforts. 5. The WHR grant funding will also support the integration into the Geospatial National Planning Platform of the data available for the districts hosting refugees. The platform will ensure a least-cost approach to grid and off-grid service delivery, to inform the GIS-based design of the PDSC under MEMD and private sector markets. Furthermore, it will provide least-cost solutions targeted for the socioeconomic development of the districts through a cross-sectoral approach.", + "ner_text": [ + [ + 1065, + 1102, + "named" + ] + ], + "validated": false, + "empirical_context": "5. The WHR grant funding will also support the integration into the Geospatial National Planning Platform of the data available for the districts hosting refugees. The platform will ensure a least-cost approach to grid and off-grid service delivery, to inform the GIS-based design of the PDSC under MEMD and private sector markets.", + "type": "system", + "explanation": "However, it is described as a platform, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' and relates to planning.", + "contextual_reason_agent": "However, it is described as a platform, not a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "The subcomponent will also support strengthened supervision, management, and on-the-job coaching for IPs and service providers through an integrated supervisory approach in which IPs develop CHD capacity, inclusive of health service delivery planning, supervision, and data entry into District Health Information Software 2 ( DHIS2 ). The subcomponent will include planning and execution of outreach ( village visits, mass campaigns, and so on ) and transportation ( vehicle, boat, and foot ) modalities paying close attention to seasonal population movement patterns and access. Climate sensitive health service delivery and planning will be integral to the approach under this subcomponent. 30. Subcomponent 1. 1 will channel resources through UNICEF to sub-contract national and international NGOs for health service delivery and coordination. 29 In collaboration and through the leadership of the MoH, SMoHs, and CHDs, UNICEF will be responsible for ( a ) oversight and coordination of health services and DHIS2 data collection and entry systems; ( b ) supervision and quality assurance of IPs and health facilities in line with national plans and guidelines; ( c ) coordinating and conducting in-service training; ( d ) through an integrated approach, developing the capacity of SMoHs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( e ) integrated pharmaceutical procurement, quantification, and forecasting.", + "ner_text": [ + [ + 1347, + 1352, + "named" + ], + [ + 746, + 752, + "DHIS2 <> publisher" + ], + [ + 923, + 929, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "1 will channel resources through UNICEF to sub-contract national and international NGOs for health service delivery and coordination. 29 In collaboration and through the leadership of the MoH, SMoHs, and CHDs, UNICEF will be responsible for ( a ) oversight and coordination of health services and DHIS2 data collection and entry systems; ( b ) supervision and quality assurance of IPs and health facilities in line with national plans and guidelines; ( c ) coordinating and conducting in-service training; ( d ) through an integrated approach, developing the capacity of SMoHs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( e ) integrated pharmaceutical procurement, quantification, and forecasting.", + "type": "system", + "explanation": "DHIS2 is indeed a data collection system used for health information, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and entry systems.", + "contextual_reason_agent": "DHIS2 is indeed a data collection system used for health information, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data collection and entry system", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 61, + "text": "Climate change impacts are expected to affect road Annual GIS database prepared during project preparation The methodology will consist on using population distribution to calculate the share of population located within a 5km - buffer zone with access to an all-weather Project Implementation Unit", + "ner_text": [ + [ + 51, + 70, + "named" + ], + [ + 145, + 168, + "Annual GIS database <> data description" + ], + [ + 195, + 240, + "Annual GIS database <> reference population" + ] + ], + "validated": true, + "empirical_context": "Climate change impacts are expected to affect road Annual GIS database prepared during project preparation The methodology will consist on using population distribution to calculate the share of population located within a 5km - buffer zone with access to an all-weather Project Implementation Unit", + "type": "database", + "explanation": "This is indeed a dataset as it is described in the context as being prepared for project implementation, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'GIS database' which typically implies a structured collection of geographic data.", + "contextual_reason_agent": "This is indeed a dataset as it is described in the context as being prepared for project implementation, indicating its use as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 9, + "text": "The largest camp is the Al Zaatari camp in Mafraq, host to almost 80, 000 Syrian refugees. According to the recently concluded census, the total number of Syrians refugees has reached about 1. 3 million, and there are a further 1. 6 million non - nationals in Jordan. According to the Ministry of Labor ( MOL ), 324, 000 foreigners have work permits, 65 percent of them are Egyptians, 3 percent are from other Arab countries, 26 percent are South Asians, and only 2 percent are Syrian refugees. 3. The situation of most Syrians in Jordan is highly vulnerable. Extremely vulnerable female - headed households represent a quarter of all refugee households. Approximately 17 percent of the refugees live in camps. The majority of refugees \u2014 about 80 percent \u2014 are below the national poverty line and need assistance through access to expanded education, health, and housing as well as economic opportunities. 4. The Government of Jordan ( GoJ ) and the international community requested World Bank Group ( WBG ) support for a holistic approach to the Syrian refugees \u2019 influx, targeting both the Jordanian host communities and the refugees in Jordan.", + "ner_text": [ + [ + 127, + 133, + "named" + ], + [ + 43, + 49, + "census <> data geography" + ], + [ + 74, + 89, + "census <> reference population" + ], + [ + 260, + 266, + "census <> data geography" + ], + [ + 531, + 537, + "census <> data geography" + ] + ], + "validated": true, + "empirical_context": "The largest camp is the Al Zaatari camp in Mafraq, host to almost 80, 000 Syrian refugees. According to the recently concluded census, the total number of Syrians refugees has reached about 1. 3 million, and there are a further 1.", + "type": "census", + "explanation": "In this context, 'census' is used to provide empirical data about the number of Syrian refugees, confirming it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'census' is a dataset because it typically refers to a structured collection of data about a population.", + "contextual_reason_agent": "In this context, 'census' is used to provide empirical data about the number of Syrian refugees, confirming it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 36, + "text": "Quality of Care will be monitored and maintained through the MoPH PHCC accreditation program. The PHC accreditation program was initiated in 2009 by the MoPH in collaboration with Accreditation Canada International ( ACI ). With support of local experts, the program was developed, piloted and implemented in a phased approach in PHC network facilities. Currently, 34 out of the 75 PHCCs are accredited and the other 42 are preparing for accreditation in 2015. Moreover, quality of clinical care will be monitored by the MoPH through the EHCP clinical indicators listed in Annex V. Enrollment 11. Contracted PHCCs will receive from CMU of PCM through the MoPH the list of beneficiaries in their catchment area and will be responsible for enrolling those beneficiaries through marketing and outreach campaigns. Once enrolled, individuals will be exempted from payment for EHCP services as they will be fully subsidized by the government. A nominal fee of around US $ 12 will be paid by each household for registration. Upon enrollment, each member of the household will receive a photo identification enrollment card which will be saved in the system for proper verification of beneficiaries.", + "ner_text": [ + [ + 538, + 562, + "named" + ] + ], + "validated": false, + "empirical_context": "Currently, 34 out of the 75 PHCCs are accredited and the other 42 are preparing for accreditation in 2015. Moreover, quality of clinical care will be monitored by the MoPH through the EHCP clinical indicators listed in Annex V. Enrollment 11.", + "type": "indicator", + "explanation": "However, 'EHCP clinical indicators' is not presented as a structured collection of data but rather as a set of metrics for monitoring quality of care.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EHCP clinical indicators' refers to a dataset because it includes the term 'indicators' which can imply data points.", + "contextual_reason_agent": "However, 'EHCP clinical indicators' is not presented as a structured collection of data but rather as a set of metrics for monitoring quality of care.", + "contextual_signal": "mentioned only as a set of clinical indicators, not as a data source", + "tags": [] + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 28, + "text": "Activities will include ( a ) communication support; ( b ) monitoring and evaluation ( M & E ) arrangements, including the setup of a Management Information System ( MIS ); and ( c ) measures for enhanced transparency and accountability. The component will also strengthen the PIUs \u2019 capacities to monitor project activities while supporting an improved understanding of climate change risks and impacts, GHG sources and trends, design of adaptation and mitigation strategies, and policy actions in the face of climate change. 57. The project will establish a Casamance Knowledge Management Platform to address gaps across data collection and analytics in the region. Implementation of the Knowledge Management Plateform ( KMP ) activities will be done in collaboration with the Assane Seck University of Ziguinchor, in particular the sociology department for social inclusion and gender issues and the geography department on climate change and agriculture. The Project will enable young researchers to reflect on issues directly related to the project and thus contribute to the analytics. The KMP will also enable greater transparency and real-time monitoring through the dissemination of local and DDPs and monitoring progress in their implementation through geo-locating investments, in conjunction with the CDD application discussed below. 58.", + "ner_text": [ + [ + 134, + 163, + "named" + ] + ], + "validated": false, + "empirical_context": "Activities will include ( a ) communication support; ( b ) monitoring and evaluation ( M & E ) arrangements, including the setup of a Management Information System ( MIS ); and ( c ) measures for enhanced transparency and accountability. The component will also strengthen the PIUs \u2019 capacities to monitor project activities while supporting an improved understanding of climate change risks and impacts, GHG sources and trends, design of adaptation and mitigation strategies, and policy actions in the face of climate change.", + "type": "system", + "explanation": "However, it is mentioned as a system for monitoring and evaluation, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system for monitoring and evaluation, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 139, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 127 and complexity of activity which will be indicated in the Procurement Plan in STEP. These prior-review contracts will be updated in the Procurement Plan annually or as necessary during implementation, based on the procurement capacity assessment during implementation support missions. 64. The World Bank will carry out regular procurement supervision missions on an annual basis and carry out procurement post-review on an annual basis. Contracts not subject to prior review will be subject to post - review by the World Bank as per procedures set forth in Annex II122 \u2013 \u201c Procurement Oversight \u201d of the Procurement Regulations. The sample contracts for the procurement post-reviews will be risk-based. The World Bank will undertake the post-reviews online accessing procurement documents and data from STEP. The Recipient will upload all procurement process documentation and information in STEP at each roadmap stage. At the completion of the contracts, the Recipient will upload completion documents including inspection, acceptance, delivery, and final payment evidenced in STEP. 65.", + "ner_text": [ + [ + 868, + 872, + "named" + ] + ], + "validated": false, + "empirical_context": "The sample contracts for the procurement post-reviews will be risk-based. The World Bank will undertake the post-reviews online accessing procurement documents and data from STEP. The Recipient will upload all procurement process documentation and information in STEP at each roadmap stage.", + "type": "system", + "explanation": "However, STEP is described as a system for managing procurement documentation, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it is mentioned in the context of accessing procurement documents and data.", + "contextual_reason_agent": "However, STEP is described as a system for managing procurement documentation, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 45, + "text": "The PITs will maintain a M & E system to report quarterly or on demand on project progress and provide timely information for decision-making. The PIT will be responsible for the PM & E system, which will support project management, reporting and evaluation needs. Independent evaluations will be conducted at the beginning, mid-term, and end of the project to get feedback directly from project beneficiaries. These evaluations will be tendered on a competitive basis in accordance with the ToR agreed with International Development Association ( IDA ). A baseline customer survey will be carried out during the first months of implementation of the project. Information on project beneficiaries will help to understand project impacts in a disaggregated manner. Data collection will support the establishment of a database for future financial and economic analysis. The PIT will be responsible for collecting and reporting data as agreed in the PM & E section included in the PIM and reflected in the Results Framework. The Geo-Enabling initiative for Monitoring and Supervision and kobo toolbox may be used by the PITs for project monitoring given the context of insecurity in the country. C. Sustainability 87. The project is designed to build capacity at different levels to deliver timely land administration services as part of a programmatic approach. Medium to long-term sustainability for the land component will require availability of well-trained staff to deliver the services, at both central and local levels, including the ability to", + "ner_text": [ + [ + 557, + 581, + "named" + ], + [ + 388, + 409, + "baseline customer survey <> reference population" + ], + [ + 675, + 696, + "baseline customer survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "These evaluations will be tendered on a competitive basis in accordance with the ToR agreed with International Development Association ( IDA ). A baseline customer survey will be carried out during the first months of implementation of the project. Information on project beneficiaries will help to understand project impacts in a disaggregated manner.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a baseline customer survey that will provide information for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects structured data from customers.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a baseline customer survey that will provide information for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 45, + "text": "Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "ner_text": [ + [ + 747, + 755, + "named" + ], + [ + 393, + 407, + "PMU Data <> author" + ], + [ + 424, + 444, + "PMU Data <> data type" + ], + [ + 528, + 534, + "PMU Data <> publisher" + ], + [ + 828, + 834, + "PMU Data <> publisher" + ], + [ + 854, + 872, + "PMU Data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "type": "data", + "explanation": "In the context, 'PMU Data' is explicitly mentioned as being compiled and recorded, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is referenced in the context of compiling reports and recording beneficiary records.", + "contextual_reason_agent": "In the context, 'PMU Data' is explicitly mentioned as being compiled and recorded, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for compiling reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 27, + "text": "This subcomponent will define public procurement performance indicators and a mechanism for gathering and analyzing performance data. It will introduce mechanisms to orient the system toward better performance, including, but not limited to, performance contracts within the process chain, with the aim to reduce the procurement time lines. To this end, a \u2018 project team \u2019 will be put in place to oversee this innovative approach. This team will be hosted in MINMAP and will comprise the various procurement committees and the MINMAP Directorates. Component 4: Enhancing the use of statistics for policy making ( US $ 9 million ) 41. The purpose of this component is to improve the national statistical system to produce regular and reliable economic and social population statistics and in-depth analysis. Robust statistics are a prerequisite to evidence-based policy making and policy results \u2019 monitoring.", + "ner_text": [ + [ + 30, + 71, + "named" + ] + ], + "validated": false, + "empirical_context": "This subcomponent will define public procurement performance indicators and a mechanism for gathering and analyzing performance data. It will introduce mechanisms to orient the system toward better performance, including, but not limited to, performance contracts within the process chain, with the aim to reduce the procurement time lines.", + "type": "concept", + "explanation": "However, it is not a dataset as it refers to performance indicators, which are metrics rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'indicators' which often relates to data metrics.", + "contextual_reason_agent": "However, it is not a dataset as it refers to performance indicators, which are metrics rather than a structured collection of data.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 41, + "text": "The Operation will build on the existing M & E structures at MoE and strengthen these under the IPF component. MoE \u2019 s Central Planning and Project Management Unit ( CPPMU ), headed by the Chief Economists ( from Basic Education, TVET, University and Post Training and Skills Development ), are responsible for overall coordination and monitoring of NESSP implementation. The CPPMU works closely with the Kenya Bureau of Statistics ( KNBS ). The CPPMU, in collaboration with the KNBS team, develops and publishes educational statistical booklets. The MoE \u2019 s DPCAD oversees day-to day implementation of key donor funded projects, including the ongoing education projects. 65. The Operation results monitoring will build on the existing databases used by the CPPMU that comprise the online - based NEMIS platform. The NEMIS, hosted by MoE, is being revamped to ensure data credibility and data security. The NEMIS contains up-to-date key education data, including for primary education, and has been used to generate the annual educational statistical booklets. KNEC hosts the platform for data on learning assessments, including the school specific learning assessments reports. The Directorate of Quality Assurance and Standards, and the Directorate of School Audit at MoE conduct regular visits to schools and submit their data and reports to MoE management and relevant IEs for action.", + "ner_text": [ + [ + 907, + 912, + "named" + ], + [ + 61, + 64, + "NEMIS <> publisher" + ], + [ + 111, + 114, + "NEMIS <> publisher" + ], + [ + 513, + 545, + "NEMIS <> data type" + ], + [ + 551, + 554, + "NEMIS <> publisher" + ], + [ + 834, + 837, + "NEMIS <> publisher" + ], + [ + 967, + 984, + "NEMIS <> reference population" + ], + [ + 1133, + 1177, + "NEMIS <> data description" + ], + [ + 1270, + 1273, + "NEMIS <> publisher" + ], + [ + 1345, + 1348, + "NEMIS <> publisher" + ] + ], + "validated": true, + "empirical_context": "The NEMIS, hosted by MoE, is being revamped to ensure data credibility and data security. The NEMIS contains up-to-date key education data, including for primary education, and has been used to generate the annual educational statistical booklets. KNEC hosts the platform for data on learning assessments, including the school specific learning assessments reports.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is explicitly mentioned to contain up-to-date key education data and is utilized for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is described as containing key education data and is used to generate statistical booklets.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is explicitly mentioned to contain up-to-date key education data and is utilized for empirical analysis.", + "contextual_signal": "described as containing key education data and used to generate statistical booklets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 110, + "text": "As one of the core responsibilities of the regulatory units, the assessment has noted that generally procurement audits have been undertaken on a regular basis guided by audit checklists adopted by each region. The data received from FPPPA indicate the regional regulatory bodies have increased their procurement audit coverage since the last assessment. According to FPPPA data, all regions except Afar have covered at least 15 percent of woredas as indicated in Table 4. 5. However, the audit reports are not publicized and have limited circulation between the auditee and the BOFED, and there is no practice in communicating the report to the wider public and regional administration. Hence, the incentive in addressing audit findings and working on the recommendations is limited. 45. The assessment received the woreda audit reports of the regional regulatory bodies and noted that findings across regions are similar. The main findings include: ( a ) issues related to procurement plans such as not being fully prepared, not following the template, not approved by the authorized person, not being chosen by the right method or not including a method, and not implemented per the procurement plan; ( b ) issues in the bidding documents such as use of brand names in specifications, bidding documents not prepared using the standard template, missing conditions of contracts, unclear evaluation criteria, unspecified bid validity period, not specifying bid opening date and time; ( c ) bid invitation issued without making the bidding documents ready,", + "ner_text": [ + [ + 368, + 378, + "named" + ], + [ + 234, + 239, + "FPPPA data <> publisher" + ], + [ + 253, + 279, + "FPPPA data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The data received from FPPPA indicate the regional regulatory bodies have increased their procurement audit coverage since the last assessment. According to FPPPA data, all regions except Afar have covered at least 15 percent of woredas as indicated in Table 4. 5.", + "type": "data", + "explanation": "This is indeed a dataset as it is used to indicate procurement audit coverage and is referenced as a source of information in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to data received from FPPPA, suggesting a structured collection of information.", + "contextual_reason_agent": "This is indeed a dataset as it is used to indicate procurement audit coverage and is referenced as a source of information in the context.", + "contextual_signal": "mentioned as a source of information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "112_IDAR2016-0221-PAD-09012016", + "page": 47, + "text": "The MDLF will be responsible for monitoring and evaluation activities under this proposed project, including the achievement of the PDO, project outcomes, and physical, fiduciary, and safeguard performance. The MoLG will be responsible for providing technical input to the MDLF, namely all of the technical aspects of procurement, achieved results per result indicators to monitor progress toward PDO, and details of the proposed project activities. The monitoring and evaluation activities will also include the results that are not captured in the results framework, such as semi-annual feedback collection from the proposed project participating urban areas and subsequent fine-tuning of the proposed project activities. A structured assessment of capacity developed through this proposed project is also planned at the mid-term and end of the project. MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the FMIS to automate data aggregation, storage, and presentation as part of a results-based M & E system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the MDLF-implemented various projects and programs with the PA \u2019 s NDP strategic objectives. As such, no additional arrangements or staff is required for the M & E of this proposed project.", + "ner_text": [ + [ + 1197, + 1201, + "named" + ] + ], + "validated": false, + "empirical_context": "MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the FMIS to automate data aggregation, storage, and presentation as part of a results-based M & E system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the MDLF-implemented various projects and programs with the PA \u2019 s NDP strategic objectives.", + "type": "system", + "explanation": "FMIS is mentioned as a system for automating data processes, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed FMIS is a dataset because it is involved in data aggregation and storage.", + "contextual_reason_agent": "FMIS is mentioned as a system for automating data processes, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 47, + "text": "For Results area 2, TA will support capacity building activities, such as the creation of a multisectoral team at the woreda level, staffing of key specialists to support SPG implementation; provision of training for relevant staff; M & E including data collection and entry for DHIS2, UNISE, and EMIS; incentive mechanisms for the service delivery system; development of innovations for effective service delivery at federal and regional levels by focusing in selected woredas for demonstrations; and impact evaluation to assess the effectiveness of SPG interventions. 87. The capacity building interventions supported under the IPF component are described in more detail in Annex 8. The HCO includes a PAP to drive intermediate outputs linked to the achievement of the results outlined under each results area, and the IPF component ( subcomponent 2. 3 ) provides TA for the completion of activities in the PAP ( Annex 6 ). Key capacity building activities are summarized in section II. D above, and a more detailed description is provided in Annex 8. D. Capacity Building", + "ner_text": [ + [ + 279, + 284, + "named" + ], + [ + 470, + 477, + "DHIS2 <> data geography" + ] + ], + "validated": true, + "empirical_context": "For Results area 2, TA will support capacity building activities, such as the creation of a multisectoral team at the woreda level, staffing of key specialists to support SPG implementation; provision of training for relevant staff; M & E including data collection and entry for DHIS2, UNISE, and EMIS; incentive mechanisms for the service delivery system; development of innovations for effective service delivery at federal and regional levels by focusing in selected woredas for demonstrations; and impact evaluation to assess the effectiveness of SPG interventions. 87.", + "type": "system", + "explanation": "DHIS2 is indeed a data source as it is referenced in relation to data collection and monitoring activities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and entry.", + "contextual_reason_agent": "DHIS2 is indeed a data source as it is referenced in relation to data collection and monitoring activities.", + "contextual_signal": "mentioned as a data source for data collection and entry", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 1055, + 1060, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5.", + "type": "system", + "explanation": "However, HRMIS is described as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is mentioned in the context of training records and credentials.", + "contextual_reason_agent": "However, HRMIS is described as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 13, + "text": "Household Labor Force Survey collects information from a representative sample of registered household residents living in Turkey, but the coverage of refugee population is thought to be extremely low as many of them are not registered residents in households and lack Turkish language skills to respond to the questionnaire. 12 Those in refugee status do not need to obtain work permits to work as agricultural seasonal workers. 13 Source: Turkish Statistical Institute Household Labor Force Survey. 14 These reports are reflected in the sectoral distribution of workers by age groups, which suggests that Turkish youth have been shifting out of agriculture: less than 10 percent of Turkish youth is employed in the agricultural sector as opposed to 50 percent for the older age groups. 15 Limited data are available for the agriculture sector in Turkey due to very high informality, and findings rely heavily on the interviews with farmers in the field that were carried out as part of project preparation.", + "ner_text": [ + [ + 471, + 499, + "named" + ], + [ + 82, + 112, + "Household Labor Force Survey <> reference population" + ], + [ + 123, + 129, + "Household Labor Force Survey <> data geography" + ], + [ + 441, + 470, + "Household Labor Force Survey <> publisher" + ], + [ + 539, + 585, + "Household Labor Force Survey <> data description" + ], + [ + 848, + 854, + "Household Labor Force Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "12 Those in refugee status do not need to obtain work permits to work as agricultural seasonal workers. 13 Source: Turkish Statistical Institute Household Labor Force Survey. 14 These reports are reflected in the sectoral distribution of workers by age groups, which suggests that Turkish youth have been shifting out of agriculture: less than 10 percent of Turkish youth is employed in the agricultural sector as opposed to 50 percent for the older age groups.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source for the sectoral distribution of workers.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a source of information for labor statistics.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source for the sectoral distribution of workers.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 55, + "text": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "ner_text": [ + [ + 1539, + 1560, + "named" + ] + ], + "validated": false, + "empirical_context": "24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "type": "system", + "explanation": "However, the context describes it as a health information system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'EMR' which suggests electronic medical records that could imply data collection.", + "contextual_reason_agent": "However, the context describes it as a health information system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 30, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems. It will be used to gather and consolidate data across the components. It will allow the project to track and report on project-specific data requirements disaggregated by group and sub-group ( refugee, host community, women, children, etc. ). The P-MIS will be housed at the MINEMA SPIU and accessible via a web-based application in project districts and at BRD. The system will be used to ( a ) collect data and information and monitor progress of project activities; and ( b ) generate reports, figures and tables for project M & E. 78. Regular progress monitoring will be complemented by externally-administered surveys to measure progress and effectiveness of activities and to understand beneficiary perceptions of socio-economic inclusion. Periodic technical audits will be carried out to verify infrastructure quality. A baseline study will be conducted during the inception phase and followed up with annual surveys and other assessments, a midterm evaluation, and at project closing, to evaluate qualitative and quantitative aspects of project results.", + "ner_text": [ + [ + 119, + 139, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems.", + "type": "system", + "explanation": "However, the context indicates it is a project-specific management information system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often refers to management information systems that handle data.", + "contextual_reason_agent": "However, the context indicates it is a project-specific management information system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 51, + "text": "41 the program \u2019 s objectives, aligned with the priority needs identified in the vulnerability analysis of 2012-13, appropriate given the delivery method and the learning can yield potential significant impacts and spillovers at the household and community levels. For example, evaluations of Tubaramure, a food-assisted integrated health and nutrition intervention trial in Ruyigi and Cankuso provinces ( IFPRI, 2014 and Leroy et al., 2016 ) show very little knowledge and practice about complementary foods for children above 6 months of age, scant knowledge of danger signs for acute respiratory infections ( the main cause of death for children below five years of age ) and management of diarrhea. The 2010 DHS also revealed high unmet contraception needs ( 31 percent for limiting and spacing births ), a significant proportion of women not deciding about the use of their own income ( 16. 2 percent in the North and 7. 5 percent in the East ), a high proportion of women not involved in decisions about important purchases for the household ( 42 percent ), and a high female acceptance of intimate partner violence ( 74 to 78 percent for at least one of the following reasons: burning food, arguing with the husband, neglecting children, going out without informing the husband, or refusing sex ). 15.", + "ner_text": [ + [ + 712, + 715, + "named" + ], + [ + 107, + 114, + "DHS <> reference year" + ], + [ + 375, + 381, + "DHS <> data geography" + ], + [ + 386, + 403, + "DHS <> data geography" + ], + [ + 422, + 434, + "DHS <> author" + ], + [ + 436, + 440, + "DHS <> publication year" + ], + [ + 707, + 711, + "DHS <> publication year" + ], + [ + 837, + 842, + "DHS <> reference population" + ] + ], + "validated": true, + "empirical_context": "For example, evaluations of Tubaramure, a food-assisted integrated health and nutrition intervention trial in Ruyigi and Cankuso provinces ( IFPRI, 2014 and Leroy et al., 2016 ) show very little knowledge and practice about complementary foods for children above 6 months of age, scant knowledge of danger signs for acute respiratory infections ( the main cause of death for children below five years of age ) and management of diarrhea. The 2010 DHS also revealed high unmet contraception needs ( 31 percent for limiting and spacing births ), a significant proportion of women not deciding about the use of their own income ( 16. 2 percent in the North and 7.", + "type": "survey", + "explanation": "In this context, 'DHS' is explicitly mentioned as revealing data about unmet contraception needs, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHS' is a dataset because it is commonly known as the Demographic and Health Surveys, which are structured collections of data.", + "contextual_reason_agent": "In this context, 'DHS' is explicitly mentioned as revealing data about unmet contraception needs, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 15, + "text": "Data from a survey on the schooling of refugee children conducted by MENFOP \u2019 s planning department, the OOSC 2019 and the EDAM4 provide some information on the differential experiences of refugee and host students. The most commonly cited reason by refugees for not attending school is the need to help the family ( 36 percent females; 21 percent males ), while among host communities, the most common reason was the lack of interest in school ( 39 percent females; 36 percent males ). Around 20 percent of host communities indicated that girls having to stay home as a reason for not attending school, compared to 14 percent of refugee respondents. Although food insecurity is common among both refugee households and rural host communities, the lack of school canteens is cited more frequently by refugees as a reason for non - school attendance. Data on the prevalence of disability among refugee and host community children is unavailable; however, 1 percent of host community respondents and 1. 4 percent of refugee respondents cite disability as a reason for not attending school. 16. Focus group discussions with various stakeholder groups ( school administrators, teachers, parents, and students ) in Ali-Addeh, Holl-Holl, and Markazi confirm the quantitative indicators and provide additional information on refugee school needs, which include school canteens, school kits, improving the school environment, and accelerating implementation of the national program ( which would allow certification to access to higher levels of education ), and increased teacher support. A study underway by the World Bank 5 One school in Djibouti Ville was observed to have an average class size of 1: 44", + "ner_text": [ + [ + 105, + 114, + "named" + ] + ], + "validated": true, + "empirical_context": "Data from a survey on the schooling of refugee children conducted by MENFOP \u2019 s planning department, the OOSC 2019 and the EDAM4 provide some information on the differential experiences of refugee and host students. The most commonly cited reason by refugees for not attending school is the need to help the family ( 36 percent females; 21 percent males ), while among host communities, the most common reason was the lack of interest in school ( 39 percent females; 36 percent males ).", + "type": "survey", + "explanation": "In the context, OOSC 2019 is mentioned as a source of information related to the schooling of refugee children, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in the context of a survey providing data on schooling experiences.", + "contextual_reason_agent": "In the context, OOSC 2019 is mentioned as a source of information related to the schooling of refugee children, indicating it functions as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 17, + "text": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3. Project Design Targeting In the absence of data on household income, the Project uses geographical targeting at the level of the governorates where poverty level is estimated at 18. 3 % on average, 32. 5 % in Hebron and 21. 3 % in Bethlehem ( 2010 ) 9. Both governorates also have the highest unemployment rates in the West Bank with averages of 22. 8 % and 22. 4 %, respectively10. The vulnerability of these groups is exacerbated by mobility restrictions and poor market access. Output-based subsidy payment 8 These closures are part of the World Bank project and including them in the OBA targets will further incentivize JSC-H & B to achieve closures according to the planned schedule. In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "ner_text": [ + [ + 448, + 456, + "named" + ], + [ + 465, + 493, + "SWM data <> data description" + ], + [ + 907, + 913, + "SWM data <> data geography" + ], + [ + 929, + 938, + "SWM data <> data geography" + ], + [ + 941, + 945, + "SWM data <> reference year" + ], + [ + 1017, + 1026, + "SWM data <> data geography" + ], + [ + 1241, + 1251, + "SWM data <> publisher" + ], + [ + 1547, + 1551, + "SWM data <> publication year" + ] + ], + "validated": true, + "empirical_context": "\uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3.", + "type": "data", + "explanation": "In the context, 'SWM data' is used as a source of information that is tracked and reported, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'SWM data' refers to a dataset because it includes specific metrics like waste tonnage and costs.", + "contextual_reason_agent": "In the context, 'SWM data' is used as a source of information that is tracked and reported, indicating it functions as a dataset.", + "contextual_signal": "described as data tracked by service providers", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 50, + "text": "Data source / Agency Annual reports on the usage of digital services from MODEE, sectoral ministries, and the private sector. Verification Entity KACE. Procedure Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that use trusted, people-centric DPI and cross-checked by the IVA through spot surveys. DLI 2: Number of individuals adopting people-centric digital identity Formula The DLI will disburse US $ 3 for each unique individual activating people-centric digital identity, up to a total 3. 5 million individuals, in the limit of US $ 10. 5 million. Moreover, it will disburse the following additional amounts: \u2022 US $ 4 for each woman activating people-centric digital identity, up to 1. 75 million women, in the limit of US $ 7 million \u2022 US $ 5 for each elder activating people-centric digital identity, up to 200, 000 elders, in the limit of US $ 1 million \u2022 US $ 15 for each refugee activating people-centric digital identity, up to 100, 000 refugees, in the limit of US $ 1. 5 million Description The Program disburses against the number of unique individuals activating people-centric digital identity, disaggregated by type of user ( women, elders, refugees ). Data source / Agency Annual reports on digital ID implementation by MODEE. Verification Entity KACE.", + "ner_text": [ + [ + 212, + 231, + "named" + ], + [ + 74, + 79, + "administrative data <> publisher" + ], + [ + 202, + 207, + "administrative data <> publisher" + ], + [ + 891, + 897, + "administrative data <> reference population" + ], + [ + 1016, + 1024, + "administrative data <> reference population" + ], + [ + 1306, + 1311, + "administrative data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Verification Entity KACE. Procedure Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that use trusted, people-centric DPI and cross-checked by the IVA through spot surveys. DLI 2: Number of individuals adopting people-centric digital identity Formula The DLI will disburse US $ 3 for each unique individual activating people-centric digital identity, up to a total 3.", + "type": "administrative data", + "explanation": "In this context, 'administrative data' is explicitly mentioned as a source of information used to collect indicator values, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured data collected for administrative purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as a source of information used to collect indicator values, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source of information for collecting indicator values", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 75, + 80, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5.", + "type": "system", + "explanation": "HRMIS is mentioned as a system for recording but not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with recording data.", + "contextual_reason_agent": "HRMIS is mentioned as a system for recording but not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 81, + "text": "For over 30 years, Tajikistan has been without national-level data on its forests. During these decades, there have been significant quantitative and qualitative changes in forest ecosystems. The project will finance a national-level systematic NFI using a low sampling density. The NFI exercise will employ state-of-the-art methodologies for conducting forest inventories, including geospatial data. The NFI process will begin with a national land cover classification project that will ( a ) guide the selection of field locations for sample plot inventory and ( b ) classify the entire national land cover according to nationally agreed categories. The NFI will establish key parameters such as the total areas of forest by type and ownership ( as needed ), total standing volumes by species and size class, regeneration, incidence of pests and disease, and the distribution of key indicator species for biodiversity conservation. Other relevant data will also be collected, for example, evidence of illegal removals, erosion, forest fires, condition / species of pasture, and so on, as required. 24. Forest management plans. The project will finance the preparation and implementation of sustainable forest management plans for eight SFMEs in the project sites. Preparation of the plans will", + "ner_text": [ + [ + 435, + 477, + "named" + ] + ], + "validated": false, + "empirical_context": "The NFI exercise will employ state-of-the-art methodologies for conducting forest inventories, including geospatial data. The NFI process will begin with a national land cover classification project that will ( a ) guide the selection of field locations for sample plot inventory and ( b ) classify the entire national land cover according to nationally agreed categories. The NFI will establish key parameters such as the total areas of forest by type and ownership ( as needed ), total standing volumes by species and size class, regeneration, incidence of pests and disease, and the distribution of key indicator species for biodiversity conservation.", + "type": "project", + "explanation": "However, it is described as a project and not explicitly mentioned as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves classification and data collection related to land cover.", + "contextual_reason_agent": "However, it is described as a project and not explicitly mentioned as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "184_multi-page", + "page": 24, + "text": "In addition, the project foresees the increased involvement of parent associations or community-based associations in the management of project activities on the ground ( i. e., operations & maintenance ). 6. 4 What institutional arrangements have been provided to ensure the project achieves its social development outcomes? The DGEN will be responsible for monitoring the gender gap in enrollment issues, and the gap between the poorest and the richest quintiles, and related education services available to them. The data collection on enrollment will be strengthened by the capacity building support provided to the Ministry of Education ' s planning unit - thus over time these issues can be effectively monitored. Triggers are included in the APL phasing to ensure that various social development goals are met e. g. decreasing the enrollment gap between the rich and the poor, decreasing the gender gap and increasing community participation in school management. 6. 5 How will the project monitor performance in terms of social development outcomes? The MOE planning unit will monitor enrollment paying attention to gender gaps, socioeconomic gaps and performance of students by socioeconomic class through use of surveys of students.", + "ner_text": [ + [ + 1222, + 1241, + "named" + ], + [ + 374, + 405, + "surveys of students <> data description" + ] + ], + "validated": true, + "empirical_context": "5 How will the project monitor performance in terms of social development outcomes? The MOE planning unit will monitor enrollment paying attention to gender gaps, socioeconomic gaps and performance of students by socioeconomic class through use of surveys of students.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is explicitly mentioned as a method for monitoring performance and outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'surveys of students' implies a structured collection of data gathered for analysis.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is explicitly mentioned as a method for monitoring performance and outcomes.", + "contextual_signal": "follows 'through use of'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "172_multi0page", + "page": 46, + "text": "grant program and oversight WMA A Finance Directorate able to Adequate procedures in manage all project funds and compliance with IDA financial treasury and to comply with management and accounting accounting, budgeting and rules. financial reporting requirements. Internal Audit unit able to Report on budget execution per eview all MEST operations quarter and annual budget forecast based on assessment of needs and pnorities. EMIS established with data on EMIS network infrastructure four school annual surveys and set up with procedures in place gradually integrating and for users. linking data on finance ( i. e. udget execution ); personnel Four annual school surveys ( payroll and deployment published. ecords ); and indicators on OL standards. EMIS data are effectively used EMIS reports. for planning and prospective work on the education sector. Improved personnel Audit report on personnel management system with management contracted out. effective procedures enforced for recruitment, deployment Records on MEST ' s staff on and payment of MEST ' s staff the payroll. and teachers on the payroll. 80 trainers trained in first year List of trainers published and of Project to serve as trainers evaluation report on training for in-service teacher training received. activities to be implemented via school grants. 600 SMCs established with Reports on school tnspections prior mobilization from the from Inspectorate. community and with capacity built ( training when required ) to monitor school grant", + "ner_text": [ + [ + 753, + 757, + "named" + ] + ], + "validated": false, + "empirical_context": "ecords ); and indicators on OL standards. EMIS data are effectively used EMIS reports. for planning and prospective work on the education sector.", + "type": "program", + "explanation": "However, EMIS is mentioned as a program rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data and reports.", + "contextual_reason_agent": "However, EMIS is mentioned as a program rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 10, + "text": "As of August 2017, Jordan hosts 660, 5822 registered Syrian refugees, of which 232, 8683 are school \u2010 aged children requiring the provision of education services. Eighty percent of refugees live in host communities, representing 10 percent of Jordan \u2019 s population, while the rest live in dedicated refugee camps. Per the recent 2016 government census, the total number of Syrians in Jordan reached an estimated 1. 265 million, representing 13. 2 percent of population. 4 Jordan has been committed to integrating Syrian refugee children in the public formal sector, and as of June 2017, approximately 10 percent of children in public schools were Syrian refugees. Therefore, it is important that education services to refugee children in Jordan respond to the nature of the challenges they face in the education system. 3. Jordan \u2019 s economic development hinges on the existence of an education system that provides students with the cognitive and socioemotional skills needed to succeed in the labor market. Realizing the full potential of educational investments for economic prosperity requires improving access and quality of education for both girls and boys. 5 Additionally, the cost of not educating refugee children is high in terms of loss of human capital for regional economic development, as well as for the long \u2010 term processes of peace, stability, and reconstruction. It is thus necessary for the education system to be agile and address the existing education quality challenges by strengthening the system ' s ability to manage a growing number of students, including refugee and vulnerable children. 1 Human Development Data ( 1990 \u2013 2015 ); Human Development Report. 2016. 2 United Nations High Commissioner for Refugees ( UNHCR ). August 6, 2017. 3 Brussels Conference Paper. 2017. 4 Department of Statistics ( DOS ); National census. November 2016. 5 OECD. 2016. PISA. 2015 Results ( Volume I ): Excellence and Equity in Education, OECD Publishing, Paris. http: / / dx. doi. org / 10. 1787 / 9789264266490 \u2010 en.", + "ner_text": [ + [ + 1620, + 1642, + "named" + ], + [ + 13, + 17, + "Human Development Data <> publication year" + ], + [ + 19, + 25, + "Human Development Data <> data geography" + ], + [ + 243, + 249, + "Human Development Data <> data geography" + ], + [ + 384, + 390, + "Human Development Data <> data geography" + ], + [ + 472, + 478, + "Human Development Data <> data geography" + ], + [ + 738, + 744, + "Human Development Data <> data geography" + ], + [ + 823, + 829, + "Human Development Data <> data geography" + ], + [ + 1645, + 1656, + "Human Development Data <> reference year" + ], + [ + 1838, + 1853, + "Human Development Data <> data type" + ] + ], + "validated": true, + "empirical_context": "It is thus necessary for the education system to be agile and address the existing education quality challenges by strengthening the system ' s ability to manage a growing number of students, including refugee and vulnerable children. 1 Human Development Data ( 1990 \u2013 2015 ); Human Development Report. 2016.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a structured collection of data used in the Human Development Report.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes a specific time range and is associated with a report.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data used in the Human Development Report.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 39, + "text": "The World Bank Costa Rica Results in Education ( CORE ) ( P181174 ) Page 30 Collection IRI 3. 1. 5 Percentage of students in technical Secondary schools that pass level B2 in English according to the digital app for language learning Description This indicator measures student performance on English with the digital app registering both written and spoken language. Frequency Annual Data source MEP Department for Technical Secondary Education Methodology for Data Collection Data derived from digital app Responsibility for Data Collection MEP Department for Technical Secondary Education IRI 3. 1. 6 An intersectoral plan for the transition of vulnerable and disabled students into the labor market is implemented. Description This indicator measures the development of a strategic document to guide the transition of vulnerable and disabled students into the labor market, with a special focus on technical secondary education. Frequency Annual Data source MEP Department for Technical Secondary Education \u2013 MEP Department for Special Education Methodology for Data Collection Administrative records from MEP Department for Technical Secondary Education and Special Education Responsibility for Data Collection MEP Department for Technical Secondary Education IRI 3. 1. 7 Digital tools including career information systems to support employability deployed Description This indicator measures the development and use of a digital tool to provide secondary technical students with information on the labor market.", + "ner_text": [ + [ + 200, + 233, + "named" + ] + ], + "validated": false, + "empirical_context": "1. 5 Percentage of students in technical Secondary schools that pass level B2 in English according to the digital app for language learning Description This indicator measures student performance on English with the digital app registering both written and spoken language. Frequency Annual Data source MEP Department for Technical Secondary Education Methodology for Data Collection Data derived from digital app Responsibility for Data Collection MEP Department for Technical Secondary Education IRI 3.", + "type": "program", + "explanation": "However, it is described as a digital app, which is a tool rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection related to student performance.", + "contextual_reason_agent": "However, it is described as a digital app, which is a tool rather than a structured collection of data.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 23, + "text": "The process evaluations will continue in the second phase to provide real-time information about scale-up and implementation in different provinces. The process evaluation will be complemented by regular beneficiary surveys to help map out implementation successes and issues, externalities, and community dynamics and contribute to the establishment of the grievance redress mechanism. 44. The impact evaluation will focus on key poverty, welfare, and human development indicators at the household and community-levels for the cash transfers. The random selection at the colline-level will support a randomized control trial design based on a sample of participating and non-participating collines. Baseline data will be collected prior to the first transfer. A mid-term data collection will take place at 24 months. To evaluate the sustainability of impacts, the end-line data collection will take place six months after the end of the program activities ( at 42 months ).", + "ner_text": [ + [ + 204, + 223, + "named" + ], + [ + 138, + 147, + "beneficiary surveys <> data geography" + ], + [ + 431, + 481, + "beneficiary surveys <> data description" + ], + [ + 654, + 698, + "beneficiary surveys <> reference population" + ], + [ + 990, + 1008, + "beneficiary surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The process evaluations will continue in the second phase to provide real-time information about scale-up and implementation in different provinces. The process evaluation will be complemented by regular beneficiary surveys to help map out implementation successes and issues, externalities, and community dynamics and contribute to the establishment of the grievance redress mechanism. 44.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' are explicitly mentioned as a means to gather information about implementation successes and issues, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' implies a structured collection of data gathered from participants.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' are explicitly mentioned as a means to gather information about implementation successes and issues, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "118_PAD1199-PAD-P144637-IDA-R2015-0247-1-Box393201B-OUO-9", + "page": 1, + "text": "Grievance Redress Mechanism Grievance Redress Service Global Positioning System GOC Government of Cameroon ICR Implementation Completion and Results Report IDA International Development Association IFMIS Integrated Financial Management Information System", + "ner_text": [ + [ + 198, + 203, + "named" + ] + ], + "validated": false, + "empirical_context": "Grievance Redress Mechanism Grievance Redress Service Global Positioning System GOC Government of Cameroon ICR Implementation Completion and Results Report IDA International Development Association IFMIS Integrated Financial Management Information System", + "type": "system", + "explanation": "However, IFMIS is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMIS is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, IFMIS is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 35, + "text": "This can also provide a platform for remote supervision and real-time risk monitoring of project activities. 89. Data verification will be built into the M & E arrangements. Data will be verified on a sample basis by a third - party monitoring ( TPM ) agent. The TPM will be recruited to monitor progress of project activities and verify project data on a sample basis throughout implementation. The TPM will report directly to the PSC. The information collected by the TPM will help triangulate the data submitted by the various implementing arms including NTTIs and universities. Spending reported by participants will be verified through an independent audit of MoGEI and MoHEST accounts. Indicators that are reported will be cross-referenced with EMIS data. Effective verification will depend on the quality of EMIS data available, which will be strengthened with support from the Project. 90. The Project will invest in an integrated system for the management and analysis of data collected through various channels for effective monitoring. The integrated monitoring system will be designed to capture the data needed to report across all the indicators outlined in the Results Framework. This system will incorporate capacities for data importing, either manually or directly via online survey and / or SMS survey platforms and EMIS data. Data collected on mobile devices by project monitoring officers or from the communities will also be uploaded to the system.", + "ner_text": [ + [ + 751, + 760, + "named" + ] + ], + "validated": true, + "empirical_context": "Spending reported by participants will be verified through an independent audit of MoGEI and MoHEST accounts. Indicators that are reported will be cross-referenced with EMIS data. Effective verification will depend on the quality of EMIS data available, which will be strengthened with support from the Project.", + "type": "data", + "explanation": "In this context, 'EMIS data' is indeed used as a data source for cross-referencing indicators, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS data' is a dataset because it is referenced in the context of verification and quality assessment.", + "contextual_reason_agent": "In this context, 'EMIS data' is indeed used as a data source for cross-referencing indicators, confirming its role as a dataset.", + "contextual_signal": "follows 'cross-referenced with'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 69, + "text": "These tools may advance the use of Kobo-toolbox platform for self - reporting purposes. The M & E specialists will be responsible for verification of the accuracy and timeliness of data entry and training of new staff, WASH-committed contractors, and other implementation partners on the use of the M & E system. The project M & E system will be, to the extent possible, integrated with the WSW MIS and allow gender disaggregation. 24. A key aspect of the results monitoring will be the consumer satisfaction surveys for water services and engagement processes. Consumer satisfaction surveys will be conducted every year for selected subprojects after the start of project implementation to assess satisfaction levels and measure attributable outcomes of the project. Surveys for the engagement processes will be conducted every year using various means of communication. Baseline surveys will be conducted in the first year of project implementation after effectiveness. At the same time, the project will continue relying on WASH - committee models adopted under the RWSSP to enable frequent community roundtables or forums with water users to inform them of the status of investments, seek their feedback regarding project implementation progress, and discuss any corrective action which was taken to address issues raised through the feedback process. Results of such meetings will be documented and reported through the regular M & E process.", + "ner_text": [ + [ + 391, + 398, + "named" + ] + ], + "validated": false, + "empirical_context": "The M & E specialists will be responsible for verification of the accuracy and timeliness of data entry and training of new staff, WASH-committed contractors, and other implementation partners on the use of the M & E system. The project M & E system will be, to the extent possible, integrated with the WSW MIS and allow gender disaggregation. 24.", + "type": "system", + "explanation": "However, the context indicates it is a system mentioned for integration and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a Management Information System that could store data.", + "contextual_reason_agent": "However, the context indicates it is a system mentioned for integration and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 76, + "text": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 73 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure \u2022 National EMIS Platform ( adapted integrated technical platform based on the EMIS development under the CEQUIL Project ), operational and functional, guidelines developed and person-in-charge designed, in all four ministries in charge of education in Cameroon, and meet international quality standards \u2022 Budget approved for EMIS activities for subsequent year Years 3, 4, 5 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) Year 4 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely Strategy will be approved by the Committee overseeing EMIS and acceptable to the Bank Budget line", + "ner_text": [ + [ + 1269, + 1276, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 73 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure \u2022 National EMIS Platform ( adapted integrated technical platform based on the EMIS development under the CEQUIL Project ), operational and functional, guidelines developed and person-in-charge designed, in all four ministries in charge of education in Cameroon, and meet international quality standards \u2022 Budget approved for EMIS activities for subsequent year Years 3, 4, 5 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) Year 4 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely Strategy will be approved by the Committee overseeing EMIS and acceptable to the Bank Budget line", + "type": "system", + "explanation": "SIGIPES is described as a system used for the management of state personnel, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SIGIPES is a dataset because it is mentioned in the context of data management.", + "contextual_reason_agent": "SIGIPES is described as a system used for the management of state personnel, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 63, + "text": "EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in at least five ( 5 ) regions ( c ) Budget for EMIS activities, including evaluation, for subsequent year for ministries in charge of education sector approved DLR 6. 5: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in all ten ( 10 ) regions ( c ) Evaluation of national EMIS completed ( d ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved", + "ner_text": [ + [ + 0, + 4, + "named" + ] + ], + "validated": false, + "empirical_context": "EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in at least five ( 5 ) regions ( c ) Budget for EMIS activities, including evaluation, for subsequent year for ministries in charge of education sector approved DLR 6. 5: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in all ten ( 10 ) regions ( c ) Evaluation of national EMIS completed ( d ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved", + "type": "program", + "explanation": "However, EMIS is mentioned as a program and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to education statistics.", + "contextual_reason_agent": "However, EMIS is mentioned as a program and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 29, + "text": "The Program will require that: ( 1 ) appropriate procurement staff are allocated for the procurement transactions and contract management; ( 2 ) sound procurement plans are developed yearly and updated quarterly following the Annual Work Plans; ( 3 ) qualified technical experts are assigned to support the development of technical specifications and procurement documents; ( 4 ) training pertaining to procurement processing and contract management is deployed; ( 5 ) internal audit will advise on procurement processing to increase its efficiency; ( 6 ) the procurement cycle will cover planning, procurement processing, contract management, and inventory and acceptance of deliverables; ( 7 ) there is systematic coordination between the implementing agencies and the central procurement departments ( namely, the General Tendering Department ( GTD ) and the General Procurement Department ( GPD ) ); ( 8 ) a central agency ( for example, MOPIC ) coordinates and integrates the Program by a central agency ( for example, MOPIC ), especially for RAs that include multiple agencies; and ( 9 ) a procurement complaint log is maintained and published through the Jordan Online E-Procurement System ( JONEPS ) or agency websites. 51.", + "ner_text": [ + [ + 1162, + 1196, + "named" + ] + ], + "validated": false, + "empirical_context": "The Program will require that: ( 1 ) appropriate procurement staff are allocated for the procurement transactions and contract management; ( 2 ) sound procurement plans are developed yearly and updated quarterly following the Annual Work Plans; ( 3 ) qualified technical experts are assigned to support the development of technical specifications and procurement documents; ( 4 ) training pertaining to procurement processing and contract management is deployed; ( 5 ) internal audit will advise on procurement processing to increase its efficiency; ( 6 ) the procurement cycle will cover planning, procurement processing, contract management, and inventory and acceptance of deliverables; ( 7 ) there is systematic coordination between the implementing agencies and the central procurement departments ( namely, the General Tendering Department ( GTD ) and the General Procurement Department ( GPD ) ); ( 8 ) a central agency ( for example, MOPIC ) coordinates and integrates the Program by a central agency ( for example, MOPIC ), especially for RAs that include multiple agencies; and ( 9 ) a procurement complaint log is maintained and published through the Jordan Online E-Procurement System ( JONEPS ) or agency websites. 51.", + "type": "system", + "explanation": "However, it is mentioned as a system for maintaining a procurement complaint log, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system for maintaining a procurement complaint log, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 82, + "text": "The National Commission for the Reception, Reintegration of Refugees and Repatriates ( CNARR ) is responsible for protecting the well-being of refugees and asylum-seekers in communication with relevant ministries. 3. The Sudanese crisis has been characterized by the arrival in Chad of refugees from different professional backgrounds. During the registration process, UNHCR collected and shared the socio-economic data of specific ' high profile \u2019 categories with technical and financial partners for their potential inclusion in development projects. UNHCR initiated contacts with ONAPE ( Office Nationale pour la Promotion de l \u2019 Emploi ) in the prospect of signing an MOU to lift barriers and facilitate access to the job market for refugees. Under the 2023 Decree, refugees legally residing in Chad have the same rights and treatment as foreign nationals. Challenges remain in aligning asylum laws with labor-related legislation. Many refugees work in the informal sector, particularly in agriculture, but face obstacles in accessing land, infrastructure, and financial services. As customary and Islamic laws continue to manage access to and control of land and natural resources in urban and rural areas, land ownership can be challenging. Refugee access to agricultural land is facilitated through sharecropping agreements; however, many face obstacles in accessing large, fertile land parcels. 4.", + "ner_text": [ + [ + 400, + 419, + "named" + ], + [ + 143, + 151, + "socio-economic data <> reference population" + ], + [ + 278, + 282, + "socio-economic data <> data geography" + ], + [ + 286, + 294, + "socio-economic data <> reference population" + ], + [ + 369, + 374, + "socio-economic data <> publisher" + ], + [ + 553, + 558, + "socio-economic data <> publisher" + ], + [ + 757, + 761, + "socio-economic data <> publication year" + ], + [ + 770, + 778, + "socio-economic data <> reference population" + ], + [ + 799, + 803, + "socio-economic data <> data geography" + ], + [ + 940, + 948, + "socio-economic data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The Sudanese crisis has been characterized by the arrival in Chad of refugees from different professional backgrounds. During the registration process, UNHCR collected and shared the socio-economic data of specific ' high profile \u2019 categories with technical and financial partners for their potential inclusion in development projects. UNHCR initiated contacts with ONAPE ( Office Nationale pour la Promotion de l \u2019 Emploi ) in the prospect of signing an MOU to lift barriers and facilitate access to the job market for refugees.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected information used for analysis and decision-making regarding refugees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'socio-economic data' implies a structured collection of information relevant to the context.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected information used for analysis and decision-making regarding refugees.", + "contextual_signal": "mentioned as data collected and shared for potential inclusion in development projects", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 78, + "text": "Female Participation in Ownership Indicator Chad Sub - Saharan Africa All Countries Percent of firms with female participation in ownership 13. 1 29. 6 35. 8 Percent of firms with majority female ownership 9. 0 12. 3 14. 4 Percent of firms with a female top manager 12. 0 15. 4 18. 0 Proportion of permanent full-time workers that are female ( % ) 14. 8 28. 2 33. 3 Proportion of permanent full-time production workers that are female ( % ) a 1. 9 19. 0 26. 8 Proportion of permanent full-time non-production workers that are female ( % ) a 9. 6 29. 4 37. 0 Source: World Bank ( 2018 ) Enterprise Survey. https: / / www. enterprisesurveys. org / en / data / exploretopics / gender. Note: a. Using data from manufacturing firms only. 49 Ibid. 50 World Bank. 2017. Findex database.", + "ner_text": [ + [ + 586, + 603, + "named" + ], + [ + 44, + 69, + "Enterprise Survey <> data geography" + ], + [ + 566, + 576, + "Enterprise Survey <> publisher" + ], + [ + 579, + 583, + "Enterprise Survey <> publication year" + ], + [ + 707, + 726, + "Enterprise Survey <> reference population" + ], + [ + 745, + 755, + "Enterprise Survey <> publisher" + ], + [ + 757, + 761, + "Enterprise Survey <> reference year" + ], + [ + 795, + 813, + "Enterprise Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "4 37. 0 Source: World Bank ( 2018 ) Enterprise Survey. https: / / www.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly identified as the 'Enterprise Survey' from the World Bank, which is known to collect structured data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey from a reputable source.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly identified as the 'Enterprise Survey' from the World Bank, which is known to collect structured data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 20, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 16 of 40 Figure 4: Heat map of Sudan \u2019 s basic education schools Source: Based on 2018 / 19 School Census using Arcgis software. 39. Component 2 Program coordination and management ( US $ 0. 3 million ). This component will support the Federal Ministry of Educaiton ( MoE ) in overall program coordination, monitoring and evaluation. The PCU will cover functions such as planning, procurement, financial management, environmental and social safeguards and monitoring and evaluation. Technical experts will be mobilized as necessary. The PCU will monitor the progress by collecting and analyzing school-level data under the the Annual School Census. C. Project Beneficiaries 40. Primary beneficiaries are schoolchildren, teachers, and parents. Approximately 5. 4 million students will benefit from the project through provision of school grants. Communities in targeted areas will also benefit from enhance participatory school management.", + "ner_text": [ + [ + 162, + 185, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 16 of 40 Figure 4: Heat map of Sudan \u2019 s basic education schools Source: Based on 2018 / 19 School Census using Arcgis software. 39.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as the source of data used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as the source of data used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 27, + "text": "This system would store and update the teacher profile, including qualifications, experience, language proficiency, preferred service areas, recruitment process documentation, and performance assessment. This tool would be essential for organizing and tracking teacher activities, such as deployment and training, and enhancing accountability in the process. 64. The capacity development plan activities will be tailored to the needs of each entity. For example, they could include ( i ) training and knowledge sharing in areas such as fiduciary management, strategic planning, communication, teacher management, teacher professional development, school inspection, vulnerability management, and diversity and inclusion; ( ii ) reviewing roles and responsibilities and optimizing work processes to enhance the management of education service delivery, mainly the functions related to teachers \u2019 management and professional development; ( iii ) developing a performance and reporting system, M & E system, and strengthening and decentralizing the existing data management systems ( explained under subcomponent 4. 2 ); ( iv ) preparing policy frameworks ( i. e., the teachers ' recruitment and retention strategy ); and ( v ) provision of equipment and rehabilitation of the physical and IT infrastructure that could be needed to facilitate and sustain the implementation of the introduced capacity change. The detailed activities will be clearly listed upon concluding the capacity development plans. Subcomponent 4. 1 will finance the implementation of the priority activities identified in the capacity development plans at the national and sub-national levels.", + "ner_text": [ + [ + 39, + 54, + "named" + ] + ], + "validated": false, + "empirical_context": "This system would store and update the teacher profile, including qualifications, experience, language proficiency, preferred service areas, recruitment process documentation, and performance assessment. This tool would be essential for organizing and tracking teacher activities, such as deployment and training, and enhancing accountability in the process.", + "type": "tool", + "explanation": "'Teacher profile' is not a dataset as it refers to individual records rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'teacher profile' is a dataset because it contains structured information about teachers.", + "contextual_reason_agent": "'Teacher profile' is not a dataset as it refers to individual records rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 10, + "text": "Unfortunately, the same provinces are also among the least developed in the country. As such, their capacity to absorb the large influx of SuTP is very limited, and the socioeconomic consequences of hosting SuTP have not all been positive. Some negative socioeconomic impacts are evidenced through competition over jobs, rising rents, growing demand for municipal services, and capacity distress in social services, including education and health infrastructure. 2 World Bank ( 2016 ), Mapping Location and Living Conditions of Syrians under Temporary Protection in Turkey: Results from National Muhtar Survey 3 Muhtar ( headmen ) is the elected head of neighborhoods and villages in Turkey. 4 UNHCR and Directorate General for Migration Management ( DGMM ) data, November 2016, available at: http: / / reliefweb. int / sites / reliefweb. int / files / resources / RegisteredSyrianRefugees-09November16. pdf", + "ner_text": [ + [ + 587, + 609, + "named" + ], + [ + 465, + 475, + "National Muhtar Survey <> publisher" + ], + [ + 478, + 482, + "National Muhtar Survey <> publication year" + ], + [ + 528, + 562, + "National Muhtar Survey <> reference population" + ], + [ + 566, + 572, + "National Muhtar Survey <> data geography" + ], + [ + 773, + 777, + "National Muhtar Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Some negative socioeconomic impacts are evidenced through competition over jobs, rising rents, growing demand for municipal services, and capacity distress in social services, including education and health infrastructure. 2 World Bank ( 2016 ), Mapping Location and Living Conditions of Syrians under Temporary Protection in Turkey: Results from National Muhtar Survey 3 Muhtar ( headmen ) is the elected head of neighborhoods and villages in Turkey. 4 UNHCR and Directorate General for Migration Management ( DGMM ) data, November 2016, available at: http: / / reliefweb.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical results.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides results related to living conditions.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical results.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 30, + "text": "This will include civil society, RHC, and other marginalized groups. Citizen engagement activities will include the implementation of the following mechanisms: ( i ) consultations; ( ii ) a grievance mechanism; and ( iii ) a satisfaction survey at project midterm and end of project. Satisfaction surveys will be diverse and widespread and will include a sample space of respondents from beneficiary groups for each activity set. The surveys will seek to understand beneficiary perceptions of various aspects of the project, including efficiency and relevance. The information gathered from midterm satisfaction surveys will be used to inform and recalibrate project implementation. Representative consultations will also gather feedback on how project activities affect security and are affected by insecurity. Outreach and awareness campaigns will take place to ensure that youth, women, and people with disabilities, including RHC, are included in project-supported activity streams. Representative committees with RHC representation, including IDPs, will take an active role in identifying and monitoring community investments, livelihoods, and support to business plan development within the value chain operator support initiatives. The grievance mechanism will include specific procedures to address complaints related to sexual exploitation, abuse, and harassment; referral to GBV service providers; and confidential, survivor-centered complaint-management protocols. 50. Gender. The project will contribute to increasing women \u2019 s physical accessibility to obstetric care. Feeder and rural roads to be paved / rehabilitated will also be selected based on their potential to connect communities to obstetric care facilities, following a prioritization methodology involving utilization of GIS tools to locate these facilities and the fastest roads that can be used to be reached from different villages, coupled with consultations with women, including refugees, 67 The Sahelian Financial Company ( SAHFI SA - Soci\u00e9t\u00e9 Sah\u00e9lienne de Financement ) established in 2005 as a joint initiative between the European Union ( EU ) and the State of Niger for providing guarantees to small and medium enterprises and small and medium industries ( SMEs / SMIs ).", + "ner_text": [ + [ + 225, + 244, + "named" + ], + [ + 371, + 406, + "satisfaction survey <> reference population" + ], + [ + 883, + 888, + "satisfaction survey <> reference population" + ], + [ + 894, + 918, + "satisfaction survey <> reference population" + ], + [ + 2138, + 2152, + "satisfaction survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "This will include civil society, RHC, and other marginalized groups. Citizen engagement activities will include the implementation of the following mechanisms: ( i ) consultations; ( ii ) a grievance mechanism; and ( iii ) a satisfaction survey at project midterm and end of project. Satisfaction surveys will be diverse and widespread and will include a sample space of respondents from beneficiary groups for each activity set.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of data gathered from respondents about their satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satisfaction survey' implies a structured collection of responses from participants.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data gathered from respondents about their satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 107, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 95 32. In addition to the IBEX data, the assessment looked at KPI data that were submitted annually as part of DLI deliverables under ESPES. The main data include the total procurement expenditure, list of procurement activities, share of the open bidding procedure, and the bid process time. The data received indicates no large value contract exists at regional level that reaches the Output and Performance-based Road Contracts ( OPRC ) threshold for exclusion of contracts under PforR operations. The maximum contract amount identified is ETB 250 million ( around US $ 7 million ) which is much below the threshold for goods at US $ 30 million. However, it is noted that the total amount of contracts reported under the KPI does not match the data received from IBEX. This indicates that regions are not registering all the contracts for each sector. To address this quality issue, the HCO includes a DLI that requires alignment between the KPI procurement report and budget allocation and expenditure data. 33. While the KPI data have quality issues particularly related to the comprehensiveness of the data captured, the practice is encouraging. It is understood that building systems is a process that passes through many obstacles and the result cannot be achieved in one go. The effort requires continuous engagement and resources.", + "ner_text": [ + [ + 1086, + 1094, + "named" + ] + ], + "validated": false, + "empirical_context": "33. While the KPI data have quality issues particularly related to the comprehensiveness of the data captured, the practice is encouraging. It is understood that building systems is a process that passes through many obstacles and the result cannot be achieved in one go.", + "type": "data", + "explanation": "'KPI data' is mentioned in relation to quality issues but is not described as a structured collection or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'KPI data' refers to a dataset because it includes the term 'data' which often implies a structured collection.", + "contextual_reason_agent": "'KPI data' is mentioned in relation to quality issues but is not described as a structured collection or data source.", + "contextual_signal": "mentioned only as data, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "186_multi-page", + "page": 7, + "text": "HIV / AIDS and the need for AIDS Education, Prevention, Counseling and Testing: The problem of AIDS, initially regarded as a health issue, has become a major development constraint for countries like Rwanda. Since the discovery of the first cases of AIDS in Rwanda in 1983, the infection has spread throughout the country. From a 1997 survey, 1 1. I percent of the population 12 years old and above, approximately half a million persons, was found to be seropositive. An estimated 22, 000 people between 15 and 49 years of age died of AIDS in 1998. In Rwanda as elsewhere, HIV prevalence is not uniformly distributed, with high rates among females between 25 and 34 years of age ( 20 percent infected ), males between 40 to 49 years ( 18 percent ), commercial sex workers, people with sexually transmitted diseases and military personnel. Involuntary migration, war, poor education, and separation of families, are catalysts for HIV infection. However, among men, HIV infections are also higher for wealthy and well-educated middle age groups. The last twelve years have witnessed efforts by the Government, with the support from its external partners, including the World Bank, to monitor and stem the spread of AIDS. Campaigns have been organized to promote public awareness on its prevention. Notwithstanding these efforts, progress has been slow. This is in part due to the low level of human, material and financial resources allocated to the health sector and the lack of an adequate AIDS education and prevention strategy. However, to a large extent, - 4 -", + "ner_text": [ + [ + 330, + 341, + "named" + ] + ], + "validated": true, + "empirical_context": "Since the discovery of the first cases of AIDS in Rwanda in 1983, the infection has spread throughout the country. From a 1997 survey, 1 1. I percent of the population 12 years old and above, approximately half a million persons, was found to be seropositive.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data from a survey that is used to report on the seropositive population.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey conducted in 1997 that provides statistical data on HIV prevalence.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data from a survey that is used to report on the seropositive population.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 63, + "text": "Since 2009, departure points around the Obock areas of Djibouti have become the locus for the mixed migration flows of individuals attempting to reach Yemen. The latest data on sea arrivals shows that, despite the ongoing conflict, 92, 446 people arrived by boat into Yemen in 2015 ( UNHCR 2016 ). While the International Organization on Migration in 27 Djibouti-Inter-agency update for the response to the Yemeni crisis # 33, 12 January 2016", + "ner_text": [ + [ + 169, + 189, + "named" + ], + [ + 40, + 63, + "data on sea arrivals <> data geography" + ], + [ + 151, + 156, + "data on sea arrivals <> data geography" + ], + [ + 277, + 281, + "data on sea arrivals <> publication year" + ], + [ + 284, + 289, + "data on sea arrivals <> publisher" + ], + [ + 290, + 294, + "data on sea arrivals <> publication year" + ], + [ + 438, + 442, + "data on sea arrivals <> publication year" + ], + [ + 458, + 476, + "data on sea arrivals <> usage context" + ] + ], + "validated": true, + "empirical_context": "Since 2009, departure points around the Obock areas of Djibouti have become the locus for the mixed migration flows of individuals attempting to reach Yemen. The latest data on sea arrivals shows that, despite the ongoing conflict, 92, 446 people arrived by boat into Yemen in 2015 ( UNHCR 2016 ). While the International Organization on Migration in 27 Djibouti-Inter-agency update for the response to the Yemeni crisis # 33, 12 January 2016", + "type": "data", + "explanation": "This is indeed a dataset as it provides empirical data on the number of people arriving by boat, used for analysis of migration flows.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific numerical information regarding sea arrivals.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on the number of people arriving by boat, used for analysis of migration flows.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "138_781290PAD0JO0R0t0Box377365B00OUO090", + "page": 33, + "text": "In addition, 23 percent of Syrian refugees have chronic diseases or serious medical conditions that require medical follow up. Comparative morbidity data show a different disease profile with increased levels of morbidity for Syrian refugees than Jordanians which may affect the disease burden in the future. According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing 14 percent increase in Jordan \u2019 s total cancer disease burden. Similarly, MOH morbidity data show a rise in selected communicable diseases. For example, TB case notification increased from 5 / 100, 000 among Jordanians in 2009 to 13 / 100, 000 among Syrian refugees in 2013. While no measles cases have been reported in Jordan since 2009, recent MOH data show that 18 Jordanians and 23 Syrians have been diagnosed with the disease in 2013. Polio which had been eradicated since 1999 was also detected in two cases in 2013. With this higher demand for 1414 World Health Organization Statistics, 2013", + "ner_text": [ + [ + 127, + 153, + "named" + ], + [ + 27, + 42, + "Comparative morbidity data <> reference population" + ], + [ + 322, + 328, + "Comparative morbidity data <> data geography" + ], + [ + 438, + 442, + "Comparative morbidity data <> reference year" + ], + [ + 474, + 478, + "Comparative morbidity data <> publication year" + ], + [ + 646, + 666, + "Comparative morbidity data <> data description" + ], + [ + 715, + 719, + "Comparative morbidity data <> reference year" + ], + [ + 762, + 766, + "Comparative morbidity data <> publication year" + ], + [ + 813, + 819, + "Comparative morbidity data <> data geography" + ], + [ + 839, + 842, + "Comparative morbidity data <> publisher" + ], + [ + 927, + 931, + "Comparative morbidity data <> reference year" + ], + [ + 1010, + 1014, + "Comparative morbidity data <> publication year" + ], + [ + 1049, + 1085, + "Comparative morbidity data <> publisher" + ], + [ + 1087, + 1091, + "Comparative morbidity data <> publication year" + ] + ], + "validated": true, + "empirical_context": "In addition, 23 percent of Syrian refugees have chronic diseases or serious medical conditions that require medical follow up. Comparative morbidity data show a different disease profile with increased levels of morbidity for Syrian refugees than Jordanians which may affect the disease burden in the future. According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing 14 percent increase in Jordan \u2019 s total cancer disease burden.", + "type": "data", + "explanation": "This is indeed a dataset as it provides empirical data on morbidity profiles for Syrian refugees compared to Jordanians.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific morbidity data that can be compared across populations.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on morbidity profiles for Syrian refugees compared to Jordanians.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 31, + "text": "The establishment of a feedback loop by collecting high frequency data will help the implementation of the Economic Opportunities Program. To accomplish this objective, ARRA will need technical support to be able to track Program interventions and results as it relates to the refugee agenda. C. Disbursement Arrangements 45. Disbursements for the PforR will follow the World Bank \u2019 s Policy and Directive on PforR. Disbursements will be made based on verified results, as measured by DLIs. For each achieved disbursement-linked result, a specific verification protocol is established, as described in annex 3. 1, and will be further detailed in the Program Operations Manual ( POM ). The government will enlist the services of an independent verification agent to be hired by the PCU; currently the Ethiopian Development Research Institute and / or the Ethiopian Economic Association are being considered as potential verification agents. Verification reports are expected to be submitted within two months following achievement of results. Once the verification report has been completed, the EIC will submit the documentation accompanied by the verification report to the World Bank. The World Bank will review and notify the Government to confirm ( fully or partially ) the achievement of results and the amount to be paid from each co-financier accordingly.", + "ner_text": [ + [ + 51, + 70, + "named" + ] + ], + "validated": false, + "empirical_context": "The establishment of a feedback loop by collecting high frequency data will help the implementation of the Economic Opportunities Program. To accomplish this objective, ARRA will need technical support to be able to track Program interventions and results as it relates to the refugee agenda.", + "type": "data", + "explanation": "'High frequency data' is not a dataset itself but rather a type of data that may be collected, thus not functioning as a structured collection in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'high frequency data' refers to a dataset due to its structured nature implied in the context.", + "contextual_reason_agent": "'High frequency data' is not a dataset itself but rather a type of data that may be collected, thus not functioning as a structured collection in this context.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 37, + "text": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 33 of 44 communicated to the complainant Refugee households included in the social registry Number of unique heads of refugee households in the registry, regardless of poverty status or program eligibility Quarterly Project administrativ e data Routine monitoring SEAS Communities trained on the identification, implementation, and maintenance of community sub-projects Number of communities having received all modules of FGB training program Quarterly Project administrativ e data Routine monitoring SEAS Community sub-projects constructed or rehabilitated Number of community sub - project completed ( r\u00e9ception provisoire ) Quarterly Project administrativ e data Routine monitoring SEAS Community sub-projects functional one year after completion Percentage of infrastructure assets rated as functional by an independent technical auditor one year after they are completed ( with completion defined as having passed the stage of r\u00e9ception provisoire ) Once Technical audit report Technical audit SEAS Beneficiaries satisfied with the community infrastructures financed by the project Percentage of direct beneficiaries of community infrastructures that are globally satisfied with the infrastructures Once Survey Survey at end of project SEAS People provided with access to improved sanitation services The indicator measures the cumulative number of people who benefited from improved sanitation Quarterly Project administrativ e data Routine monitoring SEAS", + "ner_text": [ + [ + 159, + 174, + "named" + ], + [ + 124, + 142, + "social registry <> reference population" + ], + [ + 175, + 219, + "social registry <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 33 of 44 communicated to the complainant Refugee households included in the social registry Number of unique heads of refugee households in the registry, regardless of poverty status or program eligibility Quarterly Project administrativ e data Routine monitoring SEAS Communities trained on the identification, implementation, and maintenance of community sub-projects Number of communities having received all modules of FGB training program Quarterly Project administrativ e data Routine monitoring SEAS Community sub-projects constructed or rehabilitated Number of community sub - project completed ( r\u00e9ception provisoire ) Quarterly Project administrativ e data Routine monitoring SEAS Community sub-projects functional one year after completion Percentage of infrastructure assets rated as functional by an independent technical auditor one year after they are completed ( with completion defined as having passed the stage of r\u00e9ception provisoire ) Once Technical audit report Technical audit SEAS Beneficiaries satisfied with the community infrastructures financed by the project Percentage of direct beneficiaries of community infrastructures that are globally satisfied with the infrastructures Once Survey Survey at end of project SEAS People provided with access to improved sanitation services The indicator measures the cumulative number of people who benefited from improved sanitation Quarterly Project administrativ e data Routine monitoring SEAS", + "type": "registry", + "explanation": "In the context, it is explicitly mentioned as a source of information regarding refugee households included in the social registry.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'social registry' suggests a structured collection of data regarding households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of information regarding refugee households included in the social registry.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 20, + "text": "This will be supported through: ( i ) learning standards in line with the competency-based curricula for all grades and levels; ( ii ) a high-quality digital item bank to automatically generate competency-based formative and summative assessments; ( iii ) development of a digital assessment platform whose data will feed into automated information system modules available at the central and regional levels; and ( iv ) training on the use and maintenance of the platform for end users. The digital assessment platform will be used to implement the national standardized assessments. This subcomponent will finance technical assistance ( consultancy firms and individual ) and 26 It is expected that most of the hardware will be leased to MEP, as explained in the next paragraph. Computational services would be provided through modular mobile carts that will be wheeled to classrooms. Modeling with empirical data indicates that 153, 785 computers would be sufficient to cover the needs of PNFT. PNFT has two dimensions: the first comprises computational thinking or computational science, which concerns the implementation of the PNFT curriculum that requires 2 mandatory lessons per week. Dimension 2 comprises the use of computers and digital tools for all subjects in the curriculum, which is expected to grow over time as progress in Dimension 1 generates awareness, interest, and capabilities.", + "ner_text": [ + [ + 830, + 850, + "named" + ] + ], + "validated": false, + "empirical_context": "This subcomponent will finance technical assistance ( consultancy firms and individual ) and 26 It is expected that most of the hardware will be leased to MEP, as explained in the next paragraph. Computational services would be provided through modular mobile carts that will be wheeled to classrooms. Modeling with empirical data indicates that 153, 785 computers would be sufficient to cover the needs of PNFT.", + "type": "program", + "explanation": "However, 'modular mobile carts' are described as physical equipment for delivering services, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'modular mobile carts' could imply a structured system for data collection.", + "contextual_reason_agent": "However, 'modular mobile carts' are described as physical equipment for delivering services, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 61, + "text": "Women and men have the same rights to vote and be elected. Affirmative action measures include quotas. In 2013, the election law set up a 20 percent quota of women on electoral lists. This enabled women to get 22. 5 percent of seats in the Assembly, 18 percent in the Senate that year and 35 percent of municipal councils, with six female mayors in 2018. Progress is slow: in 2019, the national assembly has 25 percent of seats occupied by women and 36 percent in local assemblies. Hurdles facing women \u2019 s participation include: low schooling levels, lack of resources to engage in political activities, and social norms about the role women can play in public life. 12. Mobility. Women can obtain a passport and national identity card, independently of their marital status. However, foreign women who marry a Mauritanian citizen obtain citizenship through marriage while Mauritanian women cannot transmit their nationality to foreign husbands. 13. GBV. Data about GBV ( including female genital mutilation, sexual violence, early and forced marriages ) is difficult to come by. Legal protection of survivors is weak, which may also deter reporting. The 2015 Demographic and Health Survey yields the following statistics: a. Two-thirds of women aged 15 to 49 have suffered some form of female genital mutilation / excision ( FGM / E ). These rates are higher in rural ( 75 percent ) than in urban ( 55 percent ) areas and decrease with education ( 75 percent of women without schooling vs. 49 percent among women with at least secondary education ). More than half of female respondents declared that at least one of their daughters alive had undergone some form of MGF / E. Highest rates are in Assaba and Hodh el Gharbi", + "ner_text": [ + [ + 1156, + 1190, + "named" + ] + ], + "validated": true, + "empirical_context": "Legal protection of survivors is weak, which may also deter reporting. The 2015 Demographic and Health Survey yields the following statistics: a. Two-thirds of women aged 15 to 49 have suffered some form of female genital mutilation / excision ( FGM / E ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced for yielding statistics related to female genital mutilation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that provides statistics.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced for yielding statistics related to female genital mutilation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 10, + "text": "), Central Statistical Agency ( CSA ), Ethiopia \u2014 Demographic and Health Survey 2016 ( CSA, 2017 ). 6 International Monetary Fund ( IMF ), World Economic Outlook, April 2018 ( IMF, 2018 ). 7 World Bank, World Development Indicators ( World Bank, 2018 ). 8 United Nations Development Programme ( UNDP ), Human Development Reports: 2018 Statistical Update ( UNDP, 2018 ). 9 CSA and ORC Macro, Ethiopia Demographic and Health Survey 2016 ( CSA, 2016 ). 10 Woredas or districts are the third-level administrative divisions. They are further subdivided into wards ( kebeles ) or neighborhood associations, which are the smallest unit of local government. 11 World Bank, Ethiopia \u2014 Priorities for Ending Extreme Poverty and Promoting Shared Prosperity: Systematic Country Diagnostic ( World Bank, 2016 ).", + "ner_text": [ + [ + 50, + 79, + "named" + ], + [ + 3, + 29, + "Demographic and Health Survey <> author" + ], + [ + 39, + 47, + "Demographic and Health Survey <> data geography" + ], + [ + 80, + 84, + "Demographic and Health Survey <> publication year" + ], + [ + 391, + 399, + "Demographic and Health Survey <> data geography" + ], + [ + 665, + 673, + "Demographic and Health Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "), Central Statistical Agency ( CSA ), Ethiopia \u2014 Demographic and Health Survey 2016 ( CSA, 2017 ). 6 International Monetary Fund ( IMF ), World Economic Outlook, April 2018 ( IMF, 2018 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as a survey conducted by the Central Statistical Agency.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a survey conducted by the Central Statistical Agency.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 19, + "text": "Only one in five refugee households has a loan and less than one in ten of refugees receives any form of credit according to the World Bank \u2019 s Uganda Refugee and Host Communities 2018 Household Survey. 27 Furthermore, low firm capabilities ( in this case, business acumen or technology access ) limit the ability of supply chains to expand in RHDs. Lastly, relatively low disposable incomes result in limited market demand potential in RHDs, creating an environment where only business models based on low entry costs, scalability, and portability are profitable. 20. COVID-19 has had a profoundly negative impact on Uganda \u2019 s labor markets, affecting refugees and women more than men. Around 19 percent of respondents who worked before the first lockdown stopped working by June 2020. 28 Respondents from urban areas and those who worked in the services sector had the largest incidence of work stoppages. Female respondents were more likely to stop working than male respondents ( 23 versus 16 percent ) and the gap was particularly pronounced in urban areas and among respondents age 15 \u2013 30. For refugee women, reduced humanitarian assistance and fewer food rations coupled with the lockdowns and economic recession has further reduced their incomes and exacerbated their vulnerability.", + "ner_text": [ + [ + 144, + 201, + "named" + ] + ], + "validated": true, + "empirical_context": "Only one in five refugee households has a loan and less than one in ten of refugees receives any form of credit according to the World Bank \u2019 s Uganda Refugee and Host Communities 2018 Household Survey. 27 Furthermore, low firm capabilities ( in this case, business acumen or technology access ) limit the ability of supply chains to expand in RHDs.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a household survey providing data on refugee households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical information.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a household survey providing data on refugee households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "Annual DHIS ( 2 ) Administrative data Health Department, HMIS, PMU", + "ner_text": [ + [ + 57, + 61, + "named" + ] + ], + "validated": false, + "empirical_context": "Annual DHIS ( 2 ) Administrative data Health Department, HMIS, PMU", + "type": "program", + "explanation": "HMIS is mentioned as part of the context but not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is associated with administrative data.", + "contextual_reason_agent": "HMIS is mentioned as part of the context but not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 80, + "text": "70 VI. Project Monitoring and Evaluation 78. Monitoring and Evaluation ( M & E ) will be conducted by a dedicated team based on the achievements and lessons learned through other project implementation in Mali as well as similar reinsertion project in the region. The M & E team will have the following objectives: ( i ) Improve project management; ( ii ) Ensure transparency in data sharing of the project with various stakeholders; ( iii ) Ensure efficiency of the activities; ( iv ) Provide accurate and timely information to adjust or modify the activities in relation to the evolution of the context during implementation; and ( v ) Provide accurate and timely information to help management take the right decisions. 79. The M & E team will produce the monthly, quarterly and annual program activity reports. These reports will be available in electronic form to facilitate access by the various government partners and donors. In addition, ad hoc assessments will be conducted as well as studies based on identified needs. This will include formal assessments including on the demobilization process and regular tracer beneficiary surveys combining qualitative and quantitative data. Specific studies on the inclusion of gender in the reinsertion of adults and children will also be conducted. 80. A mid-term review will be conducted one year after the start of the project in collaboration with donors and government.", + "ner_text": [ + [ + 1119, + 1145, + "named" + ], + [ + 205, + 209, + "tracer beneficiary surveys <> data geography" + ], + [ + 1257, + 1263, + "tracer beneficiary surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "In addition, ad hoc assessments will be conducted as well as studies based on identified needs. This will include formal assessments including on the demobilization process and regular tracer beneficiary surveys combining qualitative and quantitative data. Specific studies on the inclusion of gender in the reinsertion of adults and children will also be conducted.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as part of the assessments that will be conducted to gather data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'tracer beneficiary surveys' implies a structured collection of data gathered from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as part of the assessments that will be conducted to gather data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 626, + 639, + "named" + ], + [ + 4, + 14, + "school census <> publisher" + ], + [ + 640, + 643, + "school census <> publisher" + ], + [ + 1130, + 1133, + "school census <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "census", + "explanation": "In this context, 'school census' is indeed used as a data source for annual data collection through the school census.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'school census' is a dataset because it implies a systematic collection of data regarding schools.", + "contextual_reason_agent": "In this context, 'school census' is indeed used as a data source for annual data collection through the school census.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 251, + 290, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "system", + "explanation": "However, it is described as a management information system that stores records, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a management information system that stores records, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "065_Mauritania-Decentralization-and-Productive-Cities-Support-Project", + "page": 42, + "text": "The World Bank Productive & Resilient Intermediate Cities ( P169332 ) Page 37 of 76 Disagregated by refugees This indicator captures the number of refugees benefiting from various types of services such as electricity, market, waste management, road, as a result of the project. Semi-annual Progress reports by PCU and semi-annual surveys In each targeted locality, the PCU will conduct surveys on a semi - annual basis to estimate the number of refugees who are accessing each service provided under the project. The methodology for each specific service will be specified in the POM. PCU Beneficiary local businesses in targeted localities positively impacted by the infrastructures financed by the project This indicator captures the number of local businesses positively impacted by the various types of services financed by the project such as electricity, market, waste management, road, etc. At mid-term and prior to closing of the project Survey conducted at mid-term and prior to closing of project An impact evaluation survey will be conducted by mid-term and prior to closing of the project focusing on businesses benefiting from the infrastructures which activities have been positively impacted. The same methodology used to identify beneficiary people ( or households ) will be used to identify the beneficiary businesses.", + "ner_text": [ + [ + 1011, + 1035, + "named" + ], + [ + 4, + 14, + "impact evaluation survey <> publisher" + ], + [ + 602, + 618, + "impact evaluation survey <> reference population" + ], + [ + 1247, + 1265, + "impact evaluation survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The methodology for each specific service will be specified in the POM. PCU Beneficiary local businesses in targeted localities positively impacted by the infrastructures financed by the project This indicator captures the number of local businesses positively impacted by the various types of services financed by the project such as electricity, market, waste management, road, etc. At mid-term and prior to closing of the project Survey conducted at mid-term and prior to closing of project An impact evaluation survey will be conducted by mid-term and prior to closing of the project focusing on businesses benefiting from the infrastructures which activities have been positively impacted. The same methodology used to identify beneficiary people ( or households ) will be used to identify the beneficiary businesses.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that will collect data on businesses impacted by the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on the impact of the project.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that will collect data on businesses impacted by the project.", + "contextual_signal": "described as a survey that collects data on impacts", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 19, + "text": "The project will deliver benefits to 1. 6 million residents across the GBML, located within the BMLWE areas of service provision that are geographically divided into four zones and 21 municipalities. Beneficiaries will benefit from increased volume and quality of public water provided to the household and a subsequent decrease in the cost of alternative sources of water. 13 39. The decrease in total cost of water will directly and positively impact the poor. Of the 506, 000 people across the GBML that live below US $ 4 per day, 460, 000 are located in the project area, as determined by a project specific survey of 1, 200 project households, Lebanon \u2019 s 2005 Poverty Assessment, and available census data. 40. A household survey of 1, 200 beneficiary households across the GBML was conducted as part of project preparation. Half the project survey respondents reported per capita incomes of less than LBP 600, 000 LBP ( US $ 400 ) per month, equivalent to less than the US $ 4 per day national poverty line. A map of the percent of surveyed households within each municipality whose monthly income is within the bottom third relative to the sample is presented in Figure 1: 13 Households currently buy tanker water, bottled water and / or construct private wells to supplement the low volumes of public water. Water supplied by the Bisri dam will substitute these alternative sources of water supply. 8", + "ner_text": [ + [ + 700, + 711, + "named" + ], + [ + 71, + 75, + "census data <> data geography" + ], + [ + 497, + 501, + "census data <> data geography" + ], + [ + 649, + 656, + "census data <> data geography" + ], + [ + 661, + 684, + "census data <> publication year" + ], + [ + 780, + 784, + "census data <> data geography" + ] + ], + "validated": true, + "empirical_context": "The decrease in total cost of water will directly and positively impact the poor. Of the 506, 000 people across the GBML that live below US $ 4 per day, 460, 000 are located in the project area, as determined by a project specific survey of 1, 200 project households, Lebanon \u2019 s 2005 Poverty Assessment, and available census data. 40.", + "type": "census", + "explanation": "In this context, 'census data' is explicitly mentioned as a source of information used to determine the number of people living below a certain income level.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'census data' is a dataset because it refers to a structured collection of demographic information.", + "contextual_reason_agent": "In this context, 'census data' is explicitly mentioned as a source of information used to determine the number of people living below a certain income level.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 45, + "text": "Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "ner_text": [ + [ + 366, + 374, + "named" + ], + [ + 424, + 444, + "PMU Data <> data type" + ] + ], + "validated": true, + "empirical_context": "Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "type": "data", + "explanation": "In this context, 'PMU Data' is indeed used as a structured collection of data for reporting purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is mentioned in the context of compiling and recording information.", + "contextual_reason_agent": "In this context, 'PMU Data' is indeed used as a structured collection of data for reporting purposes.", + "contextual_signal": "mentioned as data to be compiled and recorded in progress reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 76, + "text": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 73 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure \u2022 National EMIS Platform ( adapted integrated technical platform based on the EMIS development under the CEQUIL Project ), operational and functional, guidelines developed and person-in-charge designed, in all four ministries in charge of education in Cameroon, and meet international quality standards \u2022 Budget approved for EMIS activities for subsequent year Years 3, 4, 5 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) Year 4 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely Strategy will be approved by the Committee overseeing EMIS and acceptable to the Bank Budget line", + "ner_text": [ + [ + 241, + 263, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 73 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure \u2022 National EMIS Platform ( adapted integrated technical platform based on the EMIS development under the CEQUIL Project ), operational and functional, guidelines developed and person-in-charge designed, in all four ministries in charge of education in Cameroon, and meet international quality standards \u2022 Budget approved for EMIS activities for subsequent year Years 3, 4, 5 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) Year 4 \u2022 Integrated Statistical Yearbook of the Education and Training Sector will contain relevant data from the 4 ministries in charge of education in Cameroon, namely Strategy will be approved by the Committee overseeing EMIS and acceptable to the Bank Budget line", + "type": "system", + "explanation": "However, the context indicates it is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a platform related to data management.", + "contextual_reason_agent": "However, the context indicates it is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 49, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually. Data source MOE, Education Management Information System ( EMIS ) Methodology for Data Collection Review of EMIS-generated report. Responsibility for Data Collection MOE. Strengthened digital skills in civil service Description Number of senior officials and applicants to jobs in the civil service with digital skills certified as a result of training. Frequency Once, by Program closure. Data source Roster of digitally skilled trained and certified senior officials and applicants.", + "ner_text": [ + [ + 510, + 513, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually.", + "type": "system", + "explanation": "'EHS' is mentioned as part of administrative data but not explicitly defined as a dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EHS' is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "'EHS' is mentioned as part of administrative data but not explicitly defined as a dataset or data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 132, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 120 will be maintained. Variance analysis with explanations formed part of the regular reports for ESPES. The reporting for HCO builds on that experience. Management will take corrective measures based on these reports. 30. Policies and procedures. The government follows a double-entry bookkeeping system and modified cash basis of accounting, as documented in the GOE \u2019 s Accounting Manual. For the HCO, the GOE \u2019 s accounting policies and procedures will be used for the accounting of the project. The specific project arrangements with regard to the submission of quarterly reports, fund flow, and audits will be included in the POM that will be prepared for the operation not later than six months after effectiveness. 31. Accounting system. The MOF and federal-level implementers use IFMIS while the regional BOF and implementing sector bureaus use the IBEX system, which will be tailored to the project on a standalone basis. The HCO chart of accounts will be developed using the GOE \u2019 s chart of accounts to capture the components, subcomponents, categories, and activities of the project expenditures. The chart of accounts will be part of the FM section of the POM. 32. Accounting centers and accounting documents.", + "ner_text": [ + [ + 850, + 855, + "named" + ] + ], + "validated": false, + "empirical_context": "Accounting system. The MOF and federal-level implementers use IFMIS while the regional BOF and implementing sector bureaus use the IBEX system, which will be tailored to the project on a standalone basis. The HCO chart of accounts will be developed using the GOE \u2019 s chart of accounts to capture the components, subcomponents, categories, and activities of the project expenditures.", + "type": "system", + "explanation": "However, IFMIS is described as an accounting system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMIS is a dataset because it is related to data management in accounting.", + "contextual_reason_agent": "However, IFMIS is described as an accounting system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 12, + "text": "The situation is worse in rural areas; for instance, only 47 percent of the rural population has access to basic drinking water ( versus 83 percent in urban areas ), and only 19 percent of the rural population has access to safely managed sanitation ( versus 41 percent in urban areas ). The 2017-18 household survey, nquete Djiboutienne Aupres des Menages pour les indicateurs sociaux ( EDAM4-IS ) 3, shows correlation between poor water and sanitation and increased occurrences of ill-health episodes. 6. Significant gender inequalities persist in Djibouti, throughout the lifecycle. Rural women with low levels of education and poor socio-economic status as well as female refugees suffer disproportionately from the multisectoral effects of gender inequalities. The persistence of harmful gender norms affects access to productive resources and limits progress towards gender equality. School enrollment in general and in technical education is growing, but the gender gaps in female-to-male enrollment ratios ( 82 percent in primary, 72 percent in secondary, and 61 percent in higher education ) increase with each level. Unequal access to quality health services particularly affects the sexual and reproductive health of women. The multiplicity of measures taken to economically empower women did not necessarily promote their equitable access to economic opportunities.", + "ner_text": [ + [ + 388, + 396, + "named" + ], + [ + 76, + 92, + "EDAM4-IS <> reference population" + ], + [ + 292, + 299, + "EDAM4-IS <> publication year" + ], + [ + 300, + 316, + "EDAM4-IS <> data type" + ], + [ + 550, + 558, + "EDAM4-IS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The situation is worse in rural areas; for instance, only 47 percent of the rural population has access to basic drinking water ( versus 83 percent in urban areas ), and only 19 percent of the rural population has access to safely managed sanitation ( versus 41 percent in urban areas ). The 2017-18 household survey, nquete Djiboutienne Aupres des Menages pour les indicateurs sociaux ( EDAM4-IS ) 3, shows correlation between poor water and sanitation and increased occurrences of ill-health episodes. 6.", + "type": "survey", + "explanation": "It is indeed a dataset as it is explicitly identified as a household survey that collects data on social indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a household survey providing indicators.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly identified as a household survey that collects data on social indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 46, + "text": "The Project would also provide TA by sharing international experiences and best practices and by promoting joint activities between the different levels and actors involved in the activities of the Project. Notably, the Project \u2019 s design has built in institutional capacity assessments at the national and subnational levels to clearly identify the key constraints in the sector, based on which the capacity building and TA will be designed. To further ensure the Project \u2019 s sustainability, consultations, communication, and outreach will be carried out throughout Project implementation to ensure that local stakeholders and communities have a clear understanding and expectations of the Project \u2019 s objectives and target populations. Moreover, the Project \u2019 s design incorporates a citizen engagement indicator and a Grievance Redress Mechanism to ensure that citizens are able to provide feedback and that any grievances or concerns are addressed in a timely manner. By focusing on building systems and institutions, the Project will also help establish capacity to continue the developed programs beyond its lifespan. In refugee-hosting areas, specific efforts will be made, in coordination with UNHCR, to engage with existing refugee and host community committees to ensure they can meaningfully provide input to the design and implementation of project activities in their areas. 126. Environmental and social risks are rated Substantial.", + "ner_text": [ + [ + 786, + 814, + "named" + ] + ], + "validated": false, + "empirical_context": "To further ensure the Project \u2019 s sustainability, consultations, communication, and outreach will be carried out throughout Project implementation to ensure that local stakeholders and communities have a clear understanding and expectations of the Project \u2019 s objectives and target populations. Moreover, the Project \u2019 s design incorporates a citizen engagement indicator and a Grievance Redress Mechanism to ensure that citizens are able to provide feedback and that any grievances or concerns are addressed in a timely manner. By focusing on building systems and institutions, the Project will also help establish capacity to continue the developed programs beyond its lifespan.", + "type": "indicator", + "explanation": "However, it is not a dataset as it refers to a measure or metric rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'indicator', which can imply a measure of data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a measure or metric rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project component, not as a data source", + "tags": [] + }, + { + "filename": "004_BOSIB-87c444de-4797-4bf9-b654-4932a7fb0112", + "page": 10, + "text": "Additionally, poor health outcomes for women, including high rates of female genital mutilation and maternal health challenges, further impede human capital accumulation. Private sector surveys highlight a critical mismatch between labor market demands and the available skills. Women face even greater challenges due to entrenched social norms and structural barriers to self-employment and entrepreneurship, as reflected in Djibouti \u2019 s low ranking on the Women, Business, and the Law Index ( World Bank 2022a ). 4. Djibouti \u2019 s vulnerability to climate-related disasters exacerbates these economic and social challenges. The country frequently experiences heatwaves, droughts and floods, which take a heavy toll on the population by reducing productivity and disrupting economic activity. Between 1980 and 2019, Djibouti recorded ten major droughts, with the 2008-2011 drought alone shrinking GDP by four percent and affecting over 100, 000 people, leading to the loss of half of Djibouti \u2019 s livestock. More recent flooding events between 2018 and 2020 caused substantial damage, impacting 250, 000 people and requiring an estimated US $ 25 million for recovery and reconstruction. The increasing frequency and intensity of these extreme weather events place additional pressure on rural communities, which are already struggling with poverty, underdeveloped infrastructure, and limited access to essential services. In 2021, 89 percent of Djibouti \u2019 s population was exposed to climate hazards, with extreme heat affecting more people ( 82. 8 percent ) than any other 1 In Djibouti less than 1, 000 square kilometers of land is arable and annual rainfall is extremely low ( 130 mm ).", + "ner_text": [ + [ + 171, + 193, + "named" + ], + [ + 279, + 284, + "Private sector surveys <> reference population" + ], + [ + 426, + 434, + "Private sector surveys <> data geography" + ], + [ + 495, + 505, + "Private sector surveys <> publisher" + ], + [ + 518, + 526, + "Private sector surveys <> data geography" + ], + [ + 815, + 823, + "Private sector surveys <> data geography" + ], + [ + 983, + 991, + "Private sector surveys <> data geography" + ], + [ + 1424, + 1428, + "Private sector surveys <> publication year" + ], + [ + 1444, + 1452, + "Private sector surveys <> data geography" + ], + [ + 1578, + 1586, + "Private sector surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "Additionally, poor health outcomes for women, including high rates of female genital mutilation and maternal health challenges, further impede human capital accumulation. Private sector surveys highlight a critical mismatch between labor market demands and the available skills. Women face even greater challenges due to entrenched social norms and structural barriers to self-employment and entrepreneurship, as reflected in Djibouti \u2019 s low ranking on the Women, Business, and the Law Index ( World Bank 2022a ).", + "type": "survey", + "explanation": "In this context, 'private sector surveys' is used to highlight critical labor market mismatches, indicating it serves as a data source for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'private sector surveys' implies a collection of data gathered from various sources.", + "contextual_reason_agent": "In this context, 'private sector surveys' is used to highlight critical labor market mismatches, indicating it serves as a data source for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 89, + "text": "The monitoring component of the M & E approach will require data collection across different dimensions of the Project: ( 1 ) Performance Tracking data ( e. g. sales, employment, wages, transactions, etc ); ( 2 ) Activity Tracking data reflecting the Theory of Change ( e. g. as reflected by the number of loans serviced on the project \u2019 s web platform, the number of receivables purchased on the factoring platform, the number of refugees receiving business training, etc. ); ( 3 ) Key Results data ( e. g. value of private investment in manufacturing firms, formal employment in manufacturing firms, etc ); and ( 4 ) Key Risks tracking ( e. g. project implementation performance, NPL ratio of banks and PAR of MFIs, etc ). The evaluation component will build on the data collected under the monitoring component, but additionally focus on implementing a structured impact evaluation to measure the impact and attribution of the different policies under the project i. e. incubators, industrial parks, etc.,", + "ner_text": [ + [ + 483, + 499, + "named" + ], + [ + 560, + 600, + "Key Results data <> data description" + ], + [ + 682, + 700, + "Key Results data <> data description" + ] + ], + "validated": true, + "empirical_context": "g. as reflected by the number of loans serviced on the project \u2019 s web platform, the number of receivables purchased on the factoring platform, the number of refugees receiving business training, etc. ); ( 3 ) Key Results data ( e. g.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific measurable results that are likely collected and analyzed.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' and is associated with measurable outcomes.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific measurable results that are likely collected and analyzed.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 63, + "text": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 60 of 148 Disbursement - linked Indicator Baseline Academic Year 2018 \u2013 19 Disbursement-linked Result Academic Year 2019 \u2013 20 Disbursement-linked Result Academic Year 2020 \u2013 21 Disbursement-linked Result Academic Year 2021 \u2013 22 Disbursement-linked Result Academic Year 2022 \u2013 23 Disbursement-linked Result unit ( d ) Student assessment conducted for primary education MINESEC approved DLI 6: Integrated education management information system functional and operational ( a ) Data collection system exists, but is fragmented, with limited data availability and quality, and does not produce or analyze data in a timely manner ( b ) EMIS diagnostic completed for basic and secondary levels and available and validated DLR 6. 1: ( a ) National EMIS implementation strategy and operational plan approved ( b ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved DLR 6. 2: ( a ) National EMIS platform developed and operational ( b ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved DLR 6. 3: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved DLR 6. 4: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b )", + "ner_text": [ + [ + 706, + 710, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 60 of 148 Disbursement - linked Indicator Baseline Academic Year 2018 \u2013 19 Disbursement-linked Result Academic Year 2019 \u2013 20 Disbursement-linked Result Academic Year 2020 \u2013 21 Disbursement-linked Result Academic Year 2021 \u2013 22 Disbursement-linked Result Academic Year 2022 \u2013 23 Disbursement-linked Result unit ( d ) Student assessment conducted for primary education MINESEC approved DLI 6: Integrated education management information system functional and operational ( a ) Data collection system exists, but is fragmented, with limited data availability and quality, and does not produce or analyze data in a timely manner ( b ) EMIS diagnostic completed for basic and secondary levels and available and validated DLR 6. 1: ( a ) National EMIS implementation strategy and operational plan approved ( b ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved DLR 6.", + "type": "program", + "explanation": "'EMIS' is mentioned as a management information system, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it relates to data management in education.", + "contextual_reason_agent": "'EMIS' is mentioned as a management information system, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 69, + "text": "Specifically, it will include six core functions: ( a ) e-learning, including instructional material with concrete guidance for CECs, facilitators, and communal authorities on their role at different stages of the annual investment cycles, ( b ) participatory diagnostics, designed to support the mapping of available infrastructure and services and document prioritized needs by different groups, ( c ) sub-project cycle, connected to the project MIS, which allows to submit agreed investment proposal, track approvals, and monitor implementation, ( d ) community pulse surveys, consisting of high-frequency or on demand surveys to gain regular community insights to help project implementation, ( e ) community scorecards to allow citizens to assess sub-projects, and ( f ) a GRM through multiple channels ( analogue, feature phones, smartphones, web form ) for citizens to voice complaints, concerns or provide feedback on project activities. Collected data will also feed into the publicly accessible Casamance Knowledge Management Platform developed under the Component 4, to inform dialogue among practitioners, policymakers and the public. The deployment of these tools will take into consideration connectivity and literacy constraints.", + "ner_text": [ + [ + 555, + 578, + "named" + ], + [ + 594, + 629, + "community pulse surveys <> data description" + ], + [ + 733, + 741, + "community pulse surveys <> reference population" + ], + [ + 864, + 872, + "community pulse surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "Specifically, it will include six core functions: ( a ) e-learning, including instructional material with concrete guidance for CECs, facilitators, and communal authorities on their role at different stages of the annual investment cycles, ( b ) participatory diagnostics, designed to support the mapping of available infrastructure and services and document prioritized needs by different groups, ( c ) sub-project cycle, connected to the project MIS, which allows to submit agreed investment proposal, track approvals, and monitor implementation, ( d ) community pulse surveys, consisting of high-frequency or on demand surveys to gain regular community insights to help project implementation, ( e ) community scorecards to allow citizens to assess sub-projects, and ( f ) a GRM through multiple channels ( analogue, feature phones, smartphones, web form ) for citizens to voice complaints, concerns or provide feedback on project activities. Collected data will also feed into the publicly accessible Casamance Knowledge Management Platform developed under the Component 4, to inform dialogue among practitioners, policymakers and the public.", + "type": "survey", + "explanation": "This is a dataset as it refers to high-frequency surveys designed to gather regular community insights, which are used for project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'community pulse surveys' implies a structured method of collecting data from the community.", + "contextual_reason_agent": "This is a dataset as it refers to high-frequency surveys designed to gather regular community insights, which are used for project implementation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 55, + "text": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 51 of 111 Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection report that there Communities were consulted and their project - related grievances were addressed assessment survey by FAO & ICRC Units Description: To be measured through beneficiary impact assessment survey by FAO & ICRC for their components. Target will be an average across ICRC and FAO components for a selection of major intervention types out of their respective components.", + "ner_text": [ + [ + 398, + 434, + "named" + ], + [ + 15, + 22, + "beneficiary impact assessment survey <> data geography" + ], + [ + 243, + 254, + "beneficiary impact assessment survey <> reference population" + ], + [ + 351, + 355, + "beneficiary impact assessment survey <> publisher" + ], + [ + 438, + 441, + "beneficiary impact assessment survey <> publisher" + ], + [ + 444, + 448, + "beneficiary impact assessment survey <> author" + ] + ], + "validated": true, + "empirical_context": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 51 of 111 Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection report that there Communities were consulted and their project - related grievances were addressed assessment survey by FAO & ICRC Units Description: To be measured through beneficiary impact assessment survey by FAO & ICRC for their components. Target will be an average across ICRC and FAO components for a selection of major intervention types out of their respective components.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a method for measuring project-related grievances through a structured assessment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a method for measuring project-related grievances through a structured assessment.", + "contextual_signal": "described as a method for measuring through assessment survey", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 25, + "text": "The project M & E will leverage and strengthen existing routine information systems, and finance the generation of user \u2010 friendly evidence for efficient service delivery. Routine surveys will be used to collate data from target facilities, which will be triangulated through the existing management information system within the Health and Secondary Education Departments. The remote monitoring system within the SED uses technology \u2010 based data management solutions with a dashboard to display the broader analysis. The project will support the Health Department to adopt a similar system and cater for the monitoring needs of the project. The project will also support an innovative, technology \u2010 based pilot intervention to track the presence of providers at HFs and assess their knowledge to design appropriate trainings. In addition, the project will train district \u2010 and cluster \u2010 level education and health teams in data collection, management, analysis, and timely course correction. Process evaluations will be used to measure the quality of implementation. To tackle the challenges in evidence \u2010 based decision making and improved accountability within the Health Department, the project will support the GoB to ( a ) establish or strengthen an HRH database, a health institutional database that routinely tracks facility 47 During the early phase of implementation, the Governance and Policy Program ( GPP ) PMU will provide back \u2010 up support. Implementation / Monitoring Operational Coordination Oversight PSC Headed by Additional Chief Secretary, to oversee the project implementation and provide stewardship PCC Headed by Secretary, Health and Secretary Education, to coordinate and facilitate project implementation PMU \u2010 Health Existing PMU to manage the project implementation and monitoring District Health Teams Headed by district health officer to plan, implement, and monitor the project activities PMU \u2010 Education Existing PMU to manage the project implementation and monitoring PMU District Teams Headed by District Coordinator to plan, implement, and monitor the project activities", + "ner_text": [ + [ + 378, + 402, + "named" + ] + ], + "validated": false, + "empirical_context": "Routine surveys will be used to collate data from target facilities, which will be triangulated through the existing management information system within the Health and Secondary Education Departments. The remote monitoring system within the SED uses technology \u2010 based data management solutions with a dashboard to display the broader analysis. The project will support the Health Department to adopt a similar system and cater for the monitoring needs of the project.", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data management solutions.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 129, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 117 accountability, financial transparency, and grievance redress mechanisms. Under PFM, several expenditure and financial management assessments highlight how issues related to budget prioritization, reliability, and predictability impede service delivery and results. 119 The PFM subcomponent will provide TA in linking budgets with overarching HCO strategic results and supporting PFM oversight for institutions. Specifically, it will support the implementation of PBB and accountability for program results at both federal and regional levels. This will include: ( a ) training on PBB processes; ( b ) defining roles, responsibilities, and coordination mechanisms; ( c ) piloting PBB at the regional level ( Figure 8. 1 ) with the development of a regional PBB manual; ( d ) testing financial and human resource incentives to support a performance culture; and ( e ) creating regular monitoring and reporting system for financial and nonfinancial performance. PBB guidelines will be included in the POM as part of the IPF component. In addition, overall financial data collection and reporting will be strengthened ( using existing systems such as the Integrated Financial Management Information System ( IFMIS ) and IBEX ) to ensure proper tracking and reporting such as codification of human capital and cross-cutting activities and objectives in the budget system of regions and woredas.", + "ner_text": [ + [ + 1216, + 1266, + "named" + ] + ], + "validated": false, + "empirical_context": "PBB guidelines will be included in the POM as part of the IPF component. In addition, overall financial data collection and reporting will be strengthened ( using existing systems such as the Integrated Financial Management Information System ( IFMIS ) and IBEX ) to ensure proper tracking and reporting such as codification of human capital and cross-cutting activities and objectives in the budget system of regions and woredas.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "006_BOSIB-2c22668f-f4ba-42f5-a0d1-7949b7b8fe34", + "page": 38, + "text": "Policy by addressing inconsistencies in existing laws that prevent refugees and former refugees from accessing basic services and regularizing their immigration status. Frequency Biannual Data source Project progress report, cabinet memos Methodology for Data Collection Data collected through minutes of the interministerial committee ( MORHCSA ) meetings, Parliamentary committee reports Responsibility for Data Collection MoHAIS Stakeholder consultations convened and priority measures identified Description Number of high-level consultations held with relevant government ministries and civil society organizations to deliberate on measures identified for legislative and regulatory reform, implementation performance, roles and responsibilities of", + "ner_text": [ + [ + 358, + 389, + "named" + ] + ], + "validated": false, + "empirical_context": "Policy by addressing inconsistencies in existing laws that prevent refugees and former refugees from accessing basic services and regularizing their immigration status. Frequency Biannual Data source Project progress report, cabinet memos Methodology for Data Collection Data collected through minutes of the interministerial committee ( MORHCSA ) meetings, Parliamentary committee reports Responsibility for Data Collection MoHAIS Stakeholder consultations convened and priority measures identified Description Number of high-level consultations held with relevant government ministries and civil society organizations to deliberate on measures identified for legislative and regulatory reform, implementation performance, roles and responsibilities of", + "type": "document", + "explanation": "However, it is not a dataset as it is mentioned as a document rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions 'reports' which can imply structured information.", + "contextual_reason_agent": "However, it is not a dataset as it is mentioned as a document rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a document, not as a data source", + "tags": [] + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "180 Attachment 1. Project Design Summary Sector-related Country Assistance Strategy Long-Term Program Indicators: Program Reports: ( From Goal to Mission ) ( CAS ) Goal: * By 2003, HIV prevalence will be reduced * National HIV / AIDS surveillance * Social and cultural behavior chag from 14 percent to 13 percent asnong reports, mnidterm review, and end improves throughout the life of the young people ( I15-24 years ). of project evaluation. project To mitigate the social and economic impact of the HIV / AIDS epidemic in Kenya. By 2004, HIV / AIDS prevalence among * National HIV / AIDS surveillance adults ( 1549 years ) will remain below 14 reports, mnidterm review, and end percent. of project evaluation. Project Development Objective: Outcome / Impact Indicators: Project Reports: ( From Purpose to Goal ) * By 2003, the proportion of adults ( 15-49 * Poverty reduction efforts continue To intensify the achievement of the multi-sectoral years ) possessing accurate knowledge of * Kenya Demographic and Health throughout the life of the project. HIV / AIDS primary targets in the National HIV / AIDS means of preventing HIV infection will Survey ( KDHS ) data Strategic Plan with the full participation of communities. increase.", + "ner_text": [ + [ + 1148, + 1154, + "named" + ], + [ + 175, + 179, + "Survey <> publication year" + ], + [ + 390, + 402, + "Survey <> reference population" + ], + [ + 525, + 530, + "Survey <> data geography" + ], + [ + 535, + 539, + "Survey <> publication year" + ], + [ + 990, + 995, + "Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Project Development Objective: Outcome / Impact Indicators: Project Reports: ( From Purpose to Goal ) * By 2003, the proportion of adults ( 15-49 * Poverty reduction efforts continue To intensify the achievement of the multi-sectoral years ) possessing accurate knowledge of * Kenya Demographic and Health throughout the life of the project. HIV / AIDS primary targets in the National HIV / AIDS means of preventing HIV infection will Survey ( KDHS ) data Strategic Plan with the full participation of communities. increase.", + "type": "survey", + "explanation": "In this context, 'Survey' refers to the Kenya Demographic and Health Survey (KDHS), which is explicitly mentioned as a source of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Survey' is often associated with structured data collection.", + "contextual_reason_agent": "In this context, 'Survey' refers to the Kenya Demographic and Health Survey (KDHS), which is explicitly mentioned as a source of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 63, + "text": "Afghanistan is highly prone to intense and recurring natural hazards, including earthquakes, floods, flash floods, landslides, avalanches and droughts. Since 1980, disasters caused by natural hazards have affected 9 million people and caused over 20, 000 fatalities in Afghanistan. While earthquakes cause the highest loss of life, drought affects the most people and flooding causes the most economic damage. Most disaster risk management programs in Afghanistan support disaster preparedness and humanitarian responses. National community development programs provide a unique opportunity to significantly improve both local level disaster-preparedness as well as cost-effective, rapidly mobilized post-disaster reconstruction. 20. CCAP will include a stronger focus on disaster risk reduction ( DRR ) to ensure resilient community infrastructure, greater sustainability in investments as well as more resilient communities. While FPs have been asked under earlier phases of NSP to train CDCs on DRR, this was never done in a consistent and coherent manner across all communities. Also, important lessons have since been learned both within Afghanistan ( from NGO partners working at the community level ) and in other countries, which could help inform a stronger approach in the CCAP. Several activities will be carried out: ( a ) A national multi-hazard risk assessment was just recently finalized and will provide critical data and mappings to inform resilient infrastructure designs, differentiated by provincial and district-level risk profiles; ( b ) A training program for the MRRD and IDLG staff will be rolled out to raise awareness of resilient infrastructure aspects. Easy to use checklists will be developed for provincial officials and FPs to use in their engagements with CDCs; ( c ) The Operations Manual will incorporate resilience aspects; and 23 Post-War Reconstruction & Development Unit ( PRDU ), University of York. 2012. The Study of NSP \u2019 s Impact on IDP / Refugee Returnee Reintegration in Afghanistan. York: University of York.", + "ner_text": [ + [ + 1337, + 1374, + "named" + ] + ], + "validated": false, + "empirical_context": "Also, important lessons have since been learned both within Afghanistan ( from NGO partners working at the community level ) and in other countries, which could help inform a stronger approach in the CCAP. Several activities will be carried out: ( a ) A national multi-hazard risk assessment was just recently finalized and will provide critical data and mappings to inform resilient infrastructure designs, differentiated by provincial and district-level risk profiles; ( b ) A training program for the MRRD and IDLG staff will be rolled out to raise awareness of resilient infrastructure aspects. Easy to use checklists will be developed for provincial officials and FPs to use in their engagements with CDCs; ( c ) The Operations Manual will incorporate resilience aspects; and 23 Post-War Reconstruction & Development Unit ( PRDU ), University of York.", + "type": "assessment", + "explanation": "However, it is described as an assessment, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions 'data and mappings' in relation to the assessment.", + "contextual_reason_agent": "However, it is described as an assessment, not a structured collection of data or a data source.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 13, + "text": "One in five students in grade 2 cannot read a single word from a reading passage, while nearly half are unable to perform a single subtraction task correctly, thus lacking the foundational literacy and numeracy skills that enable further cognitive skill development. 14 With a weak start, skills deficits compound such that by age 15, two \u2010 thirds of students do not meet the most basic level of proficiency in mathematics, and half are below basic proficiency in reading and science, as measured by the 2015 Program for International Student Assessment ( PISA ). Furthermore, learning outcome data show a reverse gender gap with girls performing better than boys in reading, mathematics, and science. 15 International comparisons place Jordan in the bottom 20 percent of PISA \u2010 participating countries and economies, substantially below average in mathematics, reading, and science. 13. A key contributor to the learning crisis is the misalignment of policies related to teacher selection, preparation, management, and pedagogical practices. Teachers in Jordan are ill \u2010 prepared for the challenges of classroom teaching. This is largely because they receive insufficient and highly theoretical preservice training, limited in \u2010 service training, and often suffer from weaknesses in subject specific knowledge and skills to advance the potential of children irrespective of their gender and socioeconomic background.", + "ner_text": [ + [ + 577, + 598, + "named" + ], + [ + 504, + 508, + "learning outcome data <> publication year" + ], + [ + 737, + 743, + "learning outcome data <> data geography" + ], + [ + 1055, + 1061, + "learning outcome data <> data geography" + ] + ], + "validated": true, + "empirical_context": "14 With a weak start, skills deficits compound such that by age 15, two \u2010 thirds of students do not meet the most basic level of proficiency in mathematics, and half are below basic proficiency in reading and science, as measured by the 2015 Program for International Student Assessment ( PISA ). Furthermore, learning outcome data show a reverse gender gap with girls performing better than boys in reading, mathematics, and science. 15 International comparisons place Jordan in the bottom 20 percent of PISA \u2010 participating countries and economies, substantially below average in mathematics, reading, and science.", + "type": "data", + "explanation": "In this context, 'learning outcome data' is used to describe measurable results in education, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'learning outcome data' refers to a dataset because it implies a collection of performance metrics.", + "contextual_reason_agent": "In this context, 'learning outcome data' is used to describe measurable results in education, indicating it functions as a data source.", + "contextual_signal": "described as measurable results in education", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 24, + "text": "The country has also put into place programs to support educational development for refugee children. The GoD signed the \u201c Djibouti Declaration on the Education of Refugees in Intergovernmental Authority on Development Member States \u201d on December 14, 2017. Thereby affirming its responsibility for the education of all school-age children within its borders. In line with this, MENFOP is currently taking over the operation of all refugee village / camp schools and is working to develop long-term sustainable education services for refugee children in Djibouti. MENFOP has developed a roadmap for this transition which is regularly followed up by senior officials of the ministry. MENFOP has been keeping track of refugee related data since 2017 / 18 and include these in their annual educational statistical tables. A recent out of school children ( OOSC ) survey 2019, estimates that about 42 percent of 6-10-year-old refugees and 40 percent of 11-14-year-old refugees are enrolled in schools nationally. Rural regions, which are home to over 80 percent of all refugees, are home to about large out of school populations amounting to 44 percent of 6-10-year-olds and 38. 7 percent of 11-14-year-olds. MENFOP also support language specific interventions for refugee populations to support the large number of Somali, Yemeni and other sub-population needs in these settings. The proposed operation will also focus on support to youth members of the refugee population both in terms of training and access to job opportunities. There are three 29 https: / / www. borgenmagazine. com / education-in-djibouti / 30 The Project had initially envisaged the use of differentiated training costs to incentivize training providers to target the most disadvantaged groups and increase their access to training opportunities. This approach, which has been used successfully in other similar projects, but the lack of a reliable set of estimates on area and location specific unit costs makes the use of this approach impossible. A key project objective will be to establish unit costs by the Project mid-term for programs in these priority areas.", + "ner_text": [ + [ + 715, + 735, + "named" + ], + [ + 84, + 100, + "refugee related data <> reference population" + ], + [ + 123, + 131, + "refugee related data <> data geography" + ], + [ + 251, + 255, + "refugee related data <> reference year" + ], + [ + 378, + 384, + "refugee related data <> publisher" + ], + [ + 553, + 561, + "refugee related data <> data geography" + ], + [ + 682, + 688, + "refugee related data <> publisher" + ], + [ + 742, + 751, + "refugee related data <> reference year" + ], + [ + 779, + 816, + "refugee related data <> data type" + ], + [ + 866, + 870, + "refugee related data <> publication year" + ], + [ + 1204, + 1210, + "refugee related data <> publisher" + ] + ], + "validated": true, + "empirical_context": "MENFOP has developed a roadmap for this transition which is regularly followed up by senior officials of the ministry. MENFOP has been keeping track of refugee related data since 2017 / 18 and include these in their annual educational statistical tables. A recent out of school children ( OOSC ) survey 2019, estimates that about 42 percent of 6-10-year-old refugees and 40 percent of 11-14-year-old refugees are enrolled in schools nationally.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a structured collection of data related to refugees that is being monitored and reported.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data being tracked over time.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data related to refugees that is being monitored and reported.", + "contextual_signal": "mentioned as data being tracked and included in annual statistical tables", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "145_689710PAD0P1300Official0Use0Only090", + "page": 183, + "text": "The RAPs for the step 1 provides detailed information on the census survey, socio-economic information, resettlement assistance in terms of type of livelihood restoration for different ethnic and economic groups, and a detailed monitoring and evaluation and audit system to be implemented. There are five ethnic groups: Songhai, Peulh, Touareg, Kourtheyes, and Wogos. Although agriculture is the main economic activity for about 89 % of the heads of households for the PAPs during step 1 of the resettlement process, the RAPs for that step include other compensation than irrigated land ( for example activities to improve traditional fishing, etc. ). The RAPs for the step 1 includes a detailed description of the institutional arrangements for the implementation of the RAPs ( e. g. the monitoring and evaluation aspects ). There is a clear description on the role of each stakeholder in the implementation and monitoring and evaluation of the RAPs ( including ways that PAPs will be involved in the monitoring of the RAPs ). The monitoring will be based on monitoring of the following aspects: social and economic, technical, and restoration of livelihood. The monitoring indicators will be in the form of general indicators ( such as number of resettled people, amount of compensation received, etc ), and specific social and economic indicators ( such as number of children going to school, access to drinking water, electricity, crop production, etc. ). The follow up on the post-resettlement monitoring will be conducted for three years and could be extended based on conclusions of the independent audit of the RAP for the step 1.", + "ner_text": [ + [ + 61, + 74, + "named" + ], + [ + 1238, + 1264, + "census survey <> data description" + ], + [ + 1360, + 1394, + "census survey <> data description" + ] + ], + "validated": true, + "empirical_context": "The RAPs for the step 1 provides detailed information on the census survey, socio-economic information, resettlement assistance in terms of type of livelihood restoration for different ethnic and economic groups, and a detailed monitoring and evaluation and audit system to be implemented. There are five ethnic groups: Songhai, Peulh, Touareg, Kourtheyes, and Wogos.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it provides detailed information on socio-economic factors and is explicitly mentioned as part of the data collection process.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'census survey' typically refers to a structured collection of data gathered from a population.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it provides detailed information on socio-economic factors and is explicitly mentioned as part of the data collection process.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 38, + "text": "The project will rely on WFP to provide technical assistance in implementing the prepaid E-card voucher system for the extremely poor Lebanese households, especially those affected by the Syrian crisis. The NPTP will collaborate with WFP to provide all the necessary documentation to distribute the e-card food voucher benefit on a monthly basis. 14. The NPTP will provide a file of eligible beneficiaries ( Household Registry Statement ) once or twice a month to WFP based on the agreed on criteria between NPTP and WFP. The Household Registry statement includes the NPTP household registration number, the family size, the date of birth of the family head, gender of the household head, and relevant distribution SDC. The NPTP beneficiary file is imported into the WFP database. WFP will provide BLF with the Household Registry Statement; based on the statement, the bank will assign a card number corresponding to the NPTP registration number of each household, and print the cards accordingly. The printed cards are delivered to the WFP office by the bank. The cards are arranged by BLF according to the SDC distribution area and sorted by NPTP registration number. 15.", + "ner_text": [ + [ + 408, + 436, + "named" + ], + [ + 25, + 28, + "Household Registry Statement <> publisher" + ], + [ + 119, + 153, + "Household Registry Statement <> reference population" + ], + [ + 464, + 467, + "Household Registry Statement <> publisher" + ], + [ + 508, + 512, + "Household Registry Statement <> publisher" + ], + [ + 517, + 520, + "Household Registry Statement <> publisher" + ], + [ + 608, + 619, + "Household Registry Statement <> data description" + ], + [ + 625, + 657, + "Household Registry Statement <> data description" + ], + [ + 659, + 687, + "Household Registry Statement <> data description" + ] + ], + "validated": true, + "empirical_context": "14. The NPTP will provide a file of eligible beneficiaries ( Household Registry Statement ) once or twice a month to WFP based on the agreed on criteria between NPTP and WFP. The Household Registry statement includes the NPTP household registration number, the family size, the date of birth of the family head, gender of the household head, and relevant distribution SDC.", + "type": "registry", + "explanation": "It is indeed a dataset as it provides a structured collection of data regarding household registration for the purpose of eligibility.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes structured information about eligible beneficiaries.", + "contextual_reason_agent": "It is indeed a dataset as it provides a structured collection of data regarding household registration for the purpose of eligibility.", + "contextual_signal": "described as a file of eligible beneficiaries", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "182_multi0page", + "page": 8, + "text": "Poverty and Social Impact of Transition: The recent repeated shocks - - a consequence of political strife and social unrest - - have created major economic and social problems in the country. Because of these shocks, many Albanians have realized a loss of income and earning opportunities and have incurred risks to their personal well-being and safety. Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "ner_text": [ + [ + 975, + 1009, + "named" + ], + [ + 222, + 231, + "Multiple Indicators Cluster Survey <> reference population" + ], + [ + 392, + 403, + "Multiple Indicators Cluster Survey <> data geography" + ], + [ + 685, + 689, + "Multiple Indicators Cluster Survey <> reference year" + ], + [ + 1070, + 1078, + "Multiple Indicators Cluster Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "type": "survey", + "explanation": "This is a dataset as it is mentioned in the context of updating poverty information through empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on living conditions.", + "contextual_reason_agent": "This is a dataset as it is mentioned in the context of updating poverty information through empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "187_multi-page", + "page": 30, + "text": "Direct Bank involvement and assistance in undertaking and ensuring widespread dissemination of key impact monitoring inteventions will be particularly important in this regard These include the surveys of public officials, as well as the Public Expenditure Tracking Surveys. In addition, intensive and close supervision of the PAR by the Bank will be required to continuously adjust strategy and tactics to rapid and constantly changing conditions and challenges. To this end, the pairing of the PAR with ( i ) related Bank policy operations ( the PESP and SAC ), and ( ii ) complementary ESW ( Public Expenditure Review ) should help to address the Bank ' s capacity to continually reinforce counterpart commitment. - 27 -", + "ner_text": [ + [ + 238, + 273, + "named" + ], + [ + 7, + 11, + "Public Expenditure Tracking Surveys <> publisher" + ], + [ + 205, + 221, + "Public Expenditure Tracking Surveys <> reference population" + ], + [ + 338, + 342, + "Public Expenditure Tracking Surveys <> publisher" + ], + [ + 519, + 523, + "Public Expenditure Tracking Surveys <> publisher" + ], + [ + 650, + 654, + "Public Expenditure Tracking Surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "Direct Bank involvement and assistance in undertaking and ensuring widespread dissemination of key impact monitoring inteventions will be particularly important in this regard These include the surveys of public officials, as well as the Public Expenditure Tracking Surveys. In addition, intensive and close supervision of the PAR by the Bank will be required to continuously adjust strategy and tactics to rapid and constantly changing conditions and challenges.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned in the context as a survey used for monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'surveys', which often refers to structured data collections.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned in the context as a survey used for monitoring and evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "186_multi-page", + "page": 47, + "text": "the sector. It should be pointed out that many of the cited indicators are estimates based on projected figures from before the war since MINEDUC has not collected school-based data since 1992. The RHRDP will, as a priority, support development of an education management information system. In 1998, primary enrollment was estimated to be about 1, 273, 000 students, with a gross enrollment rate ( GER ) of approximately 89 percent. Large numbers of underage / overage students explain the significantly lower primary net enrollment rate of about 65 percent. Efficiency gains, resulting from lower repetition and dropout rates, could significantly reduce the number of over-age students and raise the net enrollment rate. The existence of vast numbers of untrained primary teachers is also evident in the very high ratio of pupils to trained teachers ( 125: 1 ). Access to secondary education drops precipitously. Only about 90, 000 secondary students ( both cycles ) were enrolled in government, church and private secondary schools. Producing-and retaining adequate numbers of trained secondary school teachers is a challenge to the secondary system as it is in the primary.", + "ner_text": [ + [ + 251, + 290, + "named" + ] + ], + "validated": false, + "empirical_context": "It should be pointed out that many of the cited indicators are estimates based on projected figures from before the war since MINEDUC has not collected school-based data since 1992. The RHRDP will, as a priority, support development of an education management information system. In 1998, primary enrollment was estimated to be about 1, 273, 000 students, with a gross enrollment rate ( GER ) of approximately 89 percent.", + "type": "system", + "explanation": "However, the context indicates it is described as a system to support development, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data management.", + "contextual_reason_agent": "However, the context indicates it is described as a system to support development, not as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "167_27761", + "page": 32, + "text": "For health issues, the levels of malnutrition ( i. e., height and weight ) and anemia ( i. e., hemoglobin ) every time the child visits the health center will be gathered and the data collected in pertinent verification forms. For education, the levels of school enrollment and the dropout rates will be measured using the verification forms. In this manner, the outcome indicators will be recorded in the MIS so that an external fm can analyze the information and determine the changes. Outcome indicators will be obtained from the MIS. Data will be gathered for the three kinds of conditions: attendance levels, payments, and compliance; also indicators for beneficiary households, vulnerable members, and financial indicators. This information will be gathered in regular reports to be prepared initially by the project coordination team and afterward by a department at MOSA. In addition to the above, the external firm that analyzes the outcome indicators will also perform operational monitoring for the program at least once a year. Operations under the program vis - & vis the operational manual will be assessed through field visits, consultation to stakeholders, and interviews with program officials, and a review of progress reports. The firm will make recommendations to introduce corrective measures and improve operational efficiency.", + "ner_text": [ + [ + 406, + 409, + "named" + ] + ], + "validated": false, + "empirical_context": "For education, the levels of school enrollment and the dropout rates will be measured using the verification forms. In this manner, the outcome indicators will be recorded in the MIS so that an external fm can analyze the information and determine the changes. Outcome indicators will be obtained from the MIS.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with recording and analyzing information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 14, + "text": "Political instability in the region, which remains unpredictable because of national and neighboring countries \u2019 political instability, conflict, and humanitarian crisis, risks future refugee inflows into Ethiopian territory, and these agreements will strengthen Ethiopia \u2019 s ability to prepare for these. 15 World Bank. ET - Health SDG Program for Results ( P123531 ) Implementation Completion Report ( ICR ) Review. https: / / documents1. worldbank. org / curated / en / 099041923195522835 / pdf / P1235310c601d00940959307b7687b4eccf. pdf. 16 The GoE ( RRS ) is now fine-tuning its draft pledges for the upcoming second GRF to be held in December 2023. It is expected that refugee inclusion in the Fayda Digital ID system will contribute toward filling identification-related gaps in the pledge implementation process. 17 Fayda is the official brand name of the Ethiopian digital identification initiative.", + "ner_text": [ + [ + 700, + 723, + "named" + ] + ], + "validated": false, + "empirical_context": "16 The GoE ( RRS ) is now fine-tuning its draft pledges for the upcoming second GRF to be held in December 2023. It is expected that refugee inclusion in the Fayda Digital ID system will contribute toward filling identification-related gaps in the pledge implementation process. 17 Fayda is the official brand name of the Ethiopian digital identification initiative.", + "type": "system", + "explanation": "However, the context indicates that it is described as an initiative rather than a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Digital ID system', which suggests a structured collection of data.", + "contextual_reason_agent": "However, the context indicates that it is described as an initiative rather than a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 15, + "text": "For example, the average value of assets among all households ( both refugee and host ) in the District of Arua is UGX 560, 000 ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 9. COVID-19 is already disrupting incomes and livelihoods, with the poorest wealth quintiles most adversely affected. Since the COVID-19 outbreak, 91 percent of households have reported reduced income ( or losses ) from at least one of their sources of livelihood. Services, such as trade, transport and accommodation and food services have been the sectors most affected by the COVID-19 restrictions and have also lost the highest share of workers. Although employment levels have recovered partially, income levels for many households have not returned to pre-COVID-19 levels. By April 2021, income levels were still below pre-COVID-19 levels for at least one third of households. The second lockdown in mid-2021 is likely to have stalled and even possibly reversed progress in income recovery. In fact, 49 percent of MSMEs interviewed on the impact of the second lockdown 10 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "ner_text": [ + [ + 1239, + 1263, + "named" + ], + [ + 95, + 111, + "district-level firm data <> data geography" + ], + [ + 1209, + 1213, + "district-level firm data <> publication year" + ], + [ + 1419, + 1437, + "district-level firm data <> usage context" + ] + ], + "validated": true, + "empirical_context": "org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data collected from a census, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected at the district level.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data collected from a census, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "182_multi0page", + "page": 20, + "text": "and technical review will be carried out prior to the project mid-term review and completion; ii ) independent financial audit will be undertaken annually; and iii ) World Bank and donor supervision and mid-term review missions. The following tools will be used for the performance monitoring: i ) Management Information System ( MIS ); ii ) semi-annual progress reports; iii ) special reports; and norms and standards. The coordinators of sub-components will submit semi-annual reports in a pre-agreed format to the PCU. The PCU will prepare summary report and will submit them to the GASS and MOLSA, World Bank and co-financiers. b ) Project Impact Monitoring will be carried out through qualitative and quantitative impact assessments studies to be carried out at the district and regional level and beneficiary impact assessments. In addition, the project will finance a regular living standards measurement survey ( LSMS ) which will monitor the broader impact of the Government reform program. c ) Monitoring of Community-based Services: each community-based sub-project will have a monitoring indicators and evaluation plan incorporated in project design. The monitoring will be carried out by the social services officers of GASS regional and central offices based on the norms and standards and in conformity with monitoring indicators outlined in individual contracts.", + "ner_text": [ + [ + 883, + 918, + "named" + ], + [ + 166, + 176, + "living standards measurement survey <> publisher" + ], + [ + 602, + 612, + "living standards measurement survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "b ) Project Impact Monitoring will be carried out through qualitative and quantitative impact assessments studies to be carried out at the district and regional level and beneficiary impact assessments. In addition, the project will finance a regular living standards measurement survey ( LSMS ) which will monitor the broader impact of the Government reform program. c ) Monitoring of Community-based Services: each community-based sub-project will have a monitoring indicators and evaluation plan incorporated in project design.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that will be financed to monitor impacts, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey designed to collect data on living standards.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that will be financed to monitor impacts, indicating its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 52, + "text": "Annually Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet NITA-U, UCC & OPM", + "ner_text": [ + [ + 29, + 40, + "named" + ], + [ + 93, + 112, + "UCC Surveys <> reference population" + ], + [ + 126, + 139, + "UCC Surveys <> reference population" + ], + [ + 205, + 218, + "UCC Surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "Annually Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet NITA-U, UCC & OPM", + "type": "survey", + "explanation": "In the context, 'UCC Surveys' is explicitly mentioned as a source of data to determine beneficiaries, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'UCC Surveys' implies a structured collection of data gathered from host communities and refugee settlements.", + "contextual_reason_agent": "In the context, 'UCC Surveys' is explicitly mentioned as a source of data to determine beneficiaries, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 25, + "text": "service delivery. HIV services delivered in the community will, however, are based on specific target populations as such data can be collected from UNHCR ( for component 1a ) and the NGOs and other appropriate agencies ( for component 1b ) that implement such projects. All routine data collected, will be based on the country \u2019 s data collection protocols, and will be sent to both the PFO and the NAC M & E unit to ensure that the NACs have ownership and are informed of developments. 80. Regular supervision by PFO, IRAPP Country Focal Points and UNHCR will assess implementation quality and verify data submitted and will be funded, for activities implemented by this project, by the grant. For this purpose, supervision guidelines will be developed and piloted for use by those involved in supervision and data quality assurance. 4. Sustainability 81. Long-term financial sustainability depends on external assistance, and therefore additional resources will be required urgently. Over the four year implementation period, financial resources from the project are sufficient for start-up of activities, and to mobilize additional funding from other partners. ( Currently country contributions cover the operating costs of the IGAD Secretariat.", + "ner_text": [ + [ + 275, + 287, + "named" + ] + ], + "validated": false, + "empirical_context": "HIV services delivered in the community will, however, are based on specific target populations as such data can be collected from UNHCR ( for component 1a ) and the NGOs and other appropriate agencies ( for component 1b ) that implement such projects. All routine data collected, will be based on the country \u2019 s data collection protocols, and will be sent to both the PFO and the NAC M & E unit to ensure that the NACs have ownership and are informed of developments. 80.", + "type": "data", + "explanation": "'Routine data' is mentioned as a type of information collected but not as a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'routine data' refers to a structured collection of data used in the context of HIV services.", + "contextual_reason_agent": "'Routine data' is mentioned as a type of information collected but not as a specific dataset or data source.", + "contextual_signal": "mentioned only as a type of information collected, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 13, + "text": "Despite continued efforts, prevention gaps related to HIV / AIDS, sexually transmitted infections ( STIs ), and reproductive health remain an obstacle to reducing epidemic growth. As noted earlier, although there has been a reduction in prevalence among specific population sub-groups since 2003, overall prevalence rates still remain unacceptably high. With respect to knowledge levels, a 2004 national survey found that although 93 percent o f the respondents had heard o f HIV / AIDS, the proportion o f respondents 15-24 years who both correctly identify ways o f preventing the sexual transmission of HIV and who reject major misconceptions about HIV transmission or prevention increased merely from 36 percent in 2001 to 38 percent in 2004. The Government \u2019 s target for this critical knowledge indicator was 90 percent by 2005. It i s unfortunately clear that this, and several other key prevention targets outlined in the National HIV / AIDS Strategic Framework ( 2003-2009 ), will not be met. 13. With respect to behavioral risk, the BAIS I1 Survey ( 2004 ) indicated that 76 percent o f young people ( 15-24 years ) have had sex with a non-marital, non-cohabiting sexual partner in the last 12 months. Additionally, this assessment indicated an increase in the proportion o f people aged 15-24 years reporting unprotected sex in the past month ( after consuming alcohol ) - from 5 4", + "ner_text": [ + [ + 1043, + 1057, + "named" + ], + [ + 390, + 394, + "BAIS I1 Survey <> publication year" + ], + [ + 507, + 530, + "BAIS I1 Survey <> reference population" + ], + [ + 741, + 745, + "BAIS I1 Survey <> reference year" + ], + [ + 1060, + 1064, + "BAIS I1 Survey <> publication year" + ], + [ + 1097, + 1109, + "BAIS I1 Survey <> reference population" + ], + [ + 1286, + 1309, + "BAIS I1 Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "13. With respect to behavioral risk, the BAIS I1 Survey ( 2004 ) indicated that 76 percent o f young people ( 15-24 years ) have had sex with a non-marital, non-cohabiting sexual partner in the last 12 months. Additionally, this assessment indicated an increase in the proportion o f people aged 15-24 years reporting unprotected sex in the past month ( after consuming alcohol ) - from 5 4", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that collects and reports data on sexual behavior among young people.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on behavioral risks.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that collects and reports data on sexual behavior among young people.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 56, + "text": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 52 of 104 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Land area under sustainable landscape management practices The indicator measures, in hectares, the land area for which new and / or improved sustainable landscape management practices have been introduced. Land is the terrestrial biologically productive system comprising soil, vegetation, and the associated ecological and hydrological processes; Adoption refers to change of practice or change in the use of a technology promoted or introduced by the project; Sustainable landscape management ( SLM ) practices refers to a combination of at least two technologies and approaches to increase land quality and restore degraded lands for example, Annual Project reports, Project management information systems A combination of methods \u2013 field based and digital mapping, and remote sensing data, project reports and MIS.", + "ner_text": [ + [ + 990, + 1028, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 52 of 104 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Land area under sustainable landscape management practices The indicator measures, in hectares, the land area for which new and / or improved sustainable landscape management practices have been introduced. Land is the terrestrial biologically productive system comprising soil, vegetation, and the associated ecological and hydrological processes; Adoption refers to change of practice or change in the use of a technology promoted or introduced by the project; Sustainable landscape management ( SLM ) practices refers to a combination of at least two technologies and approaches to increase land quality and restore degraded lands for example, Annual Project reports, Project management information systems A combination of methods \u2013 field based and digital mapping, and remote sensing data, project reports and MIS.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information systems' which often relates to data management.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system for managing information rather than a structured collection of data.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 102, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 90 publish their procurement plans. These limitations have a negative impact on procurement outcomes such as: ( a ) difficulty in following up and monitoring procurement performance with a lack of milestones for the procurement activities, ( b ) not providing sufficient advance notice to the private sector on upcoming procurement opportunities and thus lowering the level of competition, ( c ) using less competitive procurement methods, and ( d ) creating problems for following up and preparing audit plans for the regulatory bodies. 12. Procurement profile of the program. Based on IBEX data for EFY 2011 ( 2019 ), on average 29 percent of the total regional budget for all sectors was spent through procurement which is a reduction from EFY2007 ( 2015 ) data when the share of procurement from the total budget was 50 percent. Yet, the total amount allocated to be spent through procurement for eight regions is ETB 57. 5 billion, which is a significant amount. 13. When the data are seen specifically for the four human capital sectors, the allocation for these sectors out of the total for all sectors is at 54 percent, with ETB 104 billion allocated for these sectors out of the total ETB 192 billion.", + "ner_text": [ + [ + 647, + 656, + "named" + ], + [ + 661, + 669, + "IBEX data <> reference year" + ], + [ + 672, + 676, + "IBEX data <> publication year" + ] + ], + "validated": true, + "empirical_context": "Procurement profile of the program. Based on IBEX data for EFY 2011 ( 2019 ), on average 29 percent of the total regional budget for all sectors was spent through procurement which is a reduction from EFY2007 ( 2015 ) data when the share of procurement from the total budget was 50 percent. Yet, the total amount allocated to be spent through procurement for eight regions is ETB 57.", + "type": "data", + "explanation": "In the context, 'IBEX data' is used to provide empirical figures regarding procurement spending, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'IBEX data' is a dataset because it is referenced in relation to budget procurement statistics.", + "contextual_reason_agent": "In the context, 'IBEX data' is used to provide empirical figures regarding procurement spending, indicating it functions as a data source.", + "contextual_signal": "follows 'based on' indicating it is used as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 93, + "text": "The release of IDA funds under this result area will be linked to the following DLI: Establishment of a standardized student learning assessment system for primary and secondary education ( DLI 5 ). 45. The national team prepared a detailed action plan for this activity. Table 2. 6 provides a simplified overview of the action plan. Table 2. 6. Activity Simplified Action Plan ( during Project life ) Year 1 Year 2 Year 3 Year 4 Year 5 Establishment of the learning assessment unit Report on pilot testing ( primary class 4 and 6 ) Assessment report including dissemination plan and recommendations for ( primary class 4 and 6 ) Report on pilot testing ( secondary form 2 and 4 ) Assessment report including dissemination plan and recommendations for ( secondary form 2 and 4 ) Source: MINEPAT. 2017. \u201c Programme d \u2019 appui \u00e0 la r\u00e9forme de l \u2019 \u00e9ducation au Cameroun. Document du Projet \u201d 46. Results Area 6: An integrated EMIS functional and operational. The objective of this result area is to support the Government \u2019 s ongoing efforts to establish a functional and operational integrated EMIS. The integrated EMIS will build on the efforts on the CEQUIL Project to address challenges related to the limited availability of reliable data on the performance of the education system, namely an assessment of the EMIS within MINEDUB and MINESEC.", + "ner_text": [ + [ + 1312, + 1316, + "named" + ] + ], + "validated": false, + "empirical_context": "The objective of this result area is to support the Government \u2019 s ongoing efforts to establish a functional and operational integrated EMIS. The integrated EMIS will build on the efforts on the CEQUIL Project to address challenges related to the limited availability of reliable data on the performance of the education system, namely an assessment of the EMIS within MINEDUB and MINESEC.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data management in education.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "088_UGANDA-PAD-04272018", + "page": 53, + "text": "The largest part of the funding goes to the LG level \u2013 the municipal development grant ( now named DDEG under the IGFTR ) US $ 136 million, and the CB grants ( CBG ) US $ 15 million with the balance going to support results at the MLHUD level to support CB activities as well as Program implementation. The last grant cycle has just been released, based on the results from the annual performance assessments ( APA ). 3. USMID AF will provide support to part of the overall GoU Intergovernmental Fiscal Transfer Reform Program, which is aiming at improving the overall grant system, including size, allocation, modalities and efficiency in the use of transfers. Under this program, the discretionary development equalisation grant ( DDEG ) is supporting multi-sectoral investments at the LG level, and under this is the \u201c urban window \u201d with the targeting of the USMID municipalities being a critical element. The sub-window \u201c USMID municipalities \u201d as mentioned in the MTEF and DDEG guidelines from Office of the Prime Minister, 2017 will be the target for financial support, and the transitional grant window for development grants when it comes to the sub-window for refugees and host communities providing additional funding the DDEG allocations for these host areas. 68 4. Program funds will be provided through disbursement-linked indicators ( DLI ).", + "ner_text": [ + [ + 378, + 408, + "named" + ] + ], + "validated": false, + "empirical_context": "The largest part of the funding goes to the LG level \u2013 the municipal development grant ( now named DDEG under the IGFTR ) US $ 136 million, and the CB grants ( CBG ) US $ 15 million with the balance going to support results at the MLHUD level to support CB activities as well as Program implementation. The last grant cycle has just been released, based on the results from the annual performance assessments ( APA ). 3.", + "type": "assessment", + "explanation": "However, it is not a dataset as it refers to an assessment process rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'annual performance assessments' suggests a systematic evaluation process that could involve data collection.", + "contextual_reason_agent": "However, it is not a dataset as it refers to an assessment process rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 93, + "text": "An appropriate policy action is needed to improve the quality of education, which will increase the potential of the youth and equip them with the skills that will enable them to join the productive labor force. Returns to Education 5. In terms of the benefits of education, Niger \u2019 s labor market provides a strong signal that investment in education yields higher returns and better employment opportunities to both individuals and households and contributes to reducing inequality in access to education as well as post-education labor market outcomes. In Niger, evidence from the 2014 household survey, labelled ECVMA, reveals that higher levels of education are associated with better earnings, an increased probability of wage employment, and increased likelihood of finding employment in more productive sectors; and the 48 % 52 % 44 % 87 % 42 % 58 % 63 % 44 % 19 % 47 % 62 % 22 % 28 % 74 % 118 % National Male Female Urban Rural Q1 Q5 Agadez Diffa Dosso Maradi Tahoua Tillaberi Zinder Niamey Gender Area Wealth Quintile Region", + "ner_text": [ + [ + 584, + 605, + "named" + ] + ], + "validated": true, + "empirical_context": "In terms of the benefits of education, Niger \u2019 s labor market provides a strong signal that investment in education yields higher returns and better employment opportunities to both individuals and households and contributes to reducing inequality in access to education as well as post-education labor market outcomes. In Niger, evidence from the 2014 household survey, labelled ECVMA, reveals that higher levels of education are associated with better earnings, an increased probability of wage employment, and increased likelihood of finding employment in more productive sectors; and the 48 % 52 % 44 % 87 % 42 % 58 % 63 % 44 % 19 % 47 % 62 % 22 % 28 % 74 % 118 % National Male Female Urban Rural Q1 Q5 Agadez Diffa Dosso Maradi Tahoua Tillaberi Zinder Niamey Gender Area Wealth Quintile Region", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a household survey that provides data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey conducted in 2014 that provides empirical evidence.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a household survey that provides data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 11, + "text": "Poverty headcount ratio at US $ 2. 15 a day ( 2017 PPP ) ( % of population ) - Niger. https: / / data. worldbank. org / indicator / SI. POV. DDAY? locations = NE 5 In the transport sector women hold less than one percent of jobs. Although data on women in technical roles is unavailable, their share is likely lower due to inadequate skills and strong gender norms. 6 When referring to host communities in this document, internally displaced persons are considered part of the host population unless noted otherwise. 7 P. Thenkabail et al. 2016. Global Food Security Support Analysis Data ( GFSAD ) Crop Dominance 2010 Global 1 km V001 [ Ddata set ]. NASA Making Earth System Data Records for Use in Research Environments ( MEaSUREs ) NASA EOSDIS Land Processes DAAC. https: / / lpdaac. usgs. gov / products / gfsad1kcdv001 /. 8 The TSR corridor is one of the oldest transnational road corridors in Africa. It is 4, 500 km long, crossing the Sahara Desert and linking Algeria, Chad, Mali, Niger, Nigeria, and Tunisia. 9 The TSH, or Trans-African Highway 5, connects Dakar, Senegal to N ' Djamena, Chad, passing through Mali, Burkina Faso, Niger, and Nigeria. It links Niamey and Maradi in Niger with Burkina Faso and Nigeria, respectively. 10 These figures correspond to populations located within a 150-km radius from the RN1 Maradi \u2013 Zinder section ( WorldPop, 2020 ). 11 UNHCR. 2025. UNHCR Niger - Map Population of Concern - Mars 2025. https: / / data. unhcr. org / en / documents / details / 115551.", + "ner_text": [ + [ + 546, + 588, + "named" + ], + [ + 79, + 84, + "Global Food Security Support Analysis Data <> data geography" + ], + [ + 519, + 539, + "Global Food Security Support Analysis Data <> author" + ], + [ + 540, + 544, + "Global Food Security Support Analysis Data <> publication year" + ], + [ + 651, + 655, + "Global Food Security Support Analysis Data <> publisher" + ], + [ + 735, + 739, + "Global Food Security Support Analysis Data <> publisher" + ], + [ + 989, + 994, + "Global Food Security Support Analysis Data <> data geography" + ], + [ + 1139, + 1144, + "Global Food Security Support Analysis Data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Thenkabail et al. 2016. Global Food Security Support Analysis Data ( GFSAD ) Crop Dominance 2010 Global 1 km V001 [ Ddata set ]. NASA Making Earth System Data Records for Use in Research Environments ( MEaSUREs ) NASA EOSDIS Land Processes DAAC.", + "type": "dataset", + "explanation": "This is indeed a dataset as it is explicitly labeled as such and is part of a structured collection of data used for research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in the title and is referenced in a context related to data records.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly labeled as such and is part of a structured collection of data used for research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 83, + "text": "The focus on this Project is to establish a solid M & E foundation, which will enable the sector to move toward results-based implementation approaches. 99. The program will support the NWCO, RWCOs, and WASH Sector PMUs for regular organizing of the WASH Annual Review Meetings to track implementation progress and provide feedback for improvement. This platform will be used to enhance learning and experience sharing among program woredas and towns. Under the platform, the best-performing woredas and towns will be selected based on a clear evaluation guideline using predefined service delivery result indicators and will be recognized / awarded. This is expected to create positive competition toward meeting intended results among program woredas and towns. Once this initiative is tested and piloted under the CWA, the platform will be scaled up to the wider OWNP. Information from the MIS will be the basis for selecting best-performing woredas and will be used as motivation to encourage regular updating of information into the MIS. Implementation Support Plan and Resource Requirements 100. Many of the Project team members are based out of the World Bank \u2019 s country office in Addis Ababa, which helps provide timely and effective implementation support to the Client. Semiannual supervision missions and targeted follow-up technical missions will focus on the areas described in the following paragraphs. 101. Strategic support: The World Bank implementation support missions will meet with national and local authorities to: ( i ) review progress on the Project \u2019 s activities; ( ii ) discuss strategic alignment of the Project \u2019 s different activities and the activities of relevant stakeholders; and ( iii ) evaluate progress on cross-cutting issues, such as M & E, gender, training, communication, dissemination of Project results and experiences, and coordination between relevant stakeholders. Table A1. 8: Thematic Support Time Focus Skills Needed Resource Partner Role First 12 months Preparation of tendering contracts; feasibility studies and detailed engineering designs; safeguards screening and mitigation plans Procurement, FM, safeguards and infrastructure specialists; hydrology and water resources specialists Supervision budget Provide support, national expertise, and technical advice", + "ner_text": [ + [ + 893, + 896, + "named" + ] + ], + "validated": false, + "empirical_context": "Once this initiative is tested and piloted under the CWA, the platform will be scaled up to the wider OWNP. Information from the MIS will be the basis for selecting best-performing woredas and will be used as motivation to encourage regular updating of information into the MIS. Implementation Support Plan and Resource Requirements 100.", + "type": "system", + "explanation": "'MIS' is not a dataset as it is described as a management information system, which does not function as a structured collection of data in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is mentioned in the context of information and data collection.", + "contextual_reason_agent": "'MIS' is not a dataset as it is described as a management information system, which does not function as a structured collection of data in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 19, + "text": "The project will deliver benefits to 1. 6 million residents across the GBML, located within the BMLWE areas of service provision that are geographically divided into four zones and 21 municipalities. Beneficiaries will benefit from increased volume and quality of public water provided to the household and a subsequent decrease in the cost of alternative sources of water. 13 39. The decrease in total cost of water will directly and positively impact the poor. Of the 506, 000 people across the GBML that live below US $ 4 per day, 460, 000 are located in the project area, as determined by a project specific survey of 1, 200 project households, Lebanon \u2019 s 2005 Poverty Assessment, and available census data. 40. A household survey of 1, 200 beneficiary households across the GBML was conducted as part of project preparation. Half the project survey respondents reported per capita incomes of less than LBP 600, 000 LBP ( US $ 400 ) per month, equivalent to less than the US $ 4 per day national poverty line. A map of the percent of surveyed households within each municipality whose monthly income is within the bottom third relative to the sample is presented in Figure 1: 13 Households currently buy tanker water, bottled water and / or construct private wells to supplement the low volumes of public water. Water supplied by the Bisri dam will substitute these alternative sources of water supply. 8", + "ner_text": [ + [ + 595, + 647, + "named" + ], + [ + 71, + 75, + "project specific survey of 1, 200 project households <> data geography" + ], + [ + 497, + 501, + "project specific survey of 1, 200 project households <> data geography" + ], + [ + 719, + 735, + "project specific survey of 1, 200 project households <> data type" + ], + [ + 780, + 784, + "project specific survey of 1, 200 project households <> data geography" + ] + ], + "validated": true, + "empirical_context": "The decrease in total cost of water will directly and positively impact the poor. Of the 506, 000 people across the GBML that live below US $ 4 per day, 460, 000 are located in the project area, as determined by a project specific survey of 1, 200 project households, Lebanon \u2019 s 2005 Poverty Assessment, and available census data. 40.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data from a survey used to inform the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey conducted on a defined group of households.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data from a survey used to inform the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 90, + "text": "85 7. Nevertheless, analysis of recent PISA results ( World Bank, 2008 ) 6 indicate that given its endowments ( such as school, student, and household characteristics ), Jordan should likely be performing even better in terms of cognitive skill development. By Jordan \u2019 s own internal assessment systems, there is significant need for quality enhancing reforms. Pass rates on the general secondary certificate GSC ( essentially the secondary school exit exam also known as the Tawjihi ) are quite low ( about 55 percent of regular track students and under 35 percent of students from \u201c irregular \u201d tracks ), as are results on Jordan \u2019 s own national achievement tests which indicate the majority of 8th and 10th grade students are not proficient in any of the subjects ( Science, Math, English, and Arabic. ) In addition, the Tawjihi, like many exams of its kind, does not measure the same kinds of cognitive skills as does PISA, and therefore it creates a strong incentive against education quality. ERfKE II directly addresses this need to transform the exams, and it is not insignificant that increasing scores on school, national, and international assessments is the first ERfKE II project development objective and key indicator, before completion rates and enrolment rates.", + "ner_text": [ + [ + 39, + 43, + "named" + ], + [ + 54, + 64, + "PISA <> publisher" + ], + [ + 66, + 70, + "PISA <> publication year" + ], + [ + 170, + 176, + "PISA <> data geography" + ], + [ + 261, + 267, + "PISA <> data geography" + ], + [ + 626, + 632, + "PISA <> data geography" + ], + [ + 699, + 726, + "PISA <> reference population" + ] + ], + "validated": true, + "empirical_context": "85 7. Nevertheless, analysis of recent PISA results ( World Bank, 2008 ) 6 indicate that given its endowments ( such as school, student, and household characteristics ), Jordan should likely be performing even better in terms of cognitive skill development. By Jordan \u2019 s own internal assessment systems, there is significant need for quality enhancing reforms.", + "type": "dataset", + "explanation": "PISA is indeed a dataset as it provides structured data used for empirical analysis of educational outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA is known for providing international assessment data on student performance.", + "contextual_reason_agent": "PISA is indeed a dataset as it provides structured data used for empirical analysis of educational outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 12, + "text": "The three poorest provinces \u2013 Ruyigi, Muyinga and Cankuzo \u2013 are located in the north-east and have poverty rates of 85. 4, 83. 5 and 79. 5 percent respectively measured against the national poverty line, well above the national average of 64. 9 percent. Suffering from degraded and scarce land resources, high population density and isolation from centers of economic activity, the north - eastern provinces face acute food security and nutrition challenges. Access to basic infrastructure and services is also limited in the country, but particularly in the north-east. 7 1 ECVMB, Enqu\u00eate sur les Conditions de Vie des M\u00e9nages Burundais ( 2017 ). 2 Demographic and Health Survey ( DHS ) 2016-2017. 3 About 10 percent of Burundian children under 2 years consume a minimum acceptable diet: Demographic and Health Survey 2016-17. 4 After the 2000 Arusha Peace Accords and democratic elections in 2005, Burundi \u2019 s growth accelerated to an average of 4. 3 percent annually from 2007 to 2014. Moreover, its Human Capital Index ( HCI ) value went up from 0. 35 to 0. 38 between 2012 and 2017. 5 The World Bank Systematic Country Diagnostic identifies \u2018 ensuring basic needs are met \u2019 as a priority for the coming years. See World Bank ( 2018 ) Republic of Burundi: Systematic Country Diagnostic. Report No. 122549-BI. 6 See World Bank ( 2011 ) World Development Report 2011: Conflict, Security and Development and World Bank ( 2017 ) World Development Report 2017: Governance and the Law. 7 Data from the United Nations INFORM vulnerability index shows: ( a ) on food security, Ruyigi ranks as the most vulnerable province, with Ngozi second and Cankuzo fifth; ( b ) on infrastructure, Ruyigi ranks number two and Cankuzo third; ( c ) on education, Muyinga ranks second most vulnerable, Ngozi forth, Ruyigi fifth and Cankuzo seventh; and ( d ) on access to health, Ruyigi ranks third and Cankuzo forth.", + "ner_text": [ + [ + 650, + 679, + "named" + ], + [ + 30, + 36, + "Demographic and Health Survey <> data geography" + ], + [ + 640, + 644, + "Demographic and Health Survey <> publication year" + ], + [ + 682, + 685, + "Demographic and Health Survey <> acronym" + ], + [ + 688, + 697, + "Demographic and Health Survey <> publication year" + ], + [ + 1422, + 1426, + "Demographic and Health Survey <> publication year" + ], + [ + 1454, + 1458, + "Demographic and Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "7 1 ECVMB, Enqu\u00eate sur les Conditions de Vie des M\u00e9nages Burundais ( 2017 ). 2 Demographic and Health Survey ( DHS ) 2016-2017. 3 About 10 percent of Burundian children under 2 years consume a minimum acceptable diet: Demographic and Health Survey 2016-17.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as a source of data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a survey providing empirical data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as a source of data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "059_Uganda-Investing-in-Forests-and-Protected-Areas-for-Climate-Smart-Development-Project", + "page": 72, + "text": "STEP. The project will use STEP, a planning and tracking system, which will provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. The MWE is familiar with STEP, which is also used under the IWMDP. The knowledge built, lessons learned, and corrective mechanisms put in place will provide a foundation for the project. 52. Use of national procurement procedures. Due to recent changes in Uganda \u2019 s procurement legal framework, Uganda National Procurement Procedures are not in line with the World Bank Procurement Regulations Section V - Para 5. 4 National Procurement Procedures. In particular, the following provisions are not consistent with Procurement Regulations: ( a ) Use of domestic preference for contracts obtained through open national competitive procedures ( b ) Fees for handling bidder complaints at procuring entity level", + "ner_text": [ + [ + 27, + 31, + "named" + ] + ], + "validated": false, + "empirical_context": "STEP. The project will use STEP, a planning and tracking system, which will provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. The MWE is familiar with STEP, which is also used under the IWMDP.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it is mentioned in the context of providing data on procurement activities.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 23, + "text": "To facilitate data sharing and use, the subcomponent will develop a data visualization and use platform ( software ) focusing on visual representations of TPM and routine data, inclusive of BHI data. Linking of platforms, including DHIS2 and the HSF platform will be integral to the work. The data visualization platform will include visualization of Results Framework data and other core indicators from the HSSP, linking TPM and DHIS2 data using maps, charts, and graphs and will incorporate HSF data along with the overlay of health and meteorologic data to better understand the impact of climatic patterns on health. The platform will include analysis of health service delivery in refugee and host community areas to facilitate improved health service delivery among the critical underserved populations. The development of an integrated, institutionalized, and sustainable platform which will strengthen MoH systems will be emphasized. Annex 2 provides further details on the platform. 43. Subcomponent 3. 3: Contract and Program Management Capacity Development ( PMU; US $ 3. 44 million: US $ 1. 54 million equivalent IDA [ WHR ] and US $ 1. 9 million Trust Funds [ US $ 0. 20 million SDTF and US $ 1. 7 million MDTF ] ).", + "ner_text": [ + [ + 246, + 258, + "named" + ] + ], + "validated": false, + "empirical_context": "To facilitate data sharing and use, the subcomponent will develop a data visualization and use platform ( software ) focusing on visual representations of TPM and routine data, inclusive of BHI data. Linking of platforms, including DHIS2 and the HSF platform will be integral to the work. The data visualization platform will include visualization of Results Framework data and other core indicators from the HSSP, linking TPM and DHIS2 data using maps, charts, and graphs and will incorporate HSF data along with the overlay of health and meteorologic data to better understand the impact of climatic patterns on health.", + "type": "platform", + "explanation": "However, the context indicates that the HSF platform is described as a software tool rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'platform' which could imply a data source.", + "contextual_reason_agent": "However, the context indicates that the HSF platform is described as a software tool rather than a structured collection of data.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 68, + "text": "The MWE is currently developing an SSIP for 2018 \u2013 2023. Recent developments and trends and the foreseeable donor commitments indicate that it is unlikely that Uganda will have adequate WSS funding to achieve the national sector targets and SDGs. The new SSIP will make this funding gap visible, but more support is needed to address this challenge. Therefore, this component will support a sector financing study to support the MWE leadership make strategic decisions given limited resources and explore new financing opportunities from internal and external sources ( including public - private partnership [ PPP ], commercial financing, and green infrastructure investments ). 38. Strengthening WSS regulatory functions. This component will finance TA and capacity building to strengthen the regulatory functions of the MWE. A recent comprehensive assessment and ongoing projects financed by other DPs have been considered in the design of this component to increase synergies in the consolidation of the water sector. The Project will finance training for national and regional regulatory unit staff and service providers on the sector performance monitoring framework, the O & M institutional framework, and other relevant regulatory areas. The planned improvements to UPMIS system ( under Component 1 ) will facilitate the monitoring of performance and compliance of umbrellas with improved information / data quality and reporting by the umbrellas. Umbrella Water Authority Model Background 39. In 2002, the GoU launched the UO to provide O & M support to water authorities in small towns and rural areas. To date, the UWAs have played a key role in keeping the systems functional by providing high-level technical and managerial support. In addition, the UWAs have provided financial support for major repairs, training, and water quality monitoring. The GoU and donors subsidize the UWAs \u2019 services. 40. Despite the effectiveness of the Umbrella support model, the small towns and RGCs have struggled to provide sustainable day-to-day O & M, employ preventative maintenance, and properly", + "ner_text": [ + [ + 1274, + 1279, + "named" + ] + ], + "validated": false, + "empirical_context": "The Project will finance training for national and regional regulatory unit staff and service providers on the sector performance monitoring framework, the O & M institutional framework, and other relevant regulatory areas. The planned improvements to UPMIS system ( under Component 1 ) will facilitate the monitoring of performance and compliance of umbrellas with improved information / data quality and reporting by the umbrellas. Umbrella Water Authority Model Background 39.", + "type": "system", + "explanation": "UPMIS is described as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed UPMIS is a dataset because it is mentioned in the context of monitoring performance and compliance.", + "contextual_reason_agent": "UPMIS is described as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 23, + "text": "The M & E approach for the project is aligned with the Government \u2019 s procedures and data sources and will contribute to improved data quality. All project indicators ( a ) are a subset of the health sector \u2019 s performance indicators available in various data sources including the Kenya Health Information System ( KHIS ); and ( b ) will be collected routinely through project reports. The project will support county health sector annual performance data review meetings as well as availability of key surveys under Component 1. Where relevant, at project closure, data from household and facility surveys will be used to complement routine data to measure project achievement of the PDO. C. Sustainability 33. The project will support priority interventions outlined in the national health strategies to ensure sustainability. The project will build on existing national systems and structures for implementation and fiduciary arrangements. The Government remains committed to improving delivery of primary healthcare services to advance progress towards UHC, and key project activities are aligned with these objectives. The project implementation entities will be drawn from existing Government structures which will ensure continuity of the expected results beyond the project period. In addition,", + "ner_text": [ + [ + 577, + 607, + "named" + ], + [ + 148, + 166, + "household and facility surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "The project will support county health sector annual performance data review meetings as well as availability of key surveys under Component 1. Where relevant, at project closure, data from household and facility surveys will be used to complement routine data to measure project achievement of the PDO. C.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is explicitly mentioned that data from these surveys will be used to measure project achievement.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household and facility surveys' are typically structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is explicitly mentioned that data from these surveys will be used to measure project achievement.", + "contextual_signal": "follows 'data from household and facility surveys will be used to complement routine data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 107, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 95 32. In addition to the IBEX data, the assessment looked at KPI data that were submitted annually as part of DLI deliverables under ESPES. The main data include the total procurement expenditure, list of procurement activities, share of the open bidding procedure, and the bid process time. The data received indicates no large value contract exists at regional level that reaches the Output and Performance-based Road Contracts ( OPRC ) threshold for exclusion of contracts under PforR operations. The maximum contract amount identified is ETB 250 million ( around US $ 7 million ) which is much below the threshold for goods at US $ 30 million. However, it is noted that the total amount of contracts reported under the KPI does not match the data received from IBEX. This indicates that regions are not registering all the contracts for each sector. To address this quality issue, the HCO includes a DLI that requires alignment between the KPI procurement report and budget allocation and expenditure data. 33. While the KPI data have quality issues particularly related to the comprehensiveness of the data captured, the practice is encouraging. It is understood that building systems is a process that passes through many obstacles and the result cannot be achieved in one go. The effort requires continuous engagement and resources.", + "ner_text": [ + [ + 122, + 130, + "named" + ], + [ + 4, + 14, + "KPI data <> publisher" + ], + [ + 15, + 23, + "KPI data <> data geography" + ], + [ + 227, + 256, + "KPI data <> data description" + ], + [ + 258, + 288, + "KPI data <> data description" + ], + [ + 290, + 325, + "KPI data <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 95 32. In addition to the IBEX data, the assessment looked at KPI data that were submitted annually as part of DLI deliverables under ESPES. The main data include the total procurement expenditure, list of procurement activities, share of the open bidding procedure, and the bid process time.", + "type": "data", + "explanation": "In the context, 'KPI data' is explicitly mentioned as part of the assessment, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'KPI data' is a dataset because it refers to specific metrics collected for assessment purposes.", + "contextual_reason_agent": "In the context, 'KPI data' is explicitly mentioned as part of the assessment, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "002_BOSIB-ca473522-8ad0-4c80-9f0d-88bf887f2a2f", + "page": 45, + "text": "The data is then disaggregated by gender, by youth ( < 30 years ) and refugee / host community status Frequency Quarterly Data source Project MIS. Methodology for Data Collection Monitoring project implementation. MIS database. Crosstabulation of popualtion statistics with hecterage benefiting from enhanced environmental management. Geospatial mapping and disaggregation of land / aquatic zone type and activity taken in each area. Data collected by the implementing agency. Responsibility for Data Collection IA Monitoring and Evaluation Plan: Intermediate Results Indicators by Components Social and Economic Services & Infrastructure Climate-resilient infrastructure subprojects completed in target area ( Number ) Description Quantitative indicator counting number of infrastructure subprojects completed in target districts through component 1 ( 1. 1 & 1. 3 ). The data is then disaggregated by type of subproject. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection IA People benefiting from Displacement Crisis Response Mechanism investments ( Number )", + "ner_text": [ + [ + 214, + 226, + "named" + ], + [ + 134, + 145, + "MIS database <> publisher" + ], + [ + 954, + 965, + "MIS database <> publisher" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Monitoring project implementation. MIS database. Crosstabulation of popualtion statistics with hecterage benefiting from enhanced environmental management.", + "type": "database", + "explanation": "This is a dataset as it is explicitly referred to as a database that supports data collection monitoring.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS database' suggests a structured collection of data used for monitoring.", + "contextual_reason_agent": "This is a dataset as it is explicitly referred to as a database that supports data collection monitoring.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "The population census is the most important data collection activity in developing countries for at least two reasons. First, it provides spatial distribution of the population that cannot be obtained with demographic projections. Second, the cartography of the census provides the sampling framework of all other statistical operations ( household surveys, agricultural censuses, enterprise censuses, and so on ). In addition, population census data are used to construct poverty maps, a powerful tool for targeting social programs. Since the census is planned for 2017, the project will contribute to the analysis phase of the population census. 42. The subcomponent will support poverty analysis. As stated, INS conducted its fourth living conditions survey in 2014 ( ECAM 4 ) and a complementary survey financed by the Government early 2017; the first results are expected by end of 2017. The objective of the complementary survey is to update the Cameroon poverty map, probably at the commune level. The survey collects some data at the household level as well as at the community level. Information is also collected on school and health facilities. 43. Finally, the LFS methodology needs to be upgraded and updated, and data collection frequency improved to be relevant for policy making. 44.", + "ner_text": [ + [ + 4, + 21, + "named" + ], + [ + 138, + 176, + "population census <> data description" + ], + [ + 473, + 485, + "population census <> data description" + ], + [ + 566, + 570, + "population census <> publication year" + ], + [ + 840, + 844, + "population census <> publication year" + ], + [ + 887, + 891, + "population census <> publication year" + ], + [ + 952, + 960, + "population census <> data geography" + ] + ], + "validated": true, + "empirical_context": "The population census is the most important data collection activity in developing countries for at least two reasons. First, it provides spatial distribution of the population that cannot be obtained with demographic projections.", + "type": "census", + "explanation": "In the context, it is described as a data collection activity that provides essential information about population distribution.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because a population census is a structured collection of demographic data.", + "contextual_reason_agent": "In the context, it is described as a data collection activity that provides essential information about population distribution.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + }, + "term_stats": { + "total": 6, + "validated": 5, + "not_validated": 1 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 9, + "text": "1 I. STRATEGIC CONTEXT A. Country Context 1. Jordan is a small middle-income country facing severe challenges. These challenges are brought by insecurity in neighboring Syria and Iraq. The total closure of land trade routes with Syria and Iraq and other security-related challenges within and around Jordan adversely affected trade, tourism, investment, and construction. 1 According to a census conducted in 2015, Jordan has a population of 9. 5 million ( of which about a third are non-Jordanian ) and suffers from a high unemployment rate of 13 percent for Jordanians ( about 200, 000 individuals ). Real gross domestic product ( GDP ) growth is estimated to have contracted to 2. 4 percent in 2015 from 3. 1 percent in 2014. 2 GDP growth is forecasted to rebound slightly over 3. 0 percent on average from 2016 to 2018. This low growth rate is insufficient to provide enough jobs to the growing population in Jordan. 2. The crisis in Syria has led to a massive influx of Syrian refugees into Jordan over the past five years. As of June 2016, Jordan hosts 655, 217 Syrian refugees registered with United Nations High Commissioner for Refugees ( UNHCR ), 3 80 percent of whom live in host communities. About 75 percent of Syrian refugees live in the governorates of Mafraq, Irbid, and Amman.", + "ner_text": [ + [ + 389, + 395, + "named" + ], + [ + 45, + 51, + "census <> data geography" + ], + [ + 409, + 413, + "census <> publication year" + ], + [ + 415, + 421, + "census <> data geography" + ], + [ + 519, + 555, + "census <> data description" + ], + [ + 560, + 570, + "census <> reference population" + ], + [ + 996, + 1002, + "census <> data geography" + ], + [ + 1046, + 1052, + "census <> data geography" + ], + [ + 1276, + 1281, + "census <> data geography" + ], + [ + 1287, + 1292, + "census <> data geography" + ] + ], + "validated": true, + "empirical_context": "The total closure of land trade routes with Syria and Iraq and other security-related challenges within and around Jordan adversely affected trade, tourism, investment, and construction. 1 According to a census conducted in 2015, Jordan has a population of 9. 5 million ( of which about a third are non-Jordanian ) and suffers from a high unemployment rate of 13 percent for Jordanians ( about 200, 000 individuals ).", + "type": "census", + "explanation": "In this context, the census is explicitly mentioned as a source of population data for Jordan, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'census' is a dataset because it typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "In this context, the census is explicitly mentioned as a source of population data for Jordan, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 26 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Name: Percentage of beneficiaries taking a more active role in their communities Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "ner_text": [ + [ + 745, + 761, + "named" + ] + ], + "validated": false, + "empirical_context": "For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0.", + "type": "survey", + "explanation": "However, it is not a dataset itself but rather a method of data collection mentioned in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'follow-up survey' implies a structured method of data collection.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a method of data collection mentioned in the context.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 3, + "validated": 1, + "not_validated": 2 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 58, + "text": "48 implementation, and the monitoring and evaluation of the cash transfer program, as a template for other social safety net interventions. 34. Building on the database of eligible households, these modules will include: ( i ) program beneficiary lists with an eventual registration of complementary activities, ( ii ) payment modules ( payroll and the reconciliation from the payment provider ( s ) ), ( iii ) operational tracking of program, and ( iv ) basic monitoring and evaluation, including beneficiary feedback and grievance redress mechanisms when operational. The program beneficiary lists will start with the cash transfer beneficiary list and track beneficiaries \u2019 participation in the complementary activities set-up by the program. While initially, participation will be required but payments will not be conditional on participation, the system will provide the functionality to set up conditionalities in the future. The payment system will include the quarterly / monthly payroll based on beneficiary lists, the amounts transferred to the payment agency ( ies ), the beneficiary receipts and the reconciliation of accounts. The operational tracking module would provide an operational dashboard to enable program managers to plan and track activities, human and material resources and other inputs at the central, provincial and communal levels.", + "ner_text": [ + [ + 227, + 252, + "named" + ], + [ + 661, + 674, + "program beneficiary lists <> reference population" + ] + ], + "validated": true, + "empirical_context": "34. Building on the database of eligible households, these modules will include: ( i ) program beneficiary lists with an eventual registration of complementary activities, ( ii ) payment modules ( payroll and the reconciliation from the payment provider ( s ) ), ( iii ) operational tracking of program, and ( iv ) basic monitoring and evaluation, including beneficiary feedback and grievance redress mechanisms when operational. The program beneficiary lists will start with the cash transfer beneficiary list and track beneficiaries \u2019 participation in the complementary activities set-up by the program.", + "type": "list", + "explanation": "This is indeed a dataset as it refers to a structured collection of information about program beneficiaries used for tracking and operational purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'program beneficiary lists' suggests a structured collection of data regarding beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of information about program beneficiaries used for tracking and operational purposes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "186_multi-page", + "page": 54, + "text": "In addition, a Social Assessment is currently underway with Bank assistance to identify issues related to disadvantaged groups, especially orphans and child-headed households. The information from the household survey and social assessment will be used to develop intervention strategies to widen access to education and training, especially for targeted groups, as well as identify other areas of analysis which need further investigation. Development of the MIS, already begun during project preparation ( with PPF financing ), will be an ongoing and important feature of the project. Relevant education and financial indicators, including school-level information indicators, will be agreed for inclusion in RHRDP efforts to develop the MIS. Finally, the AIDS epidemic and its impact on the education system will be further analyzed. Changes in demand for education and in the supply of labor need to be factored into the planning for human resource capacity of the sector. Projections that take these changes into account are needed to provide the basis for this planning process.", + "ner_text": [ + [ + 201, + 217, + "named" + ], + [ + 60, + 64, + "household survey <> publisher" + ], + [ + 642, + 677, + "household survey <> data description" + ] + ], + "validated": true, + "empirical_context": "In addition, a Social Assessment is currently underway with Bank assistance to identify issues related to disadvantaged groups, especially orphans and child-headed households. The information from the household survey and social assessment will be used to develop intervention strategies to widen access to education and training, especially for targeted groups, as well as identify other areas of analysis which need further investigation. Development of the MIS, already begun during project preparation ( with PPF financing ), will be an ongoing and important feature of the project.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned that the information from the household survey will be used for analysis and intervention strategies.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data gathered from households.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned that the information from the household survey will be used for analysis and intervention strategies.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 19, + "text": "The World Bank Citizens ' Charter Afghanistan Project ( P160567 ) Page 15 of 139 ( d ) Minimum of 10, 500 CDCs / clusters / Gozars6 able to plan, implement, monitor and coordinate development activities as measured by an institutional maturity index. 7 III. PROJECT DESCRIPTION A. Project Components 17. CCAP will seek to address key limitations of line agency efforts and NSP to date, and respond to financial constraints to national development investments. First, it will bring together under one umbrella program the rural and urban community level work. Afghan cities are growing at a rapid rate, caused partially by an influx of returning refugees and internally displaced. The population of Afghan cities is expected to double within the next 15 years and by 2060, one in every two Afghans will be living in cities. 8 The 2013-2014 Afghanistan Living Conditions Survey estimates that 74 percent of the urban population lives in slums. Informal settlements in major cities are growing while the number of poor \u2013 an estimated 29 percent of the urban population \u2013 do not have access to basic services. As urban areas have spread over the years, NSP and other projects have formed some 1, 800 peri-urban and urban CDCs to increase community participation, give voice to urban residents, and provide some vital services.", + "ner_text": [ + [ + 839, + 875, + "named" + ], + [ + 34, + 45, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 559, + 572, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 829, + 838, + "Afghanistan Living Conditions Survey <> publication year" + ], + [ + 839, + 850, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 909, + 925, + "Afghanistan Living Conditions Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The population of Afghan cities is expected to double within the next 15 years and by 2060, one in every two Afghans will be living in cities. 8 The 2013-2014 Afghanistan Living Conditions Survey estimates that 74 percent of the urban population lives in slums. Informal settlements in major cities are growing while the number of poor \u2013 an estimated 29 percent of the urban population \u2013 do not have access to basic services.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it provides empirical estimates about the urban population and living conditions in Afghanistan.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides empirical estimates about the urban population and living conditions in Afghanistan.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 12, + "text": "Despite relatively high per capita income of US $ 1, 862 in 2015, 30 percent of the population lives in poverty and 21 percent lives in extreme poverty according to the fourth ( 2018 ) round of household survey ( EDAM4 ). The same survey also found that: ( a ) 36 percent of Djibouti \u2019 s population is under 14 years old and 51 percent under 24; ( b ) its human development indicators indicate a life expectancy of 62 years and an infant mortality at birth rate to be 54 / 1, 000; and ( c ) the adult literacy rate is only 53 percent. In terms of employment opportunities, Djibouti \u2019 s national unemployment rate in 2017 stood at 47 percent for people aged 15 and older, and 22 percent for those aged 15-24, with significant variations in unemployment rates across gender, region and age2. And while there exists a slight positive correlation between employment rates and education levels, attaining higher levels of education does not guarantee more opportunities in the labor market. 3.", + "ner_text": [ + [ + 213, + 218, + "named" + ], + [ + 178, + 182, + "EDAM4 <> publication year" + ], + [ + 194, + 210, + "EDAM4 <> data type" + ], + [ + 275, + 283, + "EDAM4 <> data geography" + ], + [ + 396, + 423, + "EDAM4 <> data description" + ], + [ + 431, + 461, + "EDAM4 <> data description" + ], + [ + 495, + 533, + "EDAM4 <> data description" + ], + [ + 573, + 581, + "EDAM4 <> data geography" + ] + ], + "validated": true, + "empirical_context": "Despite relatively high per capita income of US $ 1, 862 in 2015, 30 percent of the population lives in poverty and 21 percent lives in extreme poverty according to the fourth ( 2018 ) round of household survey ( EDAM4 ). The same survey also found that: ( a ) 36 percent of Djibouti \u2019 s population is under 14 years old and 51 percent under 24; ( b ) its human development indicators indicate a life expectancy of 62 years and an infant mortality at birth rate to be 54 / 1, 000; and ( c ) the adult literacy rate is only 53 percent.", + "type": "survey", + "explanation": "In the context, 'EDAM4' is explicitly mentioned as a household survey that provides empirical data on poverty and demographics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EDAM4' is a dataset because it is referenced in the context of survey results and statistics.", + "contextual_reason_agent": "In the context, 'EDAM4' is explicitly mentioned as a household survey that provides empirical data on poverty and demographics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 21, + "text": "Available patient-centric digital services include booking medical appointments and viewing select medical information, including active and pending medications, recent immunization records, pending and completed laboratory test results, allergies, vital signs, medicine refills, and delivery of health services for non-communicable diseases. Result Area 2 on enhanced government effectiveness through digitalization: the PDO-level indicators are: 3 ) Increased student trust in the fairness of the general secondary education examination ( Tawjihi ) as an outcome of the digitalization of student assessment. A recent survey11 reflects a widespread lack of trust in the fairness of the exam by most students ( 47 percent ). It is expected that the digitalization of the exam will enhance the perception by students of its objectivity and fairness. 4 ) Increased representation of women in leadership positions in the civil service as a result of performance - based promotions and competitive recruitment. In 2020, women were officially appointed to only 16 percent of leadership positions in the civil service. The Program aims to improve the gender balance in leadership positions with more performance-based promotions and competitive recruitments as a result of digital skills enhancement and by leveraging the use of the Human Resource Management Information System ( HRMIS ) and other HRM digital platforms.", + "ner_text": [ + [ + 1327, + 1371, + "named" + ] + ], + "validated": false, + "empirical_context": "In 2020, women were officially appointed to only 16 percent of leadership positions in the civil service. The Program aims to improve the gender balance in leadership positions with more performance-based promotions and competitive recruitments as a result of digital skills enhancement and by leveraging the use of the Human Resource Management Information System ( HRMIS ) and other HRM digital platforms.", + "type": "system", + "explanation": "However, it is mentioned as a system that supports HR management rather than as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is mentioned as a system that supports HR management rather than as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "187_multi-page", + "page": 52, + "text": "2. 3 Assist in Establishment of Institute for Training in Public Administration ( ITPA ) ( US $ 0. 24 million ) 1. Periodic long term advisor ( international ) provides advice on establishment of Public Administration Training Institute 2. Design, build and assist in implementation of sirnple system for tracking training received, by recipient, and monitoring data required to assess training targeting and results ( e. g., instructors ' evaluations of how well each trainee mastered the material in any given training event, tracking career development histories of training recipients, etc. ). 3. Provide assistance to ITPA and DoPA staff in undertaking pilot evaluations of ITPA training targeting and impacts. TA to be delivered as a learning-by-doing exercise. Project Component 3 - US $ 0. 40 million 3. Policy Formulation and Coordination Strengthening of policy formulation and coordination will be supported with technical assistance, training and computer hardware and software complementary to the TA and training. The TA will support the Government ' s efforts to ( 1 ) build stable professional capacity within the CoM to provide systematic analyses of policy issues and tradeoffs raised by legislative and policy proposals, and ( 2 ) strengthen procurement policies and practices.", + "ner_text": [ + [ + 286, + 300, + "named" + ] + ], + "validated": false, + "empirical_context": "Periodic long term advisor ( international ) provides advice on establishment of Public Administration Training Institute 2. Design, build and assist in implementation of sirnple system for tracking training received, by recipient, and monitoring data required to assess training targeting and results ( e. g.", + "type": "system", + "explanation": "However, the term 'sirnple system' refers to a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions a system for tracking training data.", + "contextual_reason_agent": "However, the term 'sirnple system' refers to a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 22, + "text": "The existing computerized accounting system currently being used to report on IGAD and IDA - IDF and PPA finances will be used to report on IRAPP finances. This software will be customized to adapt to IDA project needs, more specifically to produce quarterly IFRs. A Project Operational Manual will include a section on financial management. This section will outline the detailed project financial management arrangements, including accounting, audits, chart of accounts, fund flow, audits, and quarterly financial reporting formats ( IFRs and other financial statements ), and job responsibilities for staff handling project financial affairs. 73. A project dollar designated account will be maintained to channel project funds from IDA. Disbursement would be made initially on the basis of incurred eligible expenditures ( transaction based disbursements ). The Project may later become eligible to use the report - based disbursement, i. e., one based on quarterly IFR reports, upon fulfillment of the conditions listed in the Disbursement Arrangement section. The external audit will be carried out annually as part of the IGAD audit by the same private auditor that is auditing IGAD financial statements. The auditor will express an opinion on the annual project financial statements based on International Standards on Auditing and submit the audit report within six months of the end of the financial year. 3.", + "ner_text": [ + [ + 13, + 43, + "named" + ] + ], + "validated": false, + "empirical_context": "The existing computerized accounting system currently being used to report on IGAD and IDA - IDF and PPA finances will be used to report on IRAPP finances. This software will be customized to adapt to IDA project needs, more specifically to produce quarterly IFRs.", + "type": "system", + "explanation": "However, it is a software system used for accounting, not a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data reporting and financial information.", + "contextual_reason_agent": "However, it is a software system used for accounting, not a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1249, + 1254, + "named" + ], + [ + 4, + 14, + "DHIS2 <> publisher" + ], + [ + 15, + 26, + "DHIS2 <> data geography" + ], + [ + 481, + 492, + "DHIS2 <> data geography" + ], + [ + 583, + 591, + "DHIS2 <> reference population" + ], + [ + 616, + 627, + "DHIS2 <> data geography" + ], + [ + 690, + 701, + "DHIS2 <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "In this context, 'DHIS2' is used as a health management information system that serves as a data source for health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'DHIS2' is mentioned in the context of digitization and integration of health data systems.", + "contextual_reason_agent": "In this context, 'DHIS2' is used as a health management information system that serves as a data source for health data.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 103, + "text": "The tracer study will cover a representative sample of graduates from training institutions of MINEFOP, MINESEC ( lyc\u00e9e technique ) and TVET institutions from other ministries in selected economic sectors, public and private. The tracer survey is a questionnaire which includes basic demographic information and information on labor market outcomes of graduates including employment status, industry, etc. The results of the tracer survey, conducted by the firm to be contracted by PCU, are analyzed by the ONEFOP. ONEFOP will prepare a report based on the analysis of findings, including policy recommendations, and publish this report on the MINEFOP website. Tracer studies will be carried out 3 times during project implementation period. The allocated amounts for sub-DLRs ( a ) and ( b ) will be disbursed independently. Data source / Agency ONEFOP / MINEFOP Verification Entity Independent Verification Agency ( IVA ) Procedure PCU compiles the necessary evidence as described, the IVA verifies the evidence, PCU sends the evidence and results of the verification to the Bank, the Bank reviews, requests clarification if needed and approves.", + "ner_text": [ + [ + 4, + 16, + "named" + ] + ], + "validated": false, + "empirical_context": "The tracer study will cover a representative sample of graduates from training institutions of MINEFOP, MINESEC ( lyc\u00e9e technique ) and TVET institutions from other ministries in selected economic sectors, public and private. The tracer survey is a questionnaire which includes basic demographic information and information on labor market outcomes of graduates including employment status, industry, etc. The results of the tracer survey, conducted by the firm to be contracted by PCU, are analyzed by the ONEFOP.", + "type": "study", + "explanation": "However, 'tracer study' refers to the overall study process rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'tracer study' is a dataset because it involves collecting data through a questionnaire.", + "contextual_reason_agent": "However, 'tracer study' refers to the overall study process rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 83, + "text": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 79 of 111 groups will be led by women. Fodder production will target areas within acceptable distance from the homesteads, which ensures that women can actively participate. Financial Management 51. The ICRC headquarters is in Geneva with the ICRC Somalia operating directly under the ICRC Geneva - Financial Management Regulations June 2016 ( DIR2295REV \u2013 Appendix 1 ). The Somalia Delegation Financial management function is headed by Head of Finance and Administration supported by a team of five ( 5 ) Accountants based in in Nairobi with Finance and ten ( 10 ) Administration Assistants located in different field offices in Somalia. Additional Rules on Financial Management Delegation of Somalia \u2013 2016 are in place and they provide specific delegation of authority on financial commitments. Financial Management reporting responsibilities, staffing, functional responsibilities are documented in the various HR / FM policies and procedures. The Finance Department works in close consultation with the Logistics Department particularly in contracts management, commitment control, reconciliation of commitments balances in SUN Accounting System and the Purchasing and Stores Management System. External Finance is usually coordinated and channelled through the External Department in Geneva.", + "ner_text": [ + [ + 1247, + 1286, + "named" + ] + ], + "validated": false, + "empirical_context": "Financial Management reporting responsibilities, staffing, functional responsibilities are documented in the various HR / FM policies and procedures. The Finance Department works in close consultation with the Logistics Department particularly in contracts management, commitment control, reconciliation of commitments balances in SUN Accounting System and the Purchasing and Stores Management System. External Finance is usually coordinated and channelled through the External Department in Geneva.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Management System' in its name, suggesting a structured approach to data handling.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 13, + "text": "Agriculture accounts for approximately 22 percent of Uganda \u2019 s GDP, 46 percent of its export earnings, and 60 percent of the labor force. Despite its important role, the agriculture sector performs far below its potential, exacerbated by increasing climate variability as well as extreme weather events. Uganda \u2019 s national agricultural output has grown at only 2 percent per year over a five-year period compared to about 3 \u2013 5 percent output growth in other East African Community members over the same period. 5 Low rates of commercialization and inadequate capacity to invest in adapting to climate related 3 OPM ( Office of the Prime Minister ) and UNHCR ( United Nations High Commissioner for Refugees ). 2022. Uganda Comprehensive Refugee Response Portal. https: / / data2. unhcr. org / en / country / uga. 4 World Bank and FAO ( Food and Agriculture Organization of the United Nations ). 2019. Rapid Assessment of Natural Resources Degradation in Areas Impacted by the South Sudan Refugee Influx in Northern Uganda. Washington, DC: World Bank. 5 Uganda National Household Survey ( 2016 / 17 )", + "ner_text": [ + [ + 1055, + 1087, + "named" + ], + [ + 53, + 59, + "Uganda National Household Survey <> data geography" + ], + [ + 305, + 311, + "Uganda National Household Survey <> data geography" + ], + [ + 718, + 724, + "Uganda National Household Survey <> data geography" + ], + [ + 1017, + 1023, + "Uganda National Household Survey <> data geography" + ], + [ + 1055, + 1061, + "Uganda National Household Survey <> data geography" + ], + [ + 1090, + 1099, + "Uganda National Household Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "Washington, DC: World Bank. 5 Uganda National Household Survey ( 2016 / 17 )", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly named as a survey, which is a recognized form of data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'National Household Survey', which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly named as a survey, which is a recognized form of data collection.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "182_multi0page", + "page": 8, + "text": "Poverty and Social Impact of Transition: The recent repeated shocks - - a consequence of political strife and social unrest - - have created major economic and social problems in the country. Because of these shocks, many Albanians have realized a loss of income and earning opportunities and have incurred risks to their personal well-being and safety. Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "ner_text": [ + [ + 947, + 971, + "named" + ], + [ + 222, + 231, + "Living Conditions Survey <> reference population" + ], + [ + 392, + 403, + "Living Conditions Survey <> data geography" + ], + [ + 660, + 675, + "Living Conditions Survey <> data type" + ], + [ + 685, + 689, + "Living Conditions Survey <> reference year" + ], + [ + 1070, + 1078, + "Living Conditions Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned as a source of information that will be used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on living conditions.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned as a source of information that will be used for analysis.", + "contextual_signal": "mentioned as a source of information for analysis", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 67, + "text": "Annual starting year 2 Attendance lists Administrative data ( project data ) M & E Specialist within the PIU Number of start-ups created through business plan competitions Number of startups registered by youths that benefitted from the subsidies of the Business Plan Competition Annual starting F2 Procurement report Administrative data M & E specialist within the PIU", + "ner_text": [ + [ + 40, + 59, + "named" + ], + [ + 62, + 74, + "Administrative data <> data type" + ], + [ + 109, + 171, + "Administrative data <> data description" + ] + ], + "validated": true, + "empirical_context": "Annual starting year 2 Attendance lists Administrative data ( project data ) M & E Specialist within the PIU Number of start-ups created through business plan competitions Number of startups registered by youths that benefitted from the subsidies of the Business Plan Competition Annual starting F2 Procurement report Administrative data M & E specialist within the PIU", + "type": "administrative data", + "explanation": "In this context, 'administrative data' is explicitly mentioned alongside project data and is used by an M & E Specialist, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' often refers to structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned alongside project data and is used by an M & E Specialist, indicating it serves as a data source.", + "contextual_signal": "mentioned as a data source for project data", + "tags": [] + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 10, + "text": "As of August 2017, Jordan hosts 660, 5822 registered Syrian refugees, of which 232, 8683 are school \u2010 aged children requiring the provision of education services. Eighty percent of refugees live in host communities, representing 10 percent of Jordan \u2019 s population, while the rest live in dedicated refugee camps. Per the recent 2016 government census, the total number of Syrians in Jordan reached an estimated 1. 265 million, representing 13. 2 percent of population. 4 Jordan has been committed to integrating Syrian refugee children in the public formal sector, and as of June 2017, approximately 10 percent of children in public schools were Syrian refugees. Therefore, it is important that education services to refugee children in Jordan respond to the nature of the challenges they face in the education system. 3. Jordan \u2019 s economic development hinges on the existence of an education system that provides students with the cognitive and socioemotional skills needed to succeed in the labor market. Realizing the full potential of educational investments for economic prosperity requires improving access and quality of education for both girls and boys. 5 Additionally, the cost of not educating refugee children is high in terms of loss of human capital for regional economic development, as well as for the long \u2010 term processes of peace, stability, and reconstruction. It is thus necessary for the education system to be agile and address the existing education quality challenges by strengthening the system ' s ability to manage a growing number of students, including refugee and vulnerable children. 1 Human Development Data ( 1990 \u2013 2015 ); Human Development Report. 2016. 2 United Nations High Commissioner for Refugees ( UNHCR ). August 6, 2017. 3 Brussels Conference Paper. 2017. 4 Department of Statistics ( DOS ); National census. November 2016. 5 OECD. 2016. PISA. 2015 Results ( Volume I ): Excellence and Equity in Education, OECD Publishing, Paris. http: / / dx. doi. org / 10. 1787 / 9789264266490 \u2010 en.", + "ner_text": [ + [ + 1884, + 1888, + "named" + ], + [ + 13, + 17, + "PISA <> publication year" + ], + [ + 19, + 25, + "PISA <> data geography" + ], + [ + 384, + 390, + "PISA <> data geography" + ], + [ + 823, + 829, + "PISA <> data geography" + ], + [ + 1872, + 1876, + "PISA <> publisher" + ], + [ + 1890, + 1894, + "PISA <> publication year" + ], + [ + 1953, + 1957, + "PISA <> publisher" + ] + ], + "validated": true, + "empirical_context": "2016. PISA. 2015 Results ( Volume I ): Excellence and Equity in Education, OECD Publishing, Paris.", + "type": "dataset", + "explanation": "PISA is explicitly referenced as a source of results, indicating it functions as a dataset in this context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA is known for providing educational assessment data.", + "contextual_reason_agent": "PISA is explicitly referenced as a source of results, indicating it functions as a dataset in this context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 47, + "text": "The World Bank Social Entrepreneurship, Empowerment and Cohesion in Refugee and Host Communities in Turkey Project ( P171456 ) 38 on the Results Framework indicators. Second, the MoIT, with the TDAs, will facilitate community monitoring processes to ensure feedback from beneficiaries. This will include a regular readout on refugee and host community engagement indicators measured through the participatory processes in Subcomponents 1A and 2A, with a view to incorporating a process for dialogue and action. During preparation, in cooperation with the MoIT and TDAs, a World Bank team started a series of beneficiary dialogues in a number of project areas to identify the diverse refugee and host community challenges and opportunities for social enterprise and enhanced livelihoods, as well as social cohesion issues. This ongoing dialogue process will help inform the detailed design of project activities in the POM and support project measurement. All information will be collated at the national level and made available for annual discussions. Third, the project will include baseline, midline, and endline evaluations, which will be conducted by an independent firm to be hired by the MoIT. These evaluations will include perception surveys with social cohesion measures ( drawing on international survey tools, instruments utilized in Turkey, and indicators selected by the communities themselves ).", + "ner_text": [ + [ + 1232, + 1250, + "named" + ], + [ + 4, + 14, + "perception surveys <> publisher" + ], + [ + 100, + 106, + "perception surveys <> data geography" + ], + [ + 572, + 582, + "perception surveys <> publisher" + ], + [ + 1294, + 1320, + "perception surveys <> data type" + ], + [ + 1346, + 1352, + "perception surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "Third, the project will include baseline, midline, and endline evaluations, which will be conducted by an independent firm to be hired by the MoIT. These evaluations will include perception surveys with social cohesion measures ( drawing on international survey tools, instruments utilized in Turkey, and indicators selected by the communities themselves ).", + "type": "survey", + "explanation": "In this context, 'perception surveys' are explicitly mentioned as part of the evaluations, indicating they are used to collect data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'perception surveys' imply a structured collection of data gathered from respondents.", + "contextual_reason_agent": "In this context, 'perception surveys' are explicitly mentioned as part of the evaluations, indicating they are used to collect data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "059_Uganda-Investing-in-Forests-and-Protected-Areas-for-Climate-Smart-Development-Project", + "page": 57, + "text": "The World Bank Uganda Investing in Forests and Protected Areas for Climate-Smart Development Project ( P170466 ) Page 54 of 83 resilience. Proportion of UWA field staff housed on - site This indicator measures the outcomes of infrastructure investments of the project in construction of staff accommodation for UWA staff in target protected areas. Annual UWA field reports / HR records UWA will compile information UWA CFM and CRM groups established with project support that have at least one woman in the executive committee This indicator measures the outcomes of gender - inclusive processes of formation of collaborative forest management groups. This is a gender indicator. Annual CFM agreements / CRM MOUs and information on the membership of their executive committees UWA and NFA focal points responsible for community engagement will collate this information based on the documentation. UWA and NFA Tourism roads constructed This indicator measures the establishment of tourism roads within wildlife protected areas managed by UWA. This indicator measures the adaptive capacity of resilience. Annual.", + "ner_text": [ + [ + 348, + 385, + "named" + ], + [ + 15, + 21, + "Annual UWA field reports / HR records <> data geography" + ], + [ + 386, + 389, + "Annual UWA field reports / HR records <> author" + ] + ], + "validated": true, + "empirical_context": "Proportion of UWA field staff housed on - site This indicator measures the outcomes of infrastructure investments of the project in construction of staff accommodation for UWA staff in target protected areas. Annual UWA field reports / HR records UWA will compile information UWA CFM and CRM groups established with project support that have at least one woman in the executive committee This indicator measures the outcomes of gender - inclusive processes of formation of collaborative forest management groups. This is a gender indicator.", + "type": "reports, records", + "explanation": "These reports and records are explicitly mentioned as sources of information used to measure project outcomes, confirming their role as datasets.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to compiled information from reports and records.", + "contextual_reason_agent": "These reports and records are explicitly mentioned as sources of information used to measure project outcomes, confirming their role as datasets.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 28, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 24 of 66 65. Given its focus on building local capacity in the medium and long term, this Subcomponent is aligned with GCRF Pillar 4: Strengthening Policies, Institutions and Investments for Rebuilding Better. 66. Subcomponent 4. 2: Strengthening data for education system management ( US $ 2 million ). Strengthening the education system \u2019 s management also entails supporting the education governing ministries in establishing a data system to deliver reliable data in line with international standards. This activity will build on the existing EMIS. TA will be provided to diagnose the existing system regarding data collection processes, standards and templates, personnel, and IT infrastructure. Potential areas to be addressed may include ( i ) improved templates and standards; ( ii ) improved data collection processes, such as phone-based data collection to enable the various education stakeholders, including the community and parents, to collect and submit real-time data into the EMIS; and ( iii ) an improved web-based EMIS system that has the capacity to integrate real-time data. Real-time data would include schools \u2019 condition, teacher registration, teacher and student attendance, and the availability of teaching and learning materials. This could be effectively utilized once synced across the web-based platform.", + "ner_text": [ + [ + 1126, + 1130, + "named" + ] + ], + "validated": false, + "empirical_context": "TA will be provided to diagnose the existing system regarding data collection processes, standards and templates, personnel, and IT infrastructure. Potential areas to be addressed may include ( i ) improved templates and standards; ( ii ) improved data collection processes, such as phone-based data collection to enable the various education stakeholders, including the community and parents, to collect and submit real-time data into the EMIS; and ( iii ) an improved web-based EMIS system that has the capacity to integrate real-time data. Real-time data would include schools \u2019 condition, teacher registration, teacher and student attendance, and the availability of teaching and learning materials.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "186_multi-page", + "page": 47, + "text": "the sector. It should be pointed out that many of the cited indicators are estimates based on projected figures from before the war since MINEDUC has not collected school-based data since 1992. The RHRDP will, as a priority, support development of an education management information system. In 1998, primary enrollment was estimated to be about 1, 273, 000 students, with a gross enrollment rate ( GER ) of approximately 89 percent. Large numbers of underage / overage students explain the significantly lower primary net enrollment rate of about 65 percent. Efficiency gains, resulting from lower repetition and dropout rates, could significantly reduce the number of over-age students and raise the net enrollment rate. The existence of vast numbers of untrained primary teachers is also evident in the very high ratio of pupils to trained teachers ( 125: 1 ). Access to secondary education drops precipitously. Only about 90, 000 secondary students ( both cycles ) were enrolled in government, church and private secondary schools. Producing-and retaining adequate numbers of trained secondary school teachers is a challenge to the secondary system as it is in the primary.", + "ner_text": [ + [ + 164, + 181, + "named" + ], + [ + 188, + 192, + "school-based data <> publication year" + ], + [ + 816, + 851, + "school-based data <> data description" + ] + ], + "validated": true, + "empirical_context": "the sector. It should be pointed out that many of the cited indicators are estimates based on projected figures from before the war since MINEDUC has not collected school-based data since 1992. The RHRDP will, as a priority, support development of an education management information system.", + "type": "data", + "explanation": "However, it is not a dataset in this context as it refers to data that has not been collected since 1992 and is not currently functioning as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'school-based data' implies a collection of data related to schools.", + "contextual_reason_agent": "However, it is not a dataset in this context as it refers to data that has not been collected since 1992 and is not currently functioning as a data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 18, + "text": "According to the SDI survey, during unannounced inspection visits, 16. 6 percent of teachers were absent from school, and 27 percent were absent from the classroom. On average, the actual teaching time in public schools is just over 4 hours per day, or nearly 1. 5 hours below the policy. Furthermore, school directors are not equipped with the necessary skills to effectively manage schools, as they receive little training. In addition, the existing 470 school inspectorates in the eight regions are currently unable to play a critical role in improving teaching given the inadequate pedagogical support staff to teacher ratio and limited resources. 16. Weak educational inputs in the classroom and weak management of resources also hamper the learning process. While the student-teacher ratio in primary schools at the national level has improved with a ratio of 37 to 1, there are large disparities across regions. For example, the differences range from 26 to 1 in Diffa to 45 to 1 in Maradi. The same holds true for the student-teacher ratio at the secondary level. In 29 percent of cases, the distribution of teachers is based on criteria other than the number of pupils, which reflects the lack of coherence in the management of teachers.", + "ner_text": [ + [ + 17, + 27, + "named" + ], + [ + 84, + 92, + "SDI survey <> reference population" + ], + [ + 774, + 795, + "SDI survey <> data description" + ], + [ + 970, + 975, + "SDI survey <> data geography" + ], + [ + 990, + 996, + "SDI survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "According to the SDI survey, during unannounced inspection visits, 16. 6 percent of teachers were absent from school, and 27 percent were absent from the classroom.", + "type": "survey", + "explanation": "The SDI survey is indeed a dataset as it collects and presents structured data on teacher attendance during inspections.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on teacher absences.", + "contextual_reason_agent": "The SDI survey is indeed a dataset as it collects and presents structured data on teacher attendance during inspections.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "163_30267", + "page": 23, + "text": "misconceptions about HIV transmission or prevention ( adapted UNGASS indicator ) Improved social conditions within refugee and surrounding populations: There i s an increase in the social and gender conditions within the target populations \u2019 in refugee sites and surrounding populations - - ~ NETWORKS m Better networking among selected GLIA network organizations: Networks have been strengthened and there i s improved management and AIDS learning capacities of networks and member organizations Improved health services to mobile populations: There i s increased knowledge transfer between GLIA countries and uptake o f health services by mobile populations Results Indicators for Each Component HEALTH SERVICES Component One: Prevention: @ 100 % of sites have uninterrupted and sufficient supplies o f male condoms over the previous 6 month period 100 % o f sites have HIV posters and billboards in appropriate language > 75 % have functioning peer educator programs 100 % sites has access to functioning VCT services 100 % of refugee, surrounding and retumee sites have access to functioning PMTCT programs m 0 Use of Outcome Information Baseline information will be collected at the start of the project, and outcome indicator information at the end of the project. For this reason, outcome indicator data will be utilized to determine the overall impact of the GLIA on the achievement of the GLIA \u2019 S mission statement. Use of Results Monitoring Component One: The indicator scores that are generated will be used to inform annual planning and refinement o f the annual work plans for this component. The results will also be used to initiate discussions with the host countries and to ensure better linkages between activities for refugee site residents ( managed by UNHCR, implemented by UNHCR implementation partners ) and residents within surrounding communities ( managed and implemented by host Please note that the Social and Gender assessment that is currently being carried out will provide detailed information for the correct formulation of this indicator 20", + "ner_text": [ + [ + 1288, + 1310, + "named" + ], + [ + 1214, + 1243, + "outcome indicator data <> data type" + ], + [ + 1471, + 1487, + "outcome indicator data <> data description" + ], + [ + 1738, + 1760, + "outcome indicator data <> reference population" + ] + ], + "validated": true, + "empirical_context": "misconceptions about HIV transmission or prevention ( adapted UNGASS indicator ) Improved social conditions within refugee and surrounding populations: There i s an increase in the social and gender conditions within the target populations \u2019 in refugee sites and surrounding populations - - ~ NETWORKS m Better networking among selected GLIA network organizations: Networks have been strengthened and there i s improved management and AIDS learning capacities of networks and member organizations Improved health services to mobile populations: There i s increased knowledge transfer between GLIA countries and uptake o f health services by mobile populations Results Indicators for Each Component HEALTH SERVICES Component One: Prevention: @ 100 % of sites have uninterrupted and sufficient supplies o f male condoms over the previous 6 month period 100 % o f sites have HIV posters and billboards in appropriate language > 75 % have functioning peer educator programs 100 % sites has access to functioning VCT services 100 % of refugee, surrounding and retumee sites have access to functioning PMTCT programs m 0 Use of Outcome Information Baseline information will be collected at the start of the project, and outcome indicator information at the end of the project. For this reason, outcome indicator data will be utilized to determine the overall impact of the GLIA on the achievement of the GLIA \u2019 S mission statement. Use of Results Monitoring Component One: The indicator scores that are generated will be used to inform annual planning and refinement o f the annual work plans for this component.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected data that will be used to assess the impact of the GLIA project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'outcome indicator data' suggests a collection of measurable information used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected data that will be used to assess the impact of the GLIA project.", + "contextual_signal": "mentioned as data to determine overall impact", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 33, + "text": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "ner_text": [ + [ + 887, + 892, + "named" + ] + ], + "validated": false, + "empirical_context": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "type": "system", + "explanation": "'eLMIS' is mentioned as a system but not explicitly as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'eLMIS' is a dataset because it includes 'Information System' in its name, suggesting it could contain data.", + "contextual_reason_agent": "'eLMIS' is mentioned as a system but not explicitly as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "166_304360LK", + "page": 38, + "text": "Each beneficiary family will be allowed a period o f approximately four months for the completion o f the physical works. T h i s will mitigate against funds being recycled for other purposes. To facilitate compliance, each eligible beneficiary family will sign a memorandum o f understanding with the DPU. This will outline the target dates and construction progress required for each tranche payment to be triggered and any other obligations to be made by the beneficiary family. Apportioning of Funds across Beneficiaries, Districts and Divisions IDPs from the Jaffna HSZ \u201d and Mannar IDPs in PuttalamZ8 are currently unable to return to their original villages. Talung this into account, NEHRP will attempt to address allocations for IDPs from 25 A house which is more than 60 % damaged will be entitled to a fully damaged housing grant. A house that is between 30-60 % damaged will be entitled to a partly damaged housing grant. The assessment will occur through a Housing Damage Assessment and Social Verification Survey, using specified criteria ( based on cost o f reconstruction ) o f what constitutes 60 % damaged as opposed to 30 % damaged. 2G i. e. in the name o f both husband and wife, except for a single headed households. \u201d The release o f land to enable the return o f Jaffna HSZ displacees i s a political decision. 28 A PHRD-financed study in 2004 found that 72 % o f Puttalam families have originated from Mannar, 13 % are from Jaffna, 11 % are fiom Mullaitivu and more than 98 % o f the IDPs are Muslim. Overall, 59 % claimed that 33", + "ner_text": [ + [ + 970, + 1026, + "named" + ], + [ + 550, + 554, + "Housing Damage Assessment and Social Verification Survey <> reference population" + ], + [ + 564, + 570, + "Housing Damage Assessment and Social Verification Survey <> data geography" + ], + [ + 581, + 587, + "Housing Damage Assessment and Social Verification Survey <> data geography" + ], + [ + 692, + 697, + "Housing Damage Assessment and Social Verification Survey <> publisher" + ], + [ + 1287, + 1293, + "Housing Damage Assessment and Social Verification Survey <> data geography" + ], + [ + 1363, + 1367, + "Housing Damage Assessment and Social Verification Survey <> publication year" + ], + [ + 1427, + 1433, + "Housing Damage Assessment and Social Verification Survey <> data geography" + ], + [ + 1449, + 1455, + "Housing Damage Assessment and Social Verification Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "A house that is between 30-60 % damaged will be entitled to a partly damaged housing grant. The assessment will occur through a Housing Damage Assessment and Social Verification Survey, using specified criteria ( based on cost o f reconstruction ) o f what constitutes 60 % damaged as opposed to 30 % damaged. 2G i.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly used to assess housing damage and determine eligibility for grants based on collected data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly used to assess housing damage and determine eligibility for grants based on collected data.", + "contextual_signal": "described as a survey that collects data for assessment", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 144, + "text": "According to preliminary results of the Central African Republic refugee poverty analysis ( American University of Beirut, forthcoming ), using expenditure data from a Household Economy Analysis ( UNHCR / IFORD 2017 ), Central African Republic refugees in the East, Adamawa, and North regions show alarming levels of poverty. Data suggest that 96 percent of refugees fall below the extreme poverty line ( less than CFAF 17, 962 per person per month ). If the Minimum Food Basket ( MFB ) calculated by the WFP is used as a reference, 71 percent of the Central African Republic refugee population fall below this line, meaning that their expenditure is less than CFAF 8, 800 per person per month \u2014 the amount necessary to purchase minimum food energy requirements ( emergency standards: 2, 100 kcal per person per day ).", + "ner_text": [ + [ + 168, + 194, + "named" + ], + [ + 40, + 64, + "Household Economy Analysis <> data geography" + ], + [ + 92, + 121, + "Household Economy Analysis <> author" + ], + [ + 144, + 160, + "Household Economy Analysis <> data type" + ], + [ + 197, + 202, + "Household Economy Analysis <> publisher" + ], + [ + 211, + 215, + "Household Economy Analysis <> publication year" + ], + [ + 219, + 252, + "Household Economy Analysis <> reference population" + ], + [ + 834, + 852, + "Household Economy Analysis <> usage context" + ] + ], + "validated": true, + "empirical_context": "According to preliminary results of the Central African Republic refugee poverty analysis ( American University of Beirut, forthcoming ), using expenditure data from a Household Economy Analysis ( UNHCR / IFORD 2017 ), Central African Republic refugees in the East, Adamawa, and North regions show alarming levels of poverty. Data suggest that 96 percent of refugees fall below the extreme poverty line ( less than CFAF 17, 962 per person per month ).", + "type": "analysis", + "explanation": "This is indeed a dataset as it provides structured data used for empirical analysis regarding refugee poverty.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to an analysis that uses expenditure data.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data used for empirical analysis regarding refugee poverty.", + "contextual_signal": "follows 'using expenditure data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 59, + "text": "The World Bank Uganda: Investment for Industrial Transformation and Employment ( P171607 ) Page 54 of 92 supported by the Bank, and the number of businesses that benefited from financial services. Number of SMEs with a loan or line of credit Annual collection Web based platform report Data down loaded annual from web based platform Bank of Uganda ( BoU ) Number of formally employed in the manufacturing sector according to PAYE data collected by URA TIN numbers ( # of jobs ) Number of formally employed in the manufacturing sector according to PAYE data collected by URA TIN Annual Uganda Revenue Authority, Commissione r ' s General Office, Research Dept Based on annual tax PAYE reports Project Implementation Teams ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of SME loans at Tier I institutions amortization extension Existing Loans of Tier 1 clients, receiving a grace period of up to 12 months from their FIs 6 months Reporting to the web based platform by PFIs Reporting by PFIs BoU Number of Micro and HH enterprises loans receiving grace period Number of Loans with a Grant Element ( Reduced interest Rate or Cash Quarterly Participating Micro Finance Reporting by PFIs BOU", + "ner_text": [ + [ + 426, + 435, + "named" + ], + [ + 15, + 21, + "PAYE data <> data geography" + ], + [ + 357, + 412, + "PAYE data <> data description" + ], + [ + 449, + 452, + "PAYE data <> publisher" + ], + [ + 571, + 574, + "PAYE data <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Uganda: Investment for Industrial Transformation and Employment ( P171607 ) Page 54 of 92 supported by the Bank, and the number of businesses that benefited from financial services. Number of SMEs with a loan or line of credit Annual collection Web based platform report Data down loaded annual from web based platform Bank of Uganda ( BoU ) Number of formally employed in the manufacturing sector according to PAYE data collected by URA TIN numbers ( # of jobs ) Number of formally employed in the manufacturing sector according to PAYE data collected by URA TIN Annual Uganda Revenue Authority, Commissione r ' s General Office, Research Dept Based on annual tax PAYE reports Project Implementation Teams ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of SME loans at Tier I institutions amortization extension Existing Loans of Tier 1 clients, receiving a grace period of up to 12 months from their FIs 6 months Reporting to the web based platform by PFIs Reporting by PFIs BoU Number of Micro and HH enterprises loans receiving grace period Number of Loans with a Grant Element ( Reduced interest Rate or Cash Quarterly Participating Micro Finance Reporting by PFIs BOU", + "type": "data", + "explanation": "In this context, 'PAYE data' is indeed used as a data source for measuring employment in the manufacturing sector.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PAYE data' is a dataset because it refers to collected employment data used for analysis.", + "contextual_reason_agent": "In this context, 'PAYE data' is indeed used as a data source for measuring employment in the manufacturing sector.", + "contextual_signal": "described as data collected by URA TIN numbers", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "014_BOSIB1dda1d49e0221807413cf06ea9ae3f", + "page": 42, + "text": "Frequency Annually Data Source Firm-level reporting by beneficiary MSMEs, verified by implementation partners Methodology for Data Collection Periodic survey Responsibility for Data Collection Implementing partners, PIU Strengthen and expand the financial sector to enhance access to finance for MSMEs Number of MSMEs registered in the movables collateral registry ( guaranteed with movables assets ) ( Number ) Description Number of unique MSMEs that are registered in registeries developed and implemented with the project support. Frequency Quarterly Data Source Project records Methodology for Data Collection Information from the registry is tabulated Responsibility for Data Collection PIU Number of loans by women or women-owned MSMEs registered in the movables collateral registry ( guaranteed with movables assets ) ( Number ) Description Number of unique women-owned MSMEs that are registered in registeries developed and implemented with the project support. Frequency Quarterly Data Source Project records Methodology for Data Collection Information from the registry is tabulated Responsibility for Data PIU", + "ner_text": [ + [ + 336, + 364, + "named" + ], + [ + 67, + 72, + "movables collateral registry <> reference population" + ], + [ + 715, + 741, + "movables collateral registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "Frequency Annually Data Source Firm-level reporting by beneficiary MSMEs, verified by implementation partners Methodology for Data Collection Periodic survey Responsibility for Data Collection Implementing partners, PIU Strengthen and expand the financial sector to enhance access to finance for MSMEs Number of MSMEs registered in the movables collateral registry ( guaranteed with movables assets ) ( Number ) Description Number of unique MSMEs that are registered in registeries developed and implemented with the project support. Frequency Quarterly Data Source Project records Methodology for Data Collection Information from the registry is tabulated Responsibility for Data Collection PIU Number of loans by women or women-owned MSMEs registered in the movables collateral registry ( guaranteed with movables assets ) ( Number ) Description Number of unique women-owned MSMEs that are registered in registeries developed and implemented with the project support.", + "type": "registry", + "explanation": "This is a dataset as it is described as a registry that holds information on MSMEs and is used for data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that collects data on MSMEs.", + "contextual_reason_agent": "This is a dataset as it is described as a registry that holds information on MSMEs and is used for data collection.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 13, + "text": "Cameroon has ranked consistently in the lower quintile of world governance indicators for the last two decades ( Figures 5-1 and 5-2, Annex 5 ). Governance, Government effectiveness, and corruption are consistently acknowledged as key issues by Cameroonians, 3 the private sector, 4 and the GoC. 5 3 Transparency International ( 2013 ), Global Corruption Barometer, Afrobarometer ( 2011 \u2013 2013 and 2014 \u2013 2015 ). 4 World Bank ( 2009 ) Cameroon Enterprise Surveys, INS ( 2009 ) firm census, GoC ( 2011 ) Business Climate Survey; WEF ( World Economic Forum ). 2015. Global Competitiveness Report 2014 \u2013 2015. 5 DSCE 2010 \u2013 2020; President Paul Biya \u2019 s New Year Speeches in December 2003, 2005, 2006, 2007, 2014, 2015 or official communication to Cabinet in December 2004, September 2006, September 2007, March 2008, July 2009, and October 2015.", + "ner_text": [ + [ + 366, + 379, + "named" + ], + [ + 0, + 8, + "Afrobarometer <> data geography" + ], + [ + 245, + 257, + "Afrobarometer <> reference population" + ], + [ + 329, + 333, + "Afrobarometer <> publication year" + ], + [ + 382, + 393, + "Afrobarometer <> reference year" + ], + [ + 398, + 409, + "Afrobarometer <> publication year" + ], + [ + 428, + 432, + "Afrobarometer <> publication year" + ], + [ + 435, + 443, + "Afrobarometer <> data geography" + ] + ], + "validated": true, + "empirical_context": "Governance, Government effectiveness, and corruption are consistently acknowledged as key issues by Cameroonians, 3 the private sector, 4 and the GoC. 5 3 Transparency International ( 2013 ), Global Corruption Barometer, Afrobarometer ( 2011 \u2013 2013 and 2014 \u2013 2015 ). 4 World Bank ( 2009 ) Cameroon Enterprise Surveys, INS ( 2009 ) firm census, GoC ( 2011 ) Business Climate Survey; WEF ( World Economic Forum ).", + "type": "survey", + "explanation": "Afrobarometer is explicitly mentioned in the context as a source of data related to governance and corruption issues.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because Afrobarometer is known for collecting and providing data on public opinion across Africa.", + "contextual_reason_agent": "Afrobarometer is explicitly mentioned in the context as a source of data related to governance and corruption issues.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 144, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 140 of 159 68. Improved availability of information on training programs through development of integrated digital platform on training opportunities. The project will support the development of an integrated digital information system on training opportunities and the introduction and operationalization of post-training tracking mechanisms of graduates. A digital system will provide updated information on training opportunities to all stakeholders, including students and employers. The system will capitalize on existing but limited information, for example, in databases of ONEFOP, the NEF, COSUP, and the NIS, as it will also strengthen / upgrade some of the existing tools, expand data collection, and increase transparency. 69. Design and implementation of tracer studies. Currently, neither MINEFOP nor MINESEC track their graduates in a systematic way and do not collect information about their labor market outcomes. Under the project, three tracer surveys ( baseline, mid-term, and final ) are planned to monitor labor market outcomes of TVET graduates from different streams of both public and private institutions. These surveys will be supervised by ONEFOP and conducted by a survey company. The surveys will provide baseline data and yearly information on the external effectiveness of the training system.", + "ner_text": [ + [ + 1038, + 1052, + "named" + ], + [ + 4, + 14, + "tracer surveys <> publisher" + ], + [ + 664, + 670, + "tracer surveys <> publisher" + ], + [ + 1135, + 1149, + "tracer surveys <> reference population" + ], + [ + 1250, + 1256, + "tracer surveys <> publisher" + ], + [ + 1317, + 1330, + "tracer surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "Currently, neither MINEFOP nor MINESEC track their graduates in a systematic way and do not collect information about their labor market outcomes. Under the project, three tracer surveys ( baseline, mid-term, and final ) are planned to monitor labor market outcomes of TVET graduates from different streams of both public and private institutions. These surveys will be supervised by ONEFOP and conducted by a survey company.", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as planned to monitor and collect data on labor market outcomes, confirming their role as datasets.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'tracer surveys' are datasets because they involve systematic data collection about labor market outcomes.", + "contextual_reason_agent": "These surveys are explicitly mentioned as planned to monitor and collect data on labor market outcomes, confirming their role as datasets.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 15, + "text": "For example, the average value of assets among all households ( both refugee and host ) in the District of Arua is UGX 560, 000 ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 9. COVID-19 is already disrupting incomes and livelihoods, with the poorest wealth quintiles most adversely affected. Since the COVID-19 outbreak, 91 percent of households have reported reduced income ( or losses ) from at least one of their sources of livelihood. Services, such as trade, transport and accommodation and food services have been the sectors most affected by the COVID-19 restrictions and have also lost the highest share of workers. Although employment levels have recovered partially, income levels for many households have not returned to pre-COVID-19 levels. By April 2021, income levels were still below pre-COVID-19 levels for at least one third of households. The second lockdown in mid-2021 is likely to have stalled and even possibly reversed progress in income recovery. In fact, 49 percent of MSMEs interviewed on the impact of the second lockdown 10 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "ner_text": [ + [ + 1308, + 1349, + "named" + ], + [ + 51, + 61, + "refugee and host community household data <> reference population" + ], + [ + 95, + 111, + "refugee and host community household data <> data geography" + ], + [ + 201, + 215, + "refugee and host community household data <> data geography" + ], + [ + 805, + 809, + "refugee and host community household data <> publication year" + ] + ], + "validated": true, + "empirical_context": "org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to structured data collected from the Refugee and Host Community Household Survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes specific data from a survey related to households.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected from the Refugee and Host Community Household Survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "138_781290PAD0JO0R0t0Box377365B00OUO090", + "page": 34, + "text": "35 health services and the GOJ \u2019 s policy to provide refugees with access to MOH health care services, facilities especially in the north, are witnessing a dramatic increase in the use of services that is exceeding their capacity. 8. It is evident that this dramatic change in the demographic and epidemiologic conditions in Jordan as a result of the Syrian conflict is taking its toll on the health sector. Demand for services by refugees at MOH facilities increased significantly. MOH data show that the number of outpatient visits to MOH PHCCs by Syrian refugees increased from 68 in January 2012 to 15, 975 in March 2013 ( Figure 1 ). Similarly and during the same period, Syrian refugees admitted to MOH hospital increased from 300 admissions to 10, 330 associated with a sharp increase in the number of surgeries performed at these hospitals going from 105 to 622 surgeries. Figure 1: Use of MOH services by: January 2012-March 2013 9. MOH facilities in the northern part of the country are shouldering the greatest burden from the refugee influx. For example, Figure 2 shows utilization rates in the Women and Children Hospital in the northern city of Ramtha during the first three months of 2013, where children and women outpatient visits increased by more than 300 percent and 400 percent, respectively.", + "ner_text": [ + [ + 483, + 491, + "named" + ], + [ + 77, + 80, + "MOH data <> publisher" + ], + [ + 325, + 331, + "MOH data <> data geography" + ], + [ + 443, + 446, + "MOH data <> publisher" + ], + [ + 550, + 565, + "MOH data <> reference population" + ], + [ + 587, + 599, + "MOH data <> reference year" + ], + [ + 620, + 624, + "MOH data <> publication year" + ], + [ + 677, + 692, + "MOH data <> reference population" + ], + [ + 898, + 901, + "MOH data <> publisher" + ], + [ + 934, + 938, + "MOH data <> publication year" + ], + [ + 942, + 945, + "MOH data <> publisher" + ], + [ + 1159, + 1165, + "MOH data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Demand for services by refugees at MOH facilities increased significantly. MOH data show that the number of outpatient visits to MOH PHCCs by Syrian refugees increased from 68 in January 2012 to 15, 975 in March 2013 ( Figure 1 ). Similarly and during the same period, Syrian refugees admitted to MOH hospital increased from 300 admissions to 10, 330 associated with a sharp increase in the number of surgeries performed at these hospitals going from 105 to 622 surgeries.", + "type": "data", + "explanation": "In this context, 'MOH data' is indeed used as a data source to illustrate trends in service demand by refugees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MOH data' is a dataset because it refers to specific numerical information regarding outpatient visits and hospital admissions.", + "contextual_reason_agent": "In this context, 'MOH data' is indeed used as a data source to illustrate trends in service demand by refugees.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 46, + "text": "The assessment concluded that the SEA / SH risks are Substantial. Drivers of risk in the context include high rates of child marriage and female circumcision, general social acceptability of GBV, conflict, high risks of human trafficking, and lack of legislation on domestic violence and sexual harassment. GBV is highly prevalent, and it is estimated that 28. 6 percent of women nationwide have experienced physical or sexual violence by an intimate partner at some point in their lives. 38 SEA / SH requirements have been reflected in the ESCP, in contracts, and in the contractor \u2019 s ESCP. The recipient will develop a budgeted SEA / SH Action Plan annexed to the ESMF that will outline the project \u2019 s mitigation strategies, response protocols, and accountability mechanisms. The Recipient will map GBV services in areas of implementation and will develop a referral protocol for the timely, safe, and ethical referral of all survivors who may disclose GBV / SEA / SH incidents to the project. In addition, the recipient will design and implement an SEA / SH-sensitive grievance mechanism for the safe and confidential documentation, response, and management of SEA / SH complaints and will include targeted, enabling, and regular involvement of women and other groups at risk in stakeholder engagement. The Recipient \u2019 s supervision 37 https: / / documents1. worldbank. org / curated / en / 099115102012230317 / pdf / P17449507045c20b70a0b20cbd9ac3ae22d. pdf 38 Chad, Demographic Health Survey ( DHS ), 2014 \u2013 15 ( in French ).", + "ner_text": [ + [ + 1473, + 1498, + "named" + ], + [ + 374, + 379, + "Demographic Health Survey <> reference population" + ], + [ + 1501, + 1504, + "Demographic Health Survey <> publisher" + ], + [ + 1508, + 1517, + "Demographic Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "org / curated / en / 099115102012230317 / pdf / P17449507045c20b70a0b20cbd9ac3ae22d. pdf 38 Chad, Demographic Health Survey ( DHS ), 2014 \u2013 15 ( in French ).", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific demographic health survey that collects structured data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often refers to structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific demographic health survey that collects structured data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 63, + "text": "landfill gate computer system and individual municipalities \u2019 records for waste managed 20 % 30 % MSW depositions gradually increasing in Al Minya 77 % of total waste managed 79 % of total waste managed 81 % of total waste managed 84 % of total waste managed 87 % of total waste managed 91 % of total waste managed 95 % of total waste managed Total 20 % 30 % \u2013", + "ner_text": [ + [ + 0, + 29, + "named" + ] + ], + "validated": false, + "empirical_context": "landfill gate computer system and individual municipalities \u2019 records for waste managed 20 % 30 % MSW depositions gradually increasing in Al Minya 77 % of total waste managed 79 % of total waste managed 81 % of total waste managed 84 % of total waste managed 87 % of total waste managed 91 % of total waste managed 95 % of total waste managed Total 20 % 30 % \u2013", + "type": "system", + "explanation": "However, the context indicates it is a computer system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data handling.", + "contextual_reason_agent": "However, the context indicates it is a computer system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 23, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 20 of 68 will ensure that independent and credible data on health service delivery and coverage and commodities are generated and that the data are usable and used to enable the Government, the World Bank, and development partners to verify that resources are reaching the intended beneficiaries and minimize potential harm. The monitoring entities \u2019 roles will include working with the PMU, UNICEF, the World Bank, and IPs to explain results, providing guidance on improved methods, proposing context-appropriate solutions, and conducting ex-post fact verification of results provided by project reporting mechanisms. 41. Subcomponent 3. 1: Third Party Monitoring ( Competitively selected TPM agencies; US $ 13. 75 million: US $ 6. 15 million equivalent IDA [ WHR ] and US $ 7. 6 million Trust Funds [ US $ 0. 8 million SDTF and US $ 6. 8 million MDTF ] ). The project will finance TPM of delivery of basic health services under Subcomponent 1. 1 and will build on arrangements through the COVID-19 Emergency Response and Health System Preparedness Project ( CERHSPP - P176480 ), incorporating lessons learned from the project. TPM will provide critical assessment and survey data, in complement to routine data through DHIS2, in support of the country \u2019 s overall HMIS.", + "ner_text": [ + [ + 1311, + 1316, + "named" + ] + ], + "validated": true, + "empirical_context": "1 and will build on arrangements through the COVID-19 Emergency Response and Health System Preparedness Project ( CERHSPP - P176480 ), incorporating lessons learned from the project. TPM will provide critical assessment and survey data, in complement to routine data through DHIS2, in support of the country \u2019 s overall HMIS.", + "type": "system", + "explanation": "DHIS2 is indeed a data source as it is referenced in relation to supporting the country's health management information system.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of providing data for health management information systems.", + "contextual_reason_agent": "DHIS2 is indeed a data source as it is referenced in relation to supporting the country's health management information system.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "In addition to software and hardware management, QRC is also responsible for data verification and will coordinate with the DCU and individual units to provide all necessary disaggregated data needed to monitor Program indicators and DLIs. 53. While training on the use of the OpenEMIS was conducted during the 2016 \u2013 2017 academic year, there is a critical need to build MOE staff capacity in this regard. In order to exploit the full potential of the OpenEMIS for policy development and planning, staff capacity in the fundamentals of data analytics is necessary and will be supported by this Program. C. Disbursement Arrangements 54. For the PforR part, the DLIs will be used for disbursement. There are eight DLIs spread across five years. 55. The disbursement will be contingent upon the Government furnishing evidence satisfactory to the World Bank that it has achieved the respective disbursement \u2010 linked results ( DLRs ) and these are verified by the Independent Verification Agency ( IVA ) as specified in the verification protocol. Application for withdrawal from the World Bank \u2019 s financing account of amounts allocated to individual DLRs and calculated in accordance with the relevant formula will be sent to the World Bank any time after the World Bank has notified the GOJ in writing that it has accepted evidence of achievement of the DLRs and the amount eligible for payment.", + "ner_text": [ + [ + 277, + 285, + "named" + ] + ], + "validated": false, + "empirical_context": "53. While training on the use of the OpenEMIS was conducted during the 2016 \u2013 2017 academic year, there is a critical need to build MOE staff capacity in this regard. In order to exploit the full potential of the OpenEMIS for policy development and planning, staff capacity in the fundamentals of data analytics is necessary and will be supported by this Program.", + "type": "program", + "explanation": "However, OpenEMIS is mentioned as a program aimed at building capacity, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset because it is associated with data analytics and policy development.", + "contextual_reason_agent": "However, OpenEMIS is mentioned as a program aimed at building capacity, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 43, + "text": "Annex 3: Results Framework and Monitoring for Overall Program REPUBLIC OF YEMEN SOCIAL WELFARE FUND INSTITUTIONAL SUPPORT PROJECT ( i ) Increase the share of pool households among the beneficiaries ol the cash transfer program; and ( ii: contribute in building the humar capital of SWF beneficiaries ir selected districts. Intermediate / Immediate Results Component 1: Improve the Cash Transfer Program Increased application of PMT method to existing and future SWF applicant survey data Adoption of poverty-based targeting policy Development and implementation of a: ommunications strategy [ mproved case management [ mproved monitoring & evaluation; apacity [ mproved administrative cost MIS in place and used by n anagem en t team Project Outcome Indicators 1. 2. 3. 4. Percentage of cash transfer beneficiaries who are in groups A and B, ( according to PMT scores ). ( Target: Percentage of cash transfer applicants who are in groups A and B ( Target: Increase in the fraction of total cash transfer amount that is awarded to the poorest 20 % of the population ( lowest 2 deciles according to HBS ) ( Target 20 % increase ) Percentage increase in human capital outcomes of households receiving BDP services to comparable households that do not receive BDP services ( Target 20 % ) 60-70 % ) 60-70 % ) Results Indicators for Each Component 1.", + "ner_text": [ + [ + 462, + 487, + "named" + ], + [ + 62, + 79, + "SWF applicant survey data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Annex 3: Results Framework and Monitoring for Overall Program REPUBLIC OF YEMEN SOCIAL WELFARE FUND INSTITUTIONAL SUPPORT PROJECT ( i ) Increase the share of pool households among the beneficiaries ol the cash transfer program; and ( ii: contribute in building the humar capital of SWF beneficiaries ir selected districts. Intermediate / Immediate Results Component 1: Improve the Cash Transfer Program Increased application of PMT method to existing and future SWF applicant survey data Adoption of poverty-based targeting policy Development and implementation of a: ommunications strategy [ mproved case management [ mproved monitoring & evaluation; apacity [ mproved administrative cost MIS in place and used by n anagem en t team Project Outcome Indicators 1. 2.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as part of the cash transfer program's monitoring and evaluation efforts.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'survey data' which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as part of the cash transfer program's monitoring and evaluation efforts.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 50, + "text": "least 80 % of public primary schools and at least 50 % of private primary schools in districts 1-2 ( Djibouti Ville ) transmit their data via the application on time At least 80 % of public primary schools and at least 50 % of private primary schools in districts 3-4 ( Balbala ) transmit their data via the application on time 60 % of primary schools in the regions and in the refugee villages transmit their data via the application on time 70 % of public and 50 % of private primary schools transmit their data via the application on time Modernization of the EMIS ( Text ) No Evaluation report of existing data systems and recommendations for strengthening EMIS performance Student information system is developed Student information system is developed Student information system is developed Student information system is developed", + "ner_text": [ + [ + 563, + 567, + "named" + ] + ], + "validated": false, + "empirical_context": "least 80 % of public primary schools and at least 50 % of private primary schools in districts 1-2 ( Djibouti Ville ) transmit their data via the application on time At least 80 % of public primary schools and at least 50 % of private primary schools in districts 3-4 ( Balbala ) transmit their data via the application on time 60 % of primary schools in the regions and in the refugee villages transmit their data via the application on time 70 % of public and 50 % of private primary schools transmit their data via the application on time Modernization of the EMIS ( Text ) No Evaluation report of existing data systems and recommendations for strengthening EMIS performance Student information system is developed Student information system is developed Student information system is developed Student information system is developed", + "type": "system", + "explanation": "However, EMIS is mentioned as a modernization effort and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data transmission from schools.", + "contextual_reason_agent": "However, EMIS is mentioned as a modernization effort and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 33, + "text": "WFP works to complement MENFOP \u2019 s school canteen program. UNHCR coordinates on the educational needs of refugees. The Japanese International Cooperation Agency currently sends experts in mathematics instruction and school management committees. The French Development Agency, also active in the sector, works in TVET and Higher Education. Finally, the EAA Foundation is collaborating with the World Bank on their Educate A Child program for out of school children. They are contributing US $ 3. 8 million with results-based funding aimed at helping the GoD enroll OOSC in the project. G. Lessons Learned and Reflected in the Project Design 72. The proposed project will benefit from the technical and policy dialogue in education conducted with the GoD on an ongoing basis since 2000. It will also benefit from the World Bank \u2019 s strong analytical understanding of contemporary education issues in Djibouti that is based on: ( a ) the Systematic Country Diagnostic ( 2018 ), ( b ) the EDAM4 Household Survey ( 2018 ), and ( c ) the support to MENFOP in their revision of their Education Action Plan for 2017-2020. The World Bank has also recently completed a SABER-ECD study ( 2019 ) and is currently studying the educational needs of refugees ( expected 2019 ). The project will also draw on lessons from the UNICEF-supported revision to the Out of School Children Survey ( 2019 ), which focuses on refugees, girls, and school leavers. 73. The following table highlights lessons from the two previous projects, PAEQ ( P145323 ) and ACIGEF ( P123315 ), that will be incorporated into the new Project. Table 3: Lessons learned from previous IDA-financed projects in education Lessons Mitigation Measures in this Project When developing a sub-sector in which there has been limited activity, like preschool, it is important to consider whether the legislative and regulatory framework is in place and is needed as a first step In the pre-primary sub-sector, the policy and quality framework will be developed first by incentivizing MENFOP to invest in the creation of an enabling environment through a DLI.", + "ner_text": [ + [ + 1160, + 1175, + "named" + ] + ], + "validated": false, + "empirical_context": "It will also benefit from the World Bank \u2019 s strong analytical understanding of contemporary education issues in Djibouti that is based on: ( a ) the Systematic Country Diagnostic ( 2018 ), ( b ) the EDAM4 Household Survey ( 2018 ), and ( c ) the support to MENFOP in their revision of their Education Action Plan for 2017-2020. The World Bank has also recently completed a SABER-ECD study ( 2019 ) and is currently studying the educational needs of refugees ( expected 2019 ). The project will also draw on lessons from the UNICEF-supported revision to the Out of School Children Survey ( 2019 ), which focuses on refugees, girls, and school leavers.", + "type": "study", + "explanation": "However, the context indicates that it is a study and not explicitly described as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'study' in its name, which can imply data collection.", + "contextual_reason_agent": "However, the context indicates that it is a study and not explicitly described as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 42, + "text": "The World Bank Uganda: Investment for Industrial Transformation and Employment ( P171607 ) Page 37 of 92 loan repayments made and status of loans. BoU will use the same system for the INVITE Trust. The functionality / features with relevance to the project include the following: ( a ) Financial Accounting and General Ledger that is integrated ( b ) Security: access levels and audit trail ( c ) Microsoft Integrated Reports with ability to export reports into MS Excel. ( d ) Ability to produce reports by set criteria 99. PSFU uses the SUN accounting system, which is sufficiently developed and documented to meet the needs of a public interest membership and donor-funded body. 100. Staffing. The finance operations function of BoU is headed by the Executive Director Finance ( EDF ) who reports to the Deputy Governor. The EDF function supervises the Chief Accountant and Director National Payment Systems. The EDF will liaise with the ACF Head of Division to process project transactions and the proposed staffing appears reasonable to cover the project financial management needs once mainstreamed into BoU operations. 101. Banking and funds flow. PSFU and BoU will have designated and operational project accounts in US dollars and Ugandan shillings to be opened in the BoU per existing arrangements with the borrower. IDA disbursements will flow through these accounts for project implementation.", + "ner_text": [ + [ + 539, + 560, + "named" + ] + ], + "validated": false, + "empirical_context": "( d ) Ability to produce reports by set criteria 99. PSFU uses the SUN accounting system, which is sufficiently developed and documented to meet the needs of a public interest membership and donor-funded body. 100.", + "type": "system", + "explanation": "However, it is described as an accounting system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that handles data.", + "contextual_reason_agent": "However, it is described as an accounting system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 39, + "text": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 34 of 54 88. COVID-19 vaccine uptake is lower among women in Iraq. According to the findings of the Facebook survey conducted under I3RF, only 25 percent of female respondents indicated they would get vaccinated when the COVID - 19 vaccine is made available compared to 40 percent of male respondents. Actual vaccination coverage shows more stark gender differences in uptake, with men receiving approximately 65 percent of vaccines delivered to date. 13 Until recently, nursing mothers and pregnant women were not eligible to receive COVID-19 vaccines. This can also partly explain the lower vaccination rates among women. 89. Lack of understanding of the benefits and importance of the vaccine could have serious repercussions in the uptake among priority population groups, especially women who have more limited options to access information than men. For example, 67 percent of women in Iraq use the Internet compared to 84 percent of men. These gender dimensions intersect with other inequities, particularly for populations that are poor, with limited access to formal education, living in hard-to-reach areas, temporary or informal settlements, or living with disabilities. 90. Specific considerations in terms of media tools and messaging will be made when targeting women, men, and vulnerable populations in rural areas who are much more likely to have limited access to information.", + "ner_text": [ + [ + 166, + 181, + "named" + ], + [ + 15, + 19, + "Facebook survey <> data geography" + ], + [ + 127, + 131, + "Facebook survey <> data geography" + ], + [ + 223, + 241, + "Facebook survey <> reference population" + ], + [ + 350, + 366, + "Facebook survey <> reference population" + ], + [ + 958, + 962, + "Facebook survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "COVID-19 vaccine uptake is lower among women in Iraq. According to the findings of the Facebook survey conducted under I3RF, only 25 percent of female respondents indicated they would get vaccinated when the COVID - 19 vaccine is made available compared to 40 percent of male respondents. Actual vaccination coverage shows more stark gender differences in uptake, with men receiving approximately 65 percent of vaccines delivered to date.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on vaccination intentions among different genders.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on vaccine uptake.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on vaccination intentions among different genders.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 96, + "text": "For instance, in estimates of the determinants of the most recent PISA scores in Jordan World Bank ( HDNED, 2008 ) finds a significant and positive association with the number of hours of instruction in a subject. In fact, the effect appears to be relatively large ( e. g., regression coefficients on the order of 8. 5 to 10 points per hour ). These results are robust with respect to different model estimations, comparisons of high and low achieving students, and probability of students achieving at a higher proficiency level \u2014 in most cases for Math, Reading, and Science. 24. One of the stated goals of the ERfKE II reforms is to eliminate double shifting. Eliminating double-shifting is popular with policy makers and some education analysts, but given that it involves lowering the overall use of infrastructure investments rather than raising them, it is often difficult analytically to make an economic case since sufficient outcome data are not usually available. However, we can deduce from Parolin \u2019 s ( 2008 ) school planning study that eliminating double shifting can add about 4 hours per week of instruction. 13 In addition, we can isolate the costs of eliminating double shifting.", + "ner_text": [ + [ + 66, + 70, + "named" + ] + ], + "validated": false, + "empirical_context": "For instance, in estimates of the determinants of the most recent PISA scores in Jordan World Bank ( HDNED, 2008 ) finds a significant and positive association with the number of hours of instruction in a subject. In fact, the effect appears to be relatively large ( e.", + "type": "program", + "explanation": "'PISA' is mentioned in the context of scores and associations but is not described as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is often associated with educational assessments and scores.", + "contextual_reason_agent": "'PISA' is mentioned in the context of scores and associations but is not described as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 39, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 35 of 103 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 Project Management and Institutional Strengthening Sector Management Information System ( MIS ) operationalized ( Yes / No ) No No Yes Yes Yes Yes Percentage of grievances registered related to delivery of sub-project benefits that are timely and satisfactorily addressed ( Percentage ) 0. 00 80. 00 80. 00 80. 00 80. 00 80. 00 Percentage of schemes with completed environmental and social screening processes and against which required mitigation measures including compensations are settled ( Percentage ) 0. 00 10. 00 35. 00 60. 00 90. 00 100. 00 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of people provided with access to safe drinking water services under the Project The indicator measures the cumulative number of people accessing water services that meet the following conditions: \u2022 Quality: water supplied Annual Regular WASH MIS reports, HH surveys, and Project progress Regular WASH MIS reports, HH surveys, and Project progress reports.", + "ner_text": [ + [ + 285, + 314, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 35 of 103 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 Project Management and Institutional Strengthening Sector Management Information System ( MIS ) operationalized ( Yes / No ) No No Yes Yes Yes Yes Percentage of grievances registered related to delivery of sub-project benefits that are timely and satisfactorily addressed ( Percentage ) 0. 00 80.", + "type": "system", + "explanation": "However, it is mentioned as a system operationalized for project management, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system operationalized for project management, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 11, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 7 of 40 2020. The IMF has already forecasted an overall economic stagnation in 2020 in Sudan. GDP is expected to decrease between 4-10 percent in 2020 due to the combined impact of the economic crisis exacerbated by the social distancing measures to curb the spread of COVID-19. Slowing growth and COVID-19 policy responses will have a significant negative impact on government revenue. Slowing activity will automatically translate into lower levels of tax and other government revenue collection. The combined effect on government revenues is projected to be significant. 6. Poverty reduction stagnated in 2018 mainly due to weak economic growth, political and macroeconomic instability and the shortage of essential food items such as bread. According to the most recent official estimates of poverty based on the 2014 / 15 National Household Budget and Poverty Survey ( NHBPS ), 36. 1 percent of Sudanese population ( or 13. 4 million people ) are poor. However, the overall / national poverty rate masks wide disparities across Sudan \u2019 s 18 states. For example, Central Darfur State in western Sudan recorded the highest rate of poverty ( 67. 2 percent ).", + "ner_text": [ + [ + 907, + 951, + "named" + ], + [ + 4, + 14, + "National Household Budget and Poverty Survey <> publisher" + ], + [ + 167, + 172, + "National Household Budget and Poverty Survey <> data geography" + ], + [ + 897, + 906, + "National Household Budget and Poverty Survey <> reference year" + ], + [ + 954, + 959, + "National Household Budget and Poverty Survey <> acronym" + ], + [ + 980, + 999, + "National Household Budget and Poverty Survey <> reference population" + ], + [ + 1147, + 1167, + "National Household Budget and Poverty Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Poverty reduction stagnated in 2018 mainly due to weak economic growth, political and macroeconomic instability and the shortage of essential food items such as bread. According to the most recent official estimates of poverty based on the 2014 / 15 National Household Budget and Poverty Survey ( NHBPS ), 36. 1 percent of Sudanese population ( or 13.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced as the source of official estimates of poverty.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced as the source of official estimates of poverty.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "186_multi-page", + "page": 54, + "text": "within MINEDUC, are undertaken on an ongoing basis. It is expected that the reviews will produce information that the Ministry will utilize in prioritizing and re-allocating annual expenditures. In collaboration with the central ministries, the expenditure and budget management reviews will provide the foundation for a Medium Term Expenditure Framework ( MTEF ) that matches expected resources with required expenditures. The establishment of a functioning MTEF will be an important and essential foundation for moving to a SWAP that donors hope to support as a basis for future lending by 2003. Future Economic Analysis: In addition to the ongoing revision of the education simulation model and of the public expenditure analysis, several other types of economic analysis will be initiated ( as early as the PPF ) and undertaken during the project. The results of the proposed analysis will be used to better inform education policy makers and determine if Rwanda is prepared to utilize a SWAP approach to donor financing. While insufficient data currently exist to undertake traditional cost-benefit analysis and benefit incidence analysis, Rwanda ' s household survey will be strengthened to permit such analysis in future. In particular, information on households ' access to, and expenditure on, education will be developed to determine the effect of income, location, gender, etc., on access to and progression in education.", + "ner_text": [ + [ + 1156, + 1172, + "named" + ], + [ + 592, + 596, + "household survey <> publication year" + ], + [ + 960, + 966, + "household survey <> data geography" + ], + [ + 1145, + 1151, + "household survey <> data geography" + ], + [ + 1259, + 1269, + "household survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The results of the proposed analysis will be used to better inform education policy makers and determine if Rwanda is prepared to utilize a SWAP approach to donor financing. While insufficient data currently exist to undertake traditional cost-benefit analysis and benefit incidence analysis, Rwanda ' s household survey will be strengthened to permit such analysis in future. In particular, information on households ' access to, and expenditure on, education will be developed to determine the effect of income, location, gender, etc., on access to and progression in education.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned that the household survey will be strengthened to permit analysis, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data related to households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned that the household survey will be strengthened to permit analysis, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 52, + "text": "UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. b ) Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and surrounding population. Initially, only those sites supported by this project will be added. Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. c ) Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. d ) Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed at the start of year 1.", + "ner_text": [ + [ + 724, + 746, + "named" + ], + [ + 780, + 816, + "health facility survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. c ) Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. d ) Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed at the start of year 1.", + "type": "survey", + "explanation": "This is a dataset as it involves a systematic survey to collect data on HIV-related services at health facilities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured assessment of health facilities.", + "contextual_reason_agent": "This is a dataset as it involves a systematic survey to collect data on HIV-related services at health facilities.", + "contextual_signal": "described as a survey to assess availability of services", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 89, + "text": "The monitoring component of the M & E approach will require data collection across different dimensions of the Project: ( 1 ) Performance Tracking data ( e. g. sales, employment, wages, transactions, etc ); ( 2 ) Activity Tracking data reflecting the Theory of Change ( e. g. as reflected by the number of loans serviced on the project \u2019 s web platform, the number of receivables purchased on the factoring platform, the number of refugees receiving business training, etc. ); ( 3 ) Key Results data ( e. g. value of private investment in manufacturing firms, formal employment in manufacturing firms, etc ); and ( 4 ) Key Risks tracking ( e. g. project implementation performance, NPL ratio of banks and PAR of MFIs, etc ). The evaluation component will build on the data collected under the monitoring component, but additionally focus on implementing a structured impact evaluation to measure the impact and attribution of the different policies under the project i. e. incubators, industrial parks, etc.,", + "ner_text": [ + [ + 213, + 235, + "named" + ], + [ + 358, + 415, + "Activity Tracking data <> data description" + ], + [ + 431, + 439, + "Activity Tracking data <> reference population" + ] + ], + "validated": true, + "empirical_context": "g. sales, employment, wages, transactions, etc ); ( 2 ) Activity Tracking data reflecting the Theory of Change ( e. g.", + "type": "data", + "explanation": "In this context, 'Activity Tracking data' is used to reflect specific metrics related to the Theory of Change, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' which often implies a structured collection.", + "contextual_reason_agent": "In this context, 'Activity Tracking data' is used to reflect specific metrics related to the Theory of Change, indicating it serves as a data source.", + "contextual_signal": "follows 'reflecting the Theory of Change'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "161_28046", + "page": 14, + "text": "The project \u2019 s Task Force, also under the supervision o f the Secretary General, would follow i t s technical implementation and would ensure objectives are reached. The project would finance the recurrent costs of this Task Force, its logistical support ( transport and computer system ), technical assistance, and study tours in other African countries. Institutional responsibilities for the Medical Waste Management Plan would rest ( a ) for the overall responsibility, with the MOH Directorate o f Equipment and Infrastructure ( DIEM ), and ( b ) for the decentralized levels, with the General Director o f Hospitals, the Head o f the Health Centers and o f the Health Posts. 3. Monitoring and evaluation of outcomes / results A yearly health expenditures tracking survey would be carried out, as well as a yearly client satisfaction survey focusing on quality o f care. Those two tools along with data from the health information system, would provide the data necessary to assess progress and identify bottlenecks. Guinea has a management information system quite sophisticated where a monitoring o f health centers activities and finances i s undertaken every six months, thus providing facility-based information. For evaluation purposes, a DHS was be carried out in 2004 which will serve as base-line.", + "ner_text": [ + [ + 813, + 846, + "named" + ], + [ + 1023, + 1029, + "yearly client satisfaction survey <> data geography" + ], + [ + 1277, + 1281, + "yearly client satisfaction survey <> publication year" + ], + [ + 1328, + 1346, + "yearly client satisfaction survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "3. Monitoring and evaluation of outcomes / results A yearly health expenditures tracking survey would be carried out, as well as a yearly client satisfaction survey focusing on quality o f care. Those two tools along with data from the health information system, would provide the data necessary to assess progress and identify bottlenecks.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that will provide data for assessing progress.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured survey designed to collect data on client satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that will provide data for assessing progress.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 85, + "text": "Only 8 % of households are officially female-headed, and a further 10 % can be considered de-facto female-headed as the male head is away for 6 months a year or more. Poverty incidence is not significantly different between these and other households; however the Household Budget Survey ( HBS ) established that resources are better allocated in female-headed households, which spend more on education and food than on tobacco and qat. 240. Yemen \u2019 s DPPR recognizes the challenges faced by Yemeni women and discusses the need to tackle the complex web of economic, social, cultural, and legal obstacles to women \u2019 s empowerment. In particular, one of the aims of the DPPR is to increase women \u2019 s employment in both private and public sectors and their participation in local and parliamentary elections, both as candidates and voters. Yet, the DPPR does not provide a clear action plan to move from what can be achieved through legal means to the necessary social transformations that can underpin sustainable progress. 74", + "ner_text": [ + [ + 264, + 287, + "named" + ], + [ + 290, + 293, + "Household Budget Survey <> publisher" + ], + [ + 347, + 371, + "Household Budget Survey <> reference population" + ], + [ + 442, + 447, + "Household Budget Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Only 8 % of households are officially female-headed, and a further 10 % can be considered de-facto female-headed as the male head is away for 6 months a year or more. Poverty incidence is not significantly different between these and other households; however the Household Budget Survey ( HBS ) established that resources are better allocated in female-headed households, which spend more on education and food than on tobacco and qat. 240.", + "type": "survey", + "explanation": "The Household Budget Survey is explicitly mentioned as a source that established findings about resource allocation in households, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical data on household spending.", + "contextual_reason_agent": "The Household Budget Survey is explicitly mentioned as a source that established findings about resource allocation in households, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 28, + "text": "Currently, data from the lesson observations is underutilized. The Kenya National Examinations Council ( KNEC ) will conduct quarterly analysis of this data and disseminate it to the clusters. \u2022 Student assessment: target schools will conduct regular student assessments based on identified benchmarks of proficiency in literacy and numeracy in the early grades, and also English, science, and mathematics in grade 7, and use the findings to facilitate remedial learning for lagging learners. The State of Ceara in Brazil is a role model for reducing learning poverty. Each target school will set a target to improve literacy and numeracy scores in grade 3 as well as targets to improve scores in English, math, and sciences in grade 7. The school grants / SIP operational manual will include templates for tools / instruments that can help the target schools to set learning targets and monitor progress.", + "ner_text": [ + [ + 25, + 44, + "named" + ] + ], + "validated": false, + "empirical_context": "Currently, data from the lesson observations is underutilized. The Kenya National Examinations Council ( KNEC ) will conduct quarterly analysis of this data and disseminate it to the clusters.", + "type": "non-dataset", + "explanation": "'Lesson observations' is mentioned as data that is underutilized but is not described as a structured collection or dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'lesson observations' refers to a structured collection of data related to educational assessments.", + "contextual_reason_agent": "'Lesson observations' is mentioned as data that is underutilized but is not described as a structured collection or dataset itself.", + "contextual_signal": "mentioned only as data, not as a data source", + "tags": [] + }, + { + "filename": "077_BANGLADESH-PAD-02212019-636878521930630901", + "page": 55, + "text": "The World Bank Emergency Multi-Sector Rohingya Crisis Response Project ( P167762 ) Page 54 multipurpose disaster shelters / community service centers, as measured by the design capacity. Households participating in community workfare and services Number of eligible households participating in workfare and services activities Biannual WFP SCOPE Platform Minimum participation of 30 days of work per household will be counted MoDMR Households supported through the participation of DRP women in community workfare and services Number of households supported through the participation of DRP women in community workfare and services Biannual WFP Scope Platform Minimum participation of 30 days of work per household will be counted MoDMR Development of the DRP Camp Management System Documentation and presentation of the camp governance improvement strategy which entails the strengthening of the camp level capacity and tools, as well as enhancing the DRP volunteer network as the main service interface ( for GRM, behavior changing communication and outreach ) with the Rohingya population.", + "ner_text": [ + [ + 756, + 782, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Emergency Multi-Sector Rohingya Crisis Response Project ( P167762 ) Page 54 multipurpose disaster shelters / community service centers, as measured by the design capacity. Households participating in community workfare and services Number of eligible households participating in workfare and services activities Biannual WFP SCOPE Platform Minimum participation of 30 days of work per household will be counted MoDMR Households supported through the participation of DRP women in community workfare and services Number of households supported through the participation of DRP women in community workfare and services Biannual WFP Scope Platform Minimum participation of 30 days of work per household will be counted MoDMR Development of the DRP Camp Management System Documentation and presentation of the camp governance improvement strategy which entails the strengthening of the camp level capacity and tools, as well as enhancing the DRP volunteer network as the main service interface ( for GRM, behavior changing communication and outreach ) with the Rohingya population.", + "type": "system", + "explanation": "However, it is described as a management system focused on governance improvement rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, it is described as a management system focused on governance improvement rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 69, + "text": "These tools may advance the use of Kobo-toolbox platform for self - reporting purposes. The M & E specialists will be responsible for verification of the accuracy and timeliness of data entry and training of new staff, WASH-committed contractors, and other implementation partners on the use of the M & E system. The project M & E system will be, to the extent possible, integrated with the WSW MIS and allow gender disaggregation. 24. A key aspect of the results monitoring will be the consumer satisfaction surveys for water services and engagement processes. Consumer satisfaction surveys will be conducted every year for selected subprojects after the start of project implementation to assess satisfaction levels and measure attributable outcomes of the project. Surveys for the engagement processes will be conducted every year using various means of communication. Baseline surveys will be conducted in the first year of project implementation after effectiveness. At the same time, the project will continue relying on WASH - committee models adopted under the RWSSP to enable frequent community roundtables or forums with water users to inform them of the status of investments, seek their feedback regarding project implementation progress, and discuss any corrective action which was taken to address issues raised through the feedback process. Results of such meetings will be documented and reported through the regular M & E process.", + "ner_text": [ + [ + 487, + 516, + "named" + ], + [ + 1131, + 1142, + "consumer satisfaction surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "24. A key aspect of the results monitoring will be the consumer satisfaction surveys for water services and engagement processes. Consumer satisfaction surveys will be conducted every year for selected subprojects after the start of project implementation to assess satisfaction levels and measure attributable outcomes of the project.", + "type": "survey", + "explanation": "The context confirms that these surveys are conducted to assess satisfaction levels and measure outcomes, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to structured surveys designed to collect data on consumer satisfaction.", + "contextual_reason_agent": "The context confirms that these surveys are conducted to assess satisfaction levels and measure outcomes, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "060_Yemen-Emergency-COVID-19-Project", + "page": 18, + "text": "This improves essential healthcare service delivery and enables people to access the appropriate care, which builds resilience that is especially key for the poor who are the most vulnerable and least equipped to handle the impacts of climate change. C. Project Beneficiaries 29. The expected project beneficiaries will be the entire population in Yemen including nationals and non-nationals, medical and emergency personnel, laboratory and testing facilities, and health agencies across the country. In 2018, the total estimated population size was 28. 9 million, 13 including about 24 million needing humanitarian assistance. 14 For immediate response to stop the transmission and allocate necessary resources for treatment of cases, the project will specifically target communities that have high risks of local transmission, such as highly populated cities across the whole country. 9 https: / / www. who. int / countries / yem / en /. Accessed on March 23, 2020. 10 https: / / www. who. int / bulletin / volumes / 93 / 10 / 15-021015 / en /. Accessed on March 23, 2020. 11 https: / / apps. who. int / gho / data / node. country. country-YEM. Accessed on March 23, 2020. 12 https: / / data. worldbank. org / indicator / SH. MED. BEDS. ZS? view = chart. Accessed on March 23, 2020. 13 The World Bank. World Development Indicators Data Bank. https: / / databank. worldbank. org / source / world-development-indicators. Accessed on March 21, 2020. 14 United Nations Office for the Coordination of Humanitarian Affairs. Relief Web data on Yemen. https: / / m. reliefweb. int / report / 3422113. Accessed on March 21, 2020.", + "ner_text": [ + [ + 1304, + 1342, + "named" + ], + [ + 348, + 353, + "World Development Indicators Data Bank <> data geography" + ], + [ + 504, + 508, + "World Development Indicators Data Bank <> publication year" + ], + [ + 1292, + 1302, + "World Development Indicators Data Bank <> publisher" + ], + [ + 1539, + 1544, + "World Development Indicators Data Bank <> data geography" + ] + ], + "validated": true, + "empirical_context": "13 The World Bank. World Development Indicators Data Bank. https: / / databank.", + "type": "data bank", + "explanation": "This is indeed a dataset as it is explicitly referred to as a 'Data Bank' which implies a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data Bank' in its name, suggesting a collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a 'Data Bank' which implies a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 36, + "text": "Cycle 61. 5 % 97. 8 % 61. 6 % 98. 0 % 61 8 % 98. 2 % 62. 0 % 98. 4 % 62. 3 % 98. 6 % 62. 6 % 98. 8 % 63. 0 % % Annual EMIS Directorate of Educational Planning ( DEP ) 2 The baseline for the enrollment rates based on the EMIS data for the year 2007 / 2008", + "ner_text": [ + [ + 220, + 224, + "named" + ] + ], + "validated": false, + "empirical_context": "8 % 63. 0 % % Annual EMIS Directorate of Educational Planning ( DEP ) 2 The baseline for the enrollment rates based on the EMIS data for the year 2007 / 2008", + "type": "program", + "explanation": "However, EMIS is mentioned as a program rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with educational data collection.", + "contextual_reason_agent": "However, EMIS is mentioned as a program rather than a structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 18, + "text": "With over 1. 1 million people ( 10 percent of the population ) already internally displaced, new displacement, particularly to urban centres, is compounding existing development deficits and strain the capacity of most communities. 13. Female-headed households and displaced women and girls are among the populations most vulnerable to the adverse impacts of the current drought. In addition, the limited security in IDP settlements, poor living conditions and lack of protection pose added risks of gender-based violence ( GBV ) and sexual exploitation to female IDPs. The separation of many women and children from community and familial support structures, as well as from traditional livelihoods activities, contributes to an increased reliance on marginal, inconsistent and hazardous livelihood strategies, which heighten poverty, food 7 OCHA: Humanitarian Bulletin Somalia, May 4 2017 8 The RDNA relies on the following sources of information: Drought Severity data was derived from ENDVI 10-year historical anomaly data based on LANDSAT 8 imagery. Displacement figures were derived from compiled IDP data from IOM, UN Habitat, UNHCR and local Ministries of Interior. Agriculture and livestock impact data heavily relied on expert judgment and data from FAO FSNAU database ( 2011-2017 ), UNFPA 2014 population data and LANDSCAN 2015 population density. Cholera / AWD forecasts and data was derived from World Health Organization ( WHO ) data, reporting and forecasting analysis. Water resources data was derived from UN SWALIM and ICRC databases 9 When these assets are priced at the prevailing depressed prices in the markets. The scenarios assume drought effect on grazing areas, disparities in household resilience to the effects of drought based on livelihood zone, wealth and coping mechanisms, as well as differences in adaptation of livestock to drought 10 http: / / reliefweb. int / report / somalia / unhcr-somalia-drought-displacement-period-1-nov-2016-31-march-2017", + "ner_text": [ + [ + 1103, + 1111, + "named" + ], + [ + 236, + 260, + "IDP data <> reference population" + ], + [ + 871, + 878, + "IDP data <> data geography" + ], + [ + 950, + 971, + "IDP data <> data description" + ], + [ + 1117, + 1120, + "IDP data <> author" + ], + [ + 1122, + 1132, + "IDP data <> publisher" + ], + [ + 1134, + 1139, + "IDP data <> publisher" + ], + [ + 1281, + 1290, + "IDP data <> reference year" + ] + ], + "validated": true, + "empirical_context": "The separation of many women and children from community and familial support structures, as well as from traditional livelihoods activities, contributes to an increased reliance on marginal, inconsistent and hazardous livelihood strategies, which heighten poverty, food 7 OCHA: Humanitarian Bulletin Somalia, May 4 2017 8 The RDNA relies on the following sources of information: Drought Severity data was derived from ENDVI 10-year historical anomaly data based on LANDSAT 8 imagery. Displacement figures were derived from compiled IDP data from IOM, UN Habitat, UNHCR and local Ministries of Interior. Agriculture and livestock impact data heavily relied on expert judgment and data from FAO FSNAU database ( 2011-2017 ), UNFPA 2014 population data and LANDSCAN 2015 population density.", + "type": "data", + "explanation": "'IDP data' is indeed a dataset as it is compiled data used for empirical analysis regarding displacement figures.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'IDP data' is a dataset because it is mentioned as a source of information for displacement figures.", + "contextual_reason_agent": "'IDP data' is indeed a dataset as it is compiled data used for empirical analysis regarding displacement figures.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "112_IDAR2016-0221-PAD-09012016", + "page": 21, + "text": "The MoLG will be responsible for providing technical input to MDLF, namely all of the technical aspects of procurement, monitoring of progress towards the PDO, and details of project activities. MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the Financial Management Information System ( FMIS ) to automate data aggregation, storage, and presentation as part of a results - based Monitoring and Evaluation ( M & E ) system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the various MDLF-implemented projects and programs with the PA \u2019 s NDP strategic objectives. As such, no additional arrangements or staff are required for the M & E of this project. C. Sustainability 31. At present, there is a gap in metropolitan area urban planning policy, practice, and methodology in Palestine, and MoLG has indicated interest in developing capacity to administer, advise on, and implement such practice in Palestine. The Ministry conducted a study on metropolitan planning in 2007, which led to the establishment of the existing joint coordination unit for the RAB area. However, MoLG was unable to go beyond this initial study to mainstream metropolitan planning into government practice or policies. The proposed ICUD therefore would provide the initial metropolitan / urban area growth planning framework and local government driven implementation experience for MoLG to guide metropolitan planning policy development. The MoLG planning unit will technically oversee the proposed project activities and receive support to improve bottom-up metropolitan planning. The urban growth planning exercise will also inform necessary reforms in the PA \u2019 s own planning system, in addition to the inputs to developing LGUs urban area plans.", + "ner_text": [ + [ + 473, + 510, + "named" + ] + ], + "validated": false, + "empirical_context": "MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the Financial Management Information System ( FMIS ) to automate data aggregation, storage, and presentation as part of a results - based Monitoring and Evaluation ( M & E ) system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the various MDLF-implemented projects and programs with the PA \u2019 s NDP strategic objectives.", + "type": "system", + "explanation": "However, it is described as a management information system that automates data aggregation rather than being a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a management information system that automates data aggregation rather than being a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 76, + "text": "Table 1: Refugee and Host Population in Uganda62 Population Refugee % of total Number of firms63 Refu gee Ugandan hosts Refuge e Host North West Refugee-Hosting Districts Yumbe, Adjumani, Madi Okollo, Terego Lamwo, Koboko, Obongi 873, 844 2, 169, 200 29 % 1, 987 13, 505 South West Refugee-Hosting Districts Isingiro, Kyegegwa, Kamwenge, Kiryandongo, Kikuube 576, 922 2, 266, 800 20 % 2, 526 15, 095 Total non-Kampala RHDs 1, 450, 766 4, 436, 000 25 % 4, 513 28, 601 Total Kampala 98, 415 1, 709, 000 5 % 5, 028 104, 972 2. The economic activity slow down caused by COVID-19 has affected Uganda \u2019 s ability to generate jobs for those living in vulnerable situations, including refugees and host communities. Despite the concerted efforts to integrate refugees within the ecosystems of their host communities, refugee - hosting districts ( RHDs ) remain less developed areas. Low levels of disposable incomes have resulted in low demand and limited access to labor markets, leaving those residents with some access to land with no alternative but to live off subsistence agriculture and humanitarian aid. These areas were less developed even before the inflow of refugees and remain decoupled from resilient and viable supply chains in the economy. For example, the average value of assets among all households ( both refugee and host ) in the district of Arua64 is 560, 000 Ugandan shillings ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 62 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "ner_text": [ + [ + 1701, + 1742, + "named" + ], + [ + 473, + 480, + "refugee and host community household data <> data geography" + ], + [ + 677, + 685, + "refugee and host community household data <> reference population" + ], + [ + 1594, + 1598, + "refugee and host community household data <> publication year" + ] + ], + "validated": true, + "empirical_context": "unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data collected from the Refugee and Host Community Household Survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific household data collected from a survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data collected from the Refugee and Host Community Household Survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 306, + 355, + "named" + ], + [ + 347, + 355, + "Unified Nutrition Information System for Ethiopia <> data geography" + ], + [ + 358, + 363, + "Unified Nutrition Information System for Ethiopia <> acronym" + ], + [ + 764, + 782, + "Unified Nutrition Information System for Ethiopia <> data type" + ], + [ + 1240, + 1263, + "Unified Nutrition Information System for Ethiopia <> data description" + ], + [ + 1268, + 1290, + "Unified Nutrition Information System for Ethiopia <> reference population" + ] + ], + "validated": true, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "system", + "explanation": "It IS a dataset as it is described as a data collection system linked to other data management systems.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it collects and manages data.", + "contextual_reason_agent": "It IS a dataset as it is described as a data collection system linked to other data management systems.", + "contextual_signal": "mentioned as a data collection system linked to DHIS2", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "185_multi-page", + "page": 201, + "text": "Impact monitoring: The impact of the national response to the epidemic will be reflected in a broad range of indicators, including: 1. prevalence rates of HIV infection, by age group and gender; 2. median age at first sex; 3. reported condom use at last sex with non-regular partner; 4. STI incidence / prevalence; 5. primary school enrollment and completion rates among orphans; 6. the number of non-regular sexual partners during a defined period, by marital status, age group, and gender. The collection of these indicators will be the responsibility of the various implementing agencies and will be measured through a variety of instruments, including the Demographic and Health Survey of 2003. The Ministry of Health collects monthly HIV / AIDS prevalence data from 22 sentinel surveillance sites ( 13 urban and nine rural ). This existing surveillance system of HIV prevalence will be improved to include behavioral indicators ( \" second-generation surveillance \" ) with the assistance of cooperating partners. Trends in the above-mentioned indicators will be attributed to the collective efforts of the partnership against HIV / AIDS in Kenya, and to the project as part thereof.", + "ner_text": [ + [ + 731, + 765, + "named" + ], + [ + 135, + 193, + "monthly HIV / AIDS prevalence data <> data description" + ], + [ + 693, + 697, + "monthly HIV / AIDS prevalence data <> publication year" + ], + [ + 703, + 721, + "monthly HIV / AIDS prevalence data <> publisher" + ], + [ + 1144, + 1149, + "monthly HIV / AIDS prevalence data <> data geography" + ] + ], + "validated": true, + "empirical_context": "The collection of these indicators will be the responsibility of the various implementing agencies and will be measured through a variety of instruments, including the Demographic and Health Survey of 2003. The Ministry of Health collects monthly HIV / AIDS prevalence data from 22 sentinel surveillance sites ( 13 urban and nine rural ). This existing surveillance system of HIV prevalence will be improved to include behavioral indicators ( \" second-generation surveillance \" ) with the assistance of cooperating partners.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected data on HIV/AIDS prevalence from multiple sites.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected on HIV/AIDS prevalence.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected data on HIV/AIDS prevalence from multiple sites.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "145_689710PAD0P1300Official0Use0Only090", + "page": 136, + "text": "123 Figure 14: Left panel graphs: Precipitation and temperature projections for the 21st century, for driest, average and wettest GCMs. Right panel: Quartiles ( 0, 25, 50, 75 and 100 % ) of rainfall and temperature projections ( source: NRB-CRA study ) Climate wizard 15. The climate wizard website48 displays the potential future changes in temperature and precipitation ( not shown ) as predicted for each country in the world by an ensemble of 16 GCMs for the medium A1B and other SRES49 climate change scenarios. The changes in precipitation projected by the ensemble-20 % ( 20 % of the climate models projected a greater decrease in precipitation ) and ensemble-80 % ( 20 % of the climate models projected a greater increase in precipitation ) are also shown. Results are summarized in Table 24 and results from the Climate Portal ( see below ) and the NRB-CRA are also included in Table 24. 48 University of Washington and the Nature Conservancy ( 2009 ); Data source: Global Climate Model ( GCM ) output, from the World Climate Research Program ' s ( WCRP ) Coupled Model Inter-comparison Project phase 3 ( CMIP3 ) multi-model dataset ( Meehl et al., 2007 ), were downscaled ( as per Maurer et al., 2009 ), using the bias - correction / spatial downscaling method of Wood et al. (", + "ner_text": [ + [ + 975, + 995, + "named" + ] + ], + "validated": false, + "empirical_context": "Results are summarized in Table 24 and results from the Climate Portal ( see below ) and the NRB-CRA are also included in Table 24. 48 University of Washington and the Nature Conservancy ( 2009 ); Data source: Global Climate Model ( GCM ) output, from the World Climate Research Program ' s ( WCRP ) Coupled Model Inter-comparison Project phase 3 ( CMIP3 ) multi-model dataset ( Meehl et al., 2007 ), were downscaled ( as per Maurer et al., 2009 ), using the bias - correction / spatial downscaling method of Wood et al. (", + "type": "model", + "explanation": "However, it is not a dataset but rather a type of model used to generate climate data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'model' in its name, which can imply data generation.", + "contextual_reason_agent": "However, it is not a dataset but rather a type of model used to generate climate data.", + "contextual_signal": "mentioned only as a model, not as a data source", + "tags": [] + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 39, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 35 of 74 Figure 3: Implementation Arrangements B. Results Monitoring and Evaluation Arrangements 73. The proposed SNSOP will develop a comprehensive M & E framework and plan, building on the existing ones under the SSSNP. The SNSOP will employ an innovative M & E system that relies primarily on electronic data collection to be stored and managed in the MIS, building on the M & E system using the Geo-Enabling Initiative for Monitoring and Supervision ( GEMS ) developed under SSSNP to allow for real time data collection and analysis, thus improving the efficiency and reducing cost of M & E. M & E activities will also be embedded in project activities where possible to minimize the burden on field-based staff. These flexible, remote arrangements allow the M & E system to adapt to various circumstances in South Sudan \u2019 s FCV context. Key M & E activities will include Registration Lessons Learned surveys that will assess the effectiveness of targeting and registration and identify areas for improvement. These surveys will provide baseline information on key demographics and socioeconomic indicators that will be tracked over the course of the project. There will also be Post Distribution Monitoring to monitor project implementation, mainly on payments under components 1 and 2.", + "ner_text": [ + [ + 449, + 452, + "named" + ] + ], + "validated": false, + "empirical_context": "The proposed SNSOP will develop a comprehensive M & E framework and plan, building on the existing ones under the SSSNP. The SNSOP will employ an innovative M & E system that relies primarily on electronic data collection to be stored and managed in the MIS, building on the M & E system using the Geo-Enabling Initiative for Monitoring and Supervision ( GEMS ) developed under SSSNP to allow for real time data collection and analysis, thus improving the efficiency and reducing cost of M & E. M & E activities will also be embedded in project activities where possible to minimize the burden on field-based staff.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a management information system, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with data collection and management.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a management information system, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 107, + "text": "Clean cooking market assessment. Limited customer exposure and inadequate buy-in for new stove technologies reduce demand and are a constraint to the adoption of clean cooking solutions. To increase uptake, an understanding of that market is required. This will in-turn inform the products that are sold and allow manufacturers and distributors to adopt marketing strategies that boost local demand. 62 A series of consultations between suppliers of clean cooking products and end users; focus groups with end users, a majority of whom are women; and analysis of industry material will be undertaken under this activity. This will provide manufacturers and distributors of clean cooking solutions insights on product preferences, willingness to pay, and constraints to clean cooking products purchase that are faced by the end users. The 60 UBOS and ICF. 2018. Uganda Demographic and Health Survey 2016. Kampala, Uganda and Rockville, Maryland, USA: UBOS and ICF. 61 Market Assessment Study: Stand-alone Solar for Productive Uses in Uganda, The World Bank. Study conducted by Economic Consulting Associates Ltd, and African Solar Designs. 62 Uganda Clean Cooking Behavioral Diagnostic, ESMAP ( Energy Sector Management Assistance Program ). 2019. Washington, D. C.: World Bank", + "ner_text": [ + [ + 861, + 897, + "named" + ], + [ + 540, + 545, + "Uganda Demographic and Health Survey <> reference population" + ], + [ + 850, + 853, + "Uganda Demographic and Health Survey <> publisher" + ], + [ + 855, + 859, + "Uganda Demographic and Health Survey <> publication year" + ], + [ + 861, + 867, + "Uganda Demographic and Health Survey <> data geography" + ], + [ + 898, + 902, + "Uganda Demographic and Health Survey <> reference year" + ], + [ + 904, + 911, + "Uganda Demographic and Health Survey <> data geography" + ], + [ + 913, + 919, + "Uganda Demographic and Health Survey <> data geography" + ], + [ + 959, + 962, + "Uganda Demographic and Health Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "2018. Uganda Demographic and Health Survey 2016. Kampala, Uganda and Rockville, Maryland, USA: UBOS and ICF.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly identified as a demographic and health survey, which is a recognized source of empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly identified as a demographic and health survey, which is a recognized source of empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 33, + "text": "28 a school-based approach to improving the delivery of education services. Organizational effectiveness at all levels of the system has been improved by the adoption of a results-based approach to policy, planning, accountability, incentives, and M & E. in ongoing professional development programs to support the delivery of the school improvement program. ( Target: 1, 100 ) Review and development of mechanisms for professional and public accountability. ( Target: 80 % of schools ) Stakeholder perceptions of relevance of M & E reports for informing policy and planning. Stakeholder views on extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. and refinement specifically in relation to this component. The indicators will be reviewed through supervision missions and also used as focal points for discussion with GOJ and other stakeholders. Teaching & Learning Resource Development: Student learning outcomes relevant to the acquisition of knowledge economy skills have been improved through realigned curriculum, authentic assessment, appropriate teaching and learning resources, and interactive classroom practices. Teacher policies revised to support application of national teacher standards.", + "ner_text": [ + [ + 613, + 623, + "named" + ] + ], + "validated": false, + "empirical_context": "( Target: 80 % of schools ) Stakeholder perceptions of relevance of M & E reports for informing policy and planning. Stakeholder views on extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. and refinement specifically in relation to this component.", + "type": "system", + "explanation": "However, SIS / EMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SIS / EMIS is a dataset because it includes the term 'data' in the context.", + "contextual_reason_agent": "However, SIS / EMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 61, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 57 of 68 PDO # 2 - Share of girls / women, refugees, and persons with disabilities completing TVET programs increased Students will be tracked upon admissions to TVET programs and their dropout or completion dates will be recorded. This will be the basis for reporting this information. This information will be reported on a quarterly basis. Every participating institution will be required to maintain student records from the time of admission to the completion or termination of their programs. A standardized data capture instrument will be used by each participating institution to gather and maintain student information. MENFOP PDO # 3 - Share of trainees employed 6 and 9 months after completion of their programs in similar sectors - disaggregated by gender, disability and refugee status ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection IRI # 1: Number and share of students engaged in Apprenticeships, Internships, Sub-component 1. 1", + "ner_text": [ + [ + 584, + 620, + "named" + ] + ], + "validated": false, + "empirical_context": "Every participating institution will be required to maintain student records from the time of admission to the completion or termination of their programs. A standardized data capture instrument will be used by each participating institution to gather and maintain student information. MENFOP PDO # 3 - Share of trainees employed 6 and 9 months after completion of their programs in similar sectors - disaggregated by gender, disability and refugee status ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection IRI # 1: Number and share of students engaged in Apprenticeships, Internships, Sub-component 1.", + "type": "instrument", + "explanation": "However, it is not a dataset but rather a tool or method for collecting data, not a structured collection itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' and relates to data collection.", + "contextual_reason_agent": "However, it is not a dataset but rather a tool or method for collecting data, not a structured collection itself.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 79, + "text": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 74 of 87 ANNEX 4: Climate and Hazard Considerations Climate Change and Natural Hazard Risks and Adaptation Opportunities 1. Resilient infrastructure development in the Republic of Chad includes consideration of existing natural hazards51 and ongoing climate change. Three key risks in this project include wildfire, flooding, and extreme heat, which are expected to increase due to climate change. Figure 4. 1. Key Hazard Risks in Chad and Their Geographic Distribution Source: ( A ) Author calculations using data from [ 46 ]; ( B ) FATHOM fluvial flooding data ( 2021 ) [ 48 ]; ( C ) Internal calculations using data from NASA NEX-GDDP CMIP5 data [ 50 ]. 2. Wildfire is recognized as a \u2018 high \u2019 risk in Chad under current climate conditions, and climate change is expected to exacerbate this risk. 52 However, this risk is concentrated in the southern part of the country, particularly along the southeastern corner where the average annual area of land that is burned is 20 \u2013 50 percent or more ( Figure 4. 1 ).", + "ner_text": [ + [ + 602, + 608, + "named" + ], + [ + 236, + 252, + "FATHOM <> data geography" + ], + [ + 633, + 637, + "FATHOM <> publication year" + ] + ], + "validated": true, + "empirical_context": "1. Key Hazard Risks in Chad and Their Geographic Distribution Source: ( A ) Author calculations using data from [ 46 ]; ( B ) FATHOM fluvial flooding data ( 2021 ) [ 48 ]; ( C ) Internal calculations using data from NASA NEX-GDDP CMIP5 data [ 50 ]. 2.", + "type": "dataset", + "explanation": "In the context, 'FATHOM' is explicitly mentioned as 'fluvial flooding data', indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'FATHOM' is a dataset because it is referenced alongside other data sources in the context.", + "contextual_reason_agent": "In the context, 'FATHOM' is explicitly mentioned as 'fluvial flooding data', indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 41, + "text": "36 Target Values Data Collection and Reporting Project Outcome Indicators Baseline YR1 YR2 YR3 YR4 YR5 YR6 Frequency and Reports Data Collection Instruments Responsibility for Data Collection policy and planning. 2. 6 Extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. 10 indicators 12 indicators 20 Indicators 32 Indicators Yrs 1, 3 and 6 External assessment of EMIS Directorate of Educational Planning ( DEP ) 3. Teaching and Learning Resource Development 3. 1 Teacher policies revised to support application of national teacher standards. Nil Policies identified 50 % target policies revised 100 % of target policies revised Yrs 1, 3 and 6 MoE Report DTQS / PSPS 3. 2 Number / percentage of newly appointed teachers completing post - recruitment initial training in ETC. Nil 6 % 30 % 50 % 60 % 70 % 80 % Annual MoE Reports DTQS / ETC 3. 3 Number / percentage of new teachers appointed using a competency - based model.", + "ner_text": [ + [ + 421, + 425, + "named" + ] + ], + "validated": false, + "empirical_context": "6 Extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. 10 indicators 12 indicators 20 Indicators 32 Indicators Yrs 1, 3 and 6 External assessment of EMIS Directorate of Educational Planning ( DEP ) 3. Teaching and Learning Resource Development 3.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with producing data for monitoring indicators.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 99, + "text": "As shown in Table A3. 2, the per capita cost increases with the complexity of the technology type and the return of investment decreases. 49 For rural households, traveling times to the nearest water source are substantial \u2014 about 56 percent of rural households had to travel less than an hour, but a large share ( 37 percent ) had to travel between 1. 0 and 2. 5 hours, and about 15 percent had to travel even longer to fetch water ( Ethiopia Time Use Survey, CSA, December 2014 ). 50 Reduction in incidence of diarrheal disease compared with unimproved coverage is estimated to be 34 percent for an improved community water source, 28 percent for improved on-site sanitation, 40 percent for handwashing with soap, 60 percent for combined basic WASH, and 80 percent for safe WASH. Data on water and sanitation are from J. Wolf et al., \u201c Assessing the Impact of Drinking Water and Sanitation on Diarrhoeal Disease in Low - and Middle-Income Settings: Systematic Review and Meta-Regression, \u201d Tropical Medicine & International Health 19, no. 8 ( 2014 ): 928 \u2013 42, https: / / www. ncbi. nlm. nih. gov / pubmed / 24811732; on handwashing with soap from M. C. Freeman et al., \u201c Hygiene and Health: Systematic Review of Handwashing Practices Worldwide and Update of Health Effects, \u201d Tropical Medicine & International Health 19, no. 8 ( 2014 ): 906 \u2013 16, https: / / www. ncbi. nlm. nih. gov / pubmed / 24889816. Estimates for basic WASH and safe WASH are based on combining individual services. 51 According to Guy Hutton, a dollar spent globally on basic WASH will provide in return US $ 3. 50, US $ 2. 60, and US $ 4. 20 for WASH, respectively.", + "ner_text": [ + [ + 435, + 459, + "named" + ], + [ + 145, + 161, + "Ethiopia Time Use Survey <> reference population" + ], + [ + 435, + 443, + "Ethiopia Time Use Survey <> data geography" + ], + [ + 461, + 464, + "Ethiopia Time Use Survey <> publisher" + ], + [ + 475, + 479, + "Ethiopia Time Use Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "0 and 2. 5 hours, and about 15 percent had to travel even longer to fetch water ( Ethiopia Time Use Survey, CSA, December 2014 ). 50 Reduction in incidence of diarrheal disease compared with unimproved coverage is estimated to be 34 percent for an improved community water source, 28 percent for improved on-site sanitation, 40 percent for handwashing with soap, 60 percent for combined basic WASH, and 80 percent for safe WASH.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced directly in the context as a source of empirical data regarding time use in Ethiopia.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced directly in the context as a source of empirical data regarding time use in Ethiopia.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 21, + "text": "11 extreme poor for the purpose of the cash transfer program and its complementary activities. However, its ultimate function is to serve as the basis for a registry of extreme poor households for targeted key programs of the NSPS, and for the targeted complementary interventions of CNPS member ministries. 37. The development of the database will take place in phases, according to the availability of poverty information, and the implementation of the cash transfer program. Initially, for the purpose of identifying potential beneficiaries of the cash transfer program, four provinces were selected on the basis of their monetary poverty rate ( ECBM 2013 / 14 ) and chronic malnutrition rates ( DHS 2010 ) and included Gitega, Karuzi, Kirundo and Ruyigi. The selection at communes, collines, and household-level is described in Box 3. 38. As part of the registration process, the project will assess whether potential transfer recipients in beneficiary households have national ID cards12. ID cards are required for mobile phone registration so if transfer recipients do not have an ID, the project will provide them with support to obtain one. The project may issue them temporary program cards for the first payment and the initial complementary activities \u2019 sessions. If necessary, the Project will coordinate with the Ministry of Interior for the provision of national identity cards. 39.", + "ner_text": [ + [ + 699, + 702, + "named" + ], + [ + 625, + 646, + "DHS <> data description" + ], + [ + 649, + 663, + "DHS <> publication year" + ], + [ + 670, + 696, + "DHS <> data description" + ], + [ + 699, + 707, + "DHS <> publication year" + ], + [ + 723, + 729, + "DHS <> data geography" + ], + [ + 731, + 737, + "DHS <> data geography" + ], + [ + 739, + 746, + "DHS <> data geography" + ], + [ + 751, + 757, + "DHS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The development of the database will take place in phases, according to the availability of poverty information, and the implementation of the cash transfer program. Initially, for the purpose of identifying potential beneficiaries of the cash transfer program, four provinces were selected on the basis of their monetary poverty rate ( ECBM 2013 / 14 ) and chronic malnutrition rates ( DHS 2010 ) and included Gitega, Karuzi, Kirundo and Ruyigi. The selection at communes, collines, and household-level is described in Box 3.", + "type": "survey", + "explanation": "In the context, 'DHS' is used to refer to a specific survey that provides data on chronic malnutrition rates, confirming it as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHS' is a dataset because it is referenced in relation to chronic malnutrition rates, suggesting it contains relevant data.", + "contextual_reason_agent": "In the context, 'DHS' is used to refer to a specific survey that provides data on chronic malnutrition rates, confirming it as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 31, + "text": "The World Bank Afghanistan: Eshteghal Zaiee - Karmondena ( EZ-Kar ) ( P166127 ) Page 24 of 85 management reporting system to monitor physical and financial progress; ( iv ) low budget credibility due to weak planning process; ( v ) limited scope and coverage of internal audit as well as poor quality of audit reports; and ( vi ) non \u2010 compliance with external audit recommendations. To mitigate the risk and strengthen FM capacity, time bound mitigation measures have been agreed with the implementing agencies. The financial management risk will be reassessed once the mitigation measures are in place. 57. The project FM arrangements rely on the country systems. GoIRA budgeting processes will apply, and the project \u2019 s budget will be a part of GoIRA \u2019 s annual budget. The accounting records will be maintained at the central level by MOF in Afghanistan Financial Management Information System ( AFMIS ) based on M16s and the FM department in MoFA, KMDP, IDLG \u2010 Deputy Ministry for Municipalities ( DMM ) and MoFA will maintain detailed subsidiary records. The FM Manual ( FMM ) for ARTF and IDA projects has been developed that will be adopted for EZ \u2010 Kar. The FMM provides an elaborate FM and internal control framework that is acceptable to the World Bank.", + "ner_text": [ + [ + 847, + 898, + "named" + ] + ], + "validated": false, + "empirical_context": "GoIRA budgeting processes will apply, and the project \u2019 s budget will be a part of GoIRA \u2019 s annual budget. The accounting records will be maintained at the central level by MOF in Afghanistan Financial Management Information System ( AFMIS ) based on M16s and the FM department in MoFA, KMDP, IDLG \u2010 Deputy Ministry for Municipalities ( DMM ) and MoFA will maintain detailed subsidiary records. The FM Manual ( FMM ) for ARTF and IDA projects has been developed that will be adopted for EZ \u2010 Kar.", + "type": "system", + "explanation": "However, it is described as a system for maintaining accounting records, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is described as a system for maintaining accounting records, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 67, + "text": "MAAIF currently uses the Integrated Financial Management Information System ( IFMIS ) in management of the accounting and reporting functions for the Government. The project will be expected to be managed through the upgraded IFMIS with the project module for new projects as agreed and directed by MoFPED. This has however presented the risk of slow implementation of the new project module under IFMIS due to some technical challenges that are being addressed by MoFPED. Funds Flow and Disbursements Arrangements 8. Bank accounts. The following bank accounts authorized by MoFPED will be maintained by MAAIF in the Bank of Uganda ( BoU ) for implementing the project: ( a ) Designated Account ( DA ) denominated in US dollars where disbursements from IDA will be deposited and ( b ) project account, denominated in the local currency. Transfers from the DA ( for payment of transactions in local currency ) will be deposited into this account in accordance with the project objectives, work plans, and budgets. Transfers to other IAs and lower-level implementation points such as district local governments will follow the established Government systems as provided for in the Government \u2019 s treasury accounting instructions, 2003 ( currently under revision in line with the new PFM Act, 2015 and PFM Regulations 2016 ). Figure 1. 1. Funds Flow Chart IDA DA ( US $ ) in BoU MAAIF ) MAAIF Project Account ( UGX ) in BoU", + "ner_text": [ + [ + 25, + 75, + "named" + ] + ], + "validated": false, + "empirical_context": "MAAIF currently uses the Integrated Financial Management Information System ( IFMIS ) in management of the accounting and reporting functions for the Government. The project will be expected to be managed through the upgraded IFMIS with the project module for new projects as agreed and directed by MoFPED.", + "type": "system", + "explanation": "However, the context indicates that it is a system used for management and reporting functions, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, the context indicates that it is a system used for management and reporting functions, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 43, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 32 Indicator Name People provided with access to improved water sources Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name People provided with access to improved water sources - Female ( RMS requirement ) Definition / Description Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "ner_text": [ + [ + 235, + 251, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 32 Indicator Name People provided with access to improved water sources Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "118_PAD1199-PAD-P144637-IDA-R2015-0247-1-Box393201B-OUO-9", + "page": 94, + "text": "83 5. According to an independent survey carried out on December 2008 and sectoral data collected from recipients and consolidated by the provincial units of the project, the overall impact of the project on beneficiaries is considered satisfactory. For instance: \uf0a7 In the education sector, nearly 9, 900 students have improved access to education facilities through the construction of 116 classrooms and provision of 6, 415 textbooks to 13 primary and two secondary schools. These facilities have substantially reduced the average number of students per classroom to an estimated 45 ( compared to 70-80 students outside of the project area ). However, this average hides the disparities from one province to another, which can be as large as 70 students in the Western Province and only 30 students per classroom in the South; \uf0a7 In the health sector, about 28, 000 people now have access to better health care through 16 integrated health centers, including maternity hospitals built or rehabilitated.", + "ner_text": [ + [ + 74, + 87, + "named" + ], + [ + 56, + 69, + "sectoral data <> reference year" + ], + [ + 525, + 565, + "sectoral data <> data description" + ], + [ + 763, + 779, + "sectoral data <> data geography" + ], + [ + 1019, + 1037, + "sectoral data <> usage context" + ] + ], + "validated": true, + "empirical_context": "83 5. According to an independent survey carried out on December 2008 and sectoral data collected from recipients and consolidated by the provincial units of the project, the overall impact of the project on beneficiaries is considered satisfactory. For instance: \uf0a7 In the education sector, nearly 9, 900 students have improved access to education facilities through the construction of 116 classrooms and provision of 6, 415 textbooks to 13 primary and two secondary schools.", + "type": "data", + "explanation": "In this context, 'sectoral data' is explicitly mentioned as collected from recipients and consolidated, indicating it serves as a data source for the evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'sectoral data' refers to a structured collection of data used in the analysis of the project's impact.", + "contextual_reason_agent": "In this context, 'sectoral data' is explicitly mentioned as collected from recipients and consolidated, indicating it serves as a data source for the evaluation.", + "contextual_signal": "collected from recipients and consolidated by the provincial units", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 87, + "text": "According to the Good Practice Note for Addressing GBV in Investment Projects Involving Major Civil Works, 66 all projects regardless of risk level should ensure the following minimum recommended actions to address GBV risks: GBV risk assessment included in project \u2019 s social assessments ( including Stakeholder Engagement Plan and Safeguard Documents ); community engagement / consultations with women and girls; conduct PIU capacity assessment; conduct GBV service provider mapping in project area; include GBV-sensitive approaches in GRM; define GBV requirements bid documents ( including the requirement for a Code of Conduct 63 2016-2017 Demographic and Health Survey in Burundi 64 Victoria Rames, Cl\u00e9mence Bununagi, and Caritas Niyonzima ( 2017 ) USAID / Burundi Gender Analysis Report. Prepared by Banyan Global. 65 IASC ( 2015 ). Guidelines for integrating gender-based violence interventions in humanitarian action: reducing risk, promoting resilience and aiding recovery. 66 Good Practice Note: Recommendations for Addressing Gender Based Violence in Investment Project Financing involving Major Civil Works. World Bank. 2018", + "ner_text": [ + [ + 644, + 673, + "named" + ], + [ + 398, + 413, + "Demographic and Health Survey <> reference population" + ], + [ + 634, + 643, + "Demographic and Health Survey <> reference year" + ], + [ + 677, + 684, + "Demographic and Health Survey <> data geography" + ], + [ + 688, + 702, + "Demographic and Health Survey <> author" + ], + [ + 704, + 721, + "Demographic and Health Survey <> author" + ], + [ + 727, + 744, + "Demographic and Health Survey <> author" + ], + [ + 747, + 751, + "Demographic and Health Survey <> publication year" + ], + [ + 806, + 819, + "Demographic and Health Survey <> author" + ] + ], + "validated": true, + "empirical_context": "According to the Good Practice Note for Addressing GBV in Investment Projects Involving Major Civil Works, 66 all projects regardless of risk level should ensure the following minimum recommended actions to address GBV risks: GBV risk assessment included in project \u2019 s social assessments ( including Stakeholder Engagement Plan and Safeguard Documents ); community engagement / consultations with women and girls; conduct PIU capacity assessment; conduct GBV service provider mapping in project area; include GBV-sensitive approaches in GRM; define GBV requirements bid documents ( including the requirement for a Code of Conduct 63 2016-2017 Demographic and Health Survey in Burundi 64 Victoria Rames, Cl\u00e9mence Bununagi, and Caritas Niyonzima ( 2017 ) USAID / Burundi Gender Analysis Report. Prepared by Banyan Global.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a recognized survey that provides empirical data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is a recognized survey that provides empirical data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 18, + "text": "The updated methodology will reflect the main improvements achieved during the first phase ( locality based, participative process, grievance mechanism, and so on ). 25. Households from Hodh Chargui, including those in the Mbera Refugee Camp that are considered poor, will be included in the Social Registry. This inclusion will be a positive step towards better coordination between humanitarian actors and the Government and between emergency and development support. The methodology to identify poor refugee households for inclusion in the Social Registry will be based on the profiling work carried out in 2018 by the UNHCR and WFP. It identified six different household categories: ( a ) deprived; ( b ) precarious; ( c ) unstable; ( d ) fragile ( e ) emerging; and ( f ) catalyst ( see figure 3 ). Each category gradually receives ( or will receive ) differentiated assistance based on its needs. This profiling work was carried out with the participation of camp and Government authorities. Based on preliminary data analysis, categories 1, 2 and 3 ( \u201c deprived \u201d, \u201c precarious \u201d, and \u201c unstable \u201d ) which include approximately 7, 500 households will enter the Social Registry. 26. This sub-component will finance: ( a ) the costs related to the community targeting ( Social Registry enrolment officers and related logistics and material ); ( b ) the socio-economic data collection costs for up to 350, 000 households ( 70, 000 households per year ); and ( c ) the costs related to quality insurance ( grievance mechanism, toll-free number, spot-check surveys, process evaluation, and so on ). Figure 3: Refugee categorization in Mauritania Source: UNHCR", + "ner_text": [ + [ + 543, + 558, + "named" + ], + [ + 223, + 241, + "Social Registry <> data geography" + ], + [ + 498, + 521, + "Social Registry <> reference population" + ], + [ + 610, + 614, + "Social Registry <> publication year" + ], + [ + 622, + 627, + "Social Registry <> publisher" + ], + [ + 1637, + 1647, + "Social Registry <> data geography" + ], + [ + 1656, + 1661, + "Social Registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "This inclusion will be a positive step towards better coordination between humanitarian actors and the Government and between emergency and development support. The methodology to identify poor refugee households for inclusion in the Social Registry will be based on the profiling work carried out in 2018 by the UNHCR and WFP. It identified six different household categories: ( a ) deprived; ( b ) precarious; ( c ) unstable; ( d ) fragile ( e ) emerging; and ( f ) catalyst ( see figure 3 ).", + "type": "registry", + "explanation": "The context indicates that the Social Registry is used for identifying poor refugee households, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to households.", + "contextual_reason_agent": "The context indicates that the Social Registry is used for identifying poor refugee households, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source for identifying households", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 12, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 10 of 34 11. Findings from the IDP survey reveal limitations in IDP civic engagement and social cohesion. There is a very low level of IDP participation in social activities in their communities such as youth and women \u2019 s groups, cultural activities, agricultural or entrepreneurship activities. Only eight percent of respondents participate in such activities. Forty two percent of respondents said that they either do not know where to, or would not want to, lodge a complaint or make a request regarding the delivery of services. In terms of social cohesion and community integration, 61 percent of respondents said that they felt well integrated into their village / city and only 40 percent felt that if someone in their family was in an emergency, they could count on support of their community. While the survey did not collect comparative data for non-IDPs, these findings indicate that the unique living conditions of IDPs may limit their participation in community-based activities and lead them to feel less well-supported by their communities. This may be a significant challenge for people who are returning to their places of origin as these places are less likely to have established local governance arrangements.", + "ner_text": [ + [ + 136, + 146, + "named" + ], + [ + 4, + 14, + "IDP survey <> publisher" + ], + [ + 45, + 73, + "IDP survey <> reference population" + ], + [ + 77, + 87, + "IDP survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 10 of 34 11. Findings from the IDP survey reveal limitations in IDP civic engagement and social cohesion. There is a very low level of IDP participation in social activities in their communities such as youth and women \u2019 s groups, cultural activities, agricultural or entrepreneurship activities.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical findings based on the collected data from the IDP survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on IDP civic engagement.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical findings based on the collected data from the IDP survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 85, + "text": "The biggest gap in the Cameroon statistical system is on the production of micro-data ( household surveys and censuses ). The funding requirements for the population census is estimated at US $ 55 million ( CFAF 30 billion ) according to BUCREP and the requirements for the agriculture census is estimated at US $ 51 million ( CFAF 28 billion ). In addition, there is no commitment for the next ECAM. The proposed project intends to increase the frequency of the production of micro-data for a close poverty monitoring ( by complementing resources for the population census and securing resources for the next living conditions survey ), strengthen the national accounts, and enhance access to statistics to strengthen both policy making and monitoring.", + "ner_text": [ + [ + 556, + 573, + "named" + ], + [ + 23, + 31, + "population census <> data geography" + ], + [ + 75, + 85, + "population census <> data type" + ], + [ + 238, + 244, + "population census <> publisher" + ] + ], + "validated": true, + "empirical_context": "In addition, there is no commitment for the next ECAM. The proposed project intends to increase the frequency of the production of micro-data for a close poverty monitoring ( by complementing resources for the population census and securing resources for the next living conditions survey ), strengthen the national accounts, and enhance access to statistics to strengthen both policy making and monitoring.", + "type": "census", + "explanation": "In this context, it is used as a source of data for monitoring poverty and enhancing statistics, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population census' typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "In this context, it is used as a source of data for monitoring poverty and enhancing statistics, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for close poverty monitoring", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + }, + "term_stats": { + "total": 6, + "validated": 5, + "not_validated": 1 + } + }, + { + "filename": "172_multi0page", + "page": 42, + "text": "Data collection activities are carried out M A properly staffed and equipped EMIS is in a timely manner and are utilized for established in the MEST. planning and management purposes. The sector performance indicators are defined and included in annual data collection activities on a gradual basis over time. The planning department of the MEST is restructured on the basis of key planning operations and staff qualifications. Overall Risk Rating 5 Risk Rating - H ( High Risk ), S ( Substantial Risk ), M ( Modest Risk ), N ( Negligible or Low Risk ) 3. Possible Controversial Aspects: 3. 1 The Project supports the provision of services by private SPs such as, Catholic, Protestant and Islamic religious groups, NGOs and CBOs. Even though there is a long tradition in Sierra Leone by these SPs, and currently 85 % of government-assisted schools are administered by the missions, it can be argued that the provision of education services be made solely by the State. It is clear that the proposed approach has an advantage for the MEST so that it serves as a regulatory and policy making body, which is more consistent with modern trends, and more efficient in meeting the education needs of the population. - 37 -", + "ner_text": [ + [ + 188, + 217, + "named" + ] + ], + "validated": false, + "empirical_context": "planning and management purposes. The sector performance indicators are defined and included in annual data collection activities on a gradual basis over time. The planning department of the MEST is restructured on the basis of key planning operations and staff qualifications.", + "type": "concept", + "explanation": "'Sector performance indicators' are mentioned in the context of planning and management but are not described as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'sector performance indicators' refers to a dataset because it sounds like a structured collection of data.", + "contextual_reason_agent": "'Sector performance indicators' are mentioned in the context of planning and management but are not described as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 65, + "text": "The results monitoring framework assesses progress towards the PDO through key indicators, focusing on achieving improvement in quantity and quality of social services provided by MOSA, expanded coverage of SSNs ( the NPTP ), and viable income generating sub - projects ( see Annex 1 ). In addition, intermediate indicators will monitor the progress of each component over the life of the project. Monitoring and evaluation ( M & E ) arrangements will be centralized at the level of the PM, which will have a dedicated M & E Specialist, and rely on an M & E system adapted to the needs of each component. A computerized, modular MIS will be the central piece of the M & E system and will include: ( a ) a database on capacity building activities carried out under Component 1 and their beneficiaries ( e. g., date, place, topic, and the names of beneficiaries of a specific training ); ( b ) a module to track the processing of applications under Component 2, input baseline data for approved sub-projects, as well as monitor the physical progress in sub - project implementation and related tranche disbursements; ( c ) a module to register households in the NPTP, record the results of their eligibility assessment ( including their NPTP score ), and follow their utilization of benefits; ( d ) a financial management module for the whole project. 52. The SDCs will be the primary providers of information to be entered into the MIS and will be trained accordingly, as well as equipped with computers and connected to the internet. The MIS will be designed to allow data entry directly from the field, while the PM will perform data checks and produce summary reports. For transparency purposes, part of the data contained in the MIS will be accessible to the public ( on a viewing-only basis ) through the MOSA website. The M & E specialist will randomly visit SDCs, sub-project sites, and beneficiary households to verify the accuracy of the data entered, accompanied by the Internal Auditor as needed. 66", + "ner_text": [ + [ + 1431, + 1434, + "named" + ] + ], + "validated": false, + "empirical_context": "52. The SDCs will be the primary providers of information to be entered into the MIS and will be trained accordingly, as well as equipped with computers and connected to the internet. The MIS will be designed to allow data entry directly from the field, while the PM will perform data checks and produce summary reports.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system for managing information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 119, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 114 of 117. ANNEX 6. PROGRAM ACTION PLAN Action Description Source DLI # Responsibility Timing Completion Measurement Undertake data analysis in the NEMIS platform and publish annual educational statistical booklet. Technical MoE Recurrent Yearly Annual educational statistical booklet published for core education data, including refugee children and learner with special needs. Conduct policy dialogue for inclusion / mainstrea ming of refugees in the NEMIS. Technical MoE Other Continuous Data for refugee children included in the NEMIS Training: implementation of the consolidated three year capacity building plan for staff development at MoE and the Program \u2019 s implementing entities ( IEs ). Technical MoE Recurrent Yearly Targets in the plan implemented Technical expertise to be engaged in the areas of CBC and formative assessments Technical MoE Recurrent Yearly two international Partner institutions procured to provide technical support to KICD and KNEC in the implementation and monitoring of the roll out of CBC and assessments. Utilization of the online based item portal for competency based assessment Technical KNEC Recurrent Continuous Teachers post assessment items in the portal and also use the assessment items in the portal. Timely release of funds Exchequer to the Implementing Entities Fiduciary Systems NT, MoE and TSC Recurrent Yearly Timely release of funds to the Implementing Entities ( IEs ) PPRA to undertake compliance assessment.", + "ner_text": [ + [ + 611, + 616, + "named" + ], + [ + 253, + 291, + "NEMIS <> data description" + ], + [ + 408, + 424, + "NEMIS <> reference population" + ], + [ + 429, + 455, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Conduct policy dialogue for inclusion / mainstrea ming of refugees in the NEMIS. Technical MoE Other Continuous Data for refugee children included in the NEMIS Training: implementation of the consolidated three year capacity building plan for staff development at MoE and the Program \u2019 s implementing entities ( IEs ). Technical MoE Recurrent Yearly Targets in the plan implemented Technical expertise to be engaged in the areas of CBC and formative assessments Technical MoE Recurrent Yearly two international Partner institutions procured to provide technical support to KICD and KNEC in the implementation and monitoring of the roll out of CBC and assessments.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is referenced in relation to continuous data for refugee children, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of data inclusion and technical support for refugee children.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is referenced in relation to continuous data for refugee children, indicating its role as a data source.", + "contextual_signal": "mentioned as a data source for refugee children", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 11, + "text": "Poverty headcount ratio at US $ 2. 15 a day ( 2017 PPP ) ( % of population ) - Niger. https: / / data. worldbank. org / indicator / SI. POV. DDAY? locations = NE 5 In the transport sector women hold less than one percent of jobs. Although data on women in technical roles is unavailable, their share is likely lower due to inadequate skills and strong gender norms. 6 When referring to host communities in this document, internally displaced persons are considered part of the host population unless noted otherwise. 7 P. Thenkabail et al. 2016. Global Food Security Support Analysis Data ( GFSAD ) Crop Dominance 2010 Global 1 km V001 [ Ddata set ]. NASA Making Earth System Data Records for Use in Research Environments ( MEaSUREs ) NASA EOSDIS Land Processes DAAC. https: / / lpdaac. usgs. gov / products / gfsad1kcdv001 /. 8 The TSR corridor is one of the oldest transnational road corridors in Africa. It is 4, 500 km long, crossing the Sahara Desert and linking Algeria, Chad, Mali, Niger, Nigeria, and Tunisia. 9 The TSH, or Trans-African Highway 5, connects Dakar, Senegal to N ' Djamena, Chad, passing through Mali, Burkina Faso, Niger, and Nigeria. It links Niamey and Maradi in Niger with Burkina Faso and Nigeria, respectively. 10 These figures correspond to populations located within a 150-km radius from the RN1 Maradi \u2013 Zinder section ( WorldPop, 2020 ). 11 UNHCR. 2025. UNHCR Niger - Map Population of Concern - Mars 2025. https: / / data. unhcr. org / en / documents / details / 115551.", + "ner_text": [ + [ + 740, + 746, + "named" + ] + ], + "validated": false, + "empirical_context": "Global Food Security Support Analysis Data ( GFSAD ) Crop Dominance 2010 Global 1 km V001 [ Ddata set ]. NASA Making Earth System Data Records for Use in Research Environments ( MEaSUREs ) NASA EOSDIS Land Processes DAAC. https: / / lpdaac.", + "type": "system", + "explanation": "However, EOSDIS refers to a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of data records and research environments.", + "contextual_reason_agent": "However, EOSDIS refers to a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 21, + "text": "11 extreme poor for the purpose of the cash transfer program and its complementary activities. However, its ultimate function is to serve as the basis for a registry of extreme poor households for targeted key programs of the NSPS, and for the targeted complementary interventions of CNPS member ministries. 37. The development of the database will take place in phases, according to the availability of poverty information, and the implementation of the cash transfer program. Initially, for the purpose of identifying potential beneficiaries of the cash transfer program, four provinces were selected on the basis of their monetary poverty rate ( ECBM 2013 / 14 ) and chronic malnutrition rates ( DHS 2010 ) and included Gitega, Karuzi, Kirundo and Ruyigi. The selection at communes, collines, and household-level is described in Box 3. 38. As part of the registration process, the project will assess whether potential transfer recipients in beneficiary households have national ID cards12. ID cards are required for mobile phone registration so if transfer recipients do not have an ID, the project will provide them with support to obtain one. The project may issue them temporary program cards for the first payment and the initial complementary activities \u2019 sessions. If necessary, the Project will coordinate with the Ministry of Interior for the provision of national identity cards. 39.", + "ner_text": [ + [ + 157, + 192, + "named" + ], + [ + 625, + 646, + "registry of extreme poor households <> data description" + ], + [ + 649, + 663, + "registry of extreme poor households <> publication year" + ], + [ + 670, + 696, + "registry of extreme poor households <> data description" + ], + [ + 723, + 729, + "registry of extreme poor households <> data geography" + ], + [ + 731, + 737, + "registry of extreme poor households <> data geography" + ], + [ + 739, + 746, + "registry of extreme poor households <> data geography" + ], + [ + 751, + 757, + "registry of extreme poor households <> data geography" + ], + [ + 912, + 967, + "registry of extreme poor households <> reference population" + ] + ], + "validated": true, + "empirical_context": "11 extreme poor for the purpose of the cash transfer program and its complementary activities. However, its ultimate function is to serve as the basis for a registry of extreme poor households for targeted key programs of the NSPS, and for the targeted complementary interventions of CNPS member ministries. 37.", + "type": "registry", + "explanation": "This is indeed a dataset as it serves as the basis for a registry used in targeted programs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of households categorized as extreme poor.", + "contextual_reason_agent": "This is indeed a dataset as it serves as the basis for a registry used in targeted programs.", + "contextual_signal": "described as a registry that serves as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "012_BOSIB-9a6accb6-73d1-4bd1-8307-d41a339a51ab", + "page": 55, + "text": "The World Bank Kenya Water, Sanitation, and Hygiene Program ( P179012 ) Page 51 of 58 Description The OCCR is selected as a representative KPI out of the WSPs PIAPs as a good proxy to measure degree of implementation of the PIAP. The total amount allocated to this DLI will be equally divided into the 33 WSPs. Then the annual disbursement each year will be made against the degree of achievement for each WSP as per their annual OCCR target defined in their PIAP. To be eligible for disbursement under DLI 5, the WSPs must show sustained full compliance with the legal and regulatory requirements for good governance defined in DLI 6. Data source / Agency County Government, MoWSI ( SDWS ), WASREB Verification Entity IVA Procedure The IVA shall carry out a review of WSP data related to OCCR. The IVA shall make physical visits to the WSPs to vefify completed investments defined in the PIAPs. 8: Amount of financing leveraged by WSPs from private / commercial financing Formula 100 % of the amount leveraged by the WSP from private / commercial financing Description Disbursement will be triggered by the WSP securing a commercial loan deal to finance bankable projects included in the PIAP, contributing to improvements in the OCCR. This DLI will provide 50 percent of total bankable project cost or 100 percent of the commercial loan secured.", + "ner_text": [ + [ + 769, + 777, + "named" + ], + [ + 4, + 14, + "WSP data <> publisher" + ], + [ + 514, + 518, + "WSP data <> reference population" + ], + [ + 837, + 841, + "WSP data <> reference population" + ] + ], + "validated": true, + "empirical_context": "To be eligible for disbursement under DLI 5, the WSPs must show sustained full compliance with the legal and regulatory requirements for good governance defined in DLI 6. Data source / Agency County Government, MoWSI ( SDWS ), WASREB Verification Entity IVA Procedure The IVA shall carry out a review of WSP data related to OCCR. The IVA shall make physical visits to the WSPs to vefify completed investments defined in the PIAPs.", + "type": "data", + "explanation": "In the context, 'WSP data' is explicitly mentioned as being reviewed and verified, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'WSP data' is a dataset because it refers to specific data related to Water Service Providers (WSPs).", + "contextual_reason_agent": "In the context, 'WSP data' is explicitly mentioned as being reviewed and verified, indicating it is used as a data source.", + "contextual_signal": "mentioned as data related to WSPs that is verified", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 41, + "text": "The Operation will build on the existing M & E structures at MoE and strengthen these under the IPF component. MoE \u2019 s Central Planning and Project Management Unit ( CPPMU ), headed by the Chief Economists ( from Basic Education, TVET, University and Post Training and Skills Development ), are responsible for overall coordination and monitoring of NESSP implementation. The CPPMU works closely with the Kenya Bureau of Statistics ( KNBS ). The CPPMU, in collaboration with the KNBS team, develops and publishes educational statistical booklets. The MoE \u2019 s DPCAD oversees day-to day implementation of key donor funded projects, including the ongoing education projects. 65. The Operation results monitoring will build on the existing databases used by the CPPMU that comprise the online - based NEMIS platform. The NEMIS, hosted by MoE, is being revamped to ensure data credibility and data security. The NEMIS contains up-to-date key education data, including for primary education, and has been used to generate the annual educational statistical booklets. KNEC hosts the platform for data on learning assessments, including the school specific learning assessments reports. The Directorate of Quality Assurance and Standards, and the Directorate of School Audit at MoE conduct regular visits to schools and submit their data and reports to MoE management and relevant IEs for action.", + "ner_text": [ + [ + 1133, + 1177, + "named" + ] + ], + "validated": false, + "empirical_context": "The NEMIS contains up-to-date key education data, including for primary education, and has been used to generate the annual educational statistical booklets. KNEC hosts the platform for data on learning assessments, including the school specific learning assessments reports. The Directorate of Quality Assurance and Standards, and the Directorate of School Audit at MoE conduct regular visits to schools and submit their data and reports to MoE management and relevant IEs for action.", + "type": "report", + "explanation": "However, it is referred to as 'reports' and not as a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'assessments' which is often associated with data collection.", + "contextual_reason_agent": "However, it is referred to as 'reports' and not as a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 72, + "text": "The overall fiduciary risk of the Program is rated as \u201c Moderate. \u201d The overall Program \u2019 s fiduciary framework is assessed as adequate to provide reasonable assurance that the Program \u2019 s financing proceeds will be used for intended purposes, with due attention to the principles of economy, efficiency, effectiveness, transparency, and accountability. Key risks have been identified and those that could potentially impact the Program are detailed in the risks section. 3. The following risk mitigation measures are proposed: ( 1 ) The Bank team will review the annual budgetary allocations of implementing agencies; ( 2 ) Special annual reports on arrears will be submitted to the World Bank for monitoring purposes; ( 3 ) The implementing agencies will use an electronic system to manage their contracts, and it will be interconnected to the Government Financial Management Information System ( GFMIS ); ( 4 ) Procurement plans will be based on annual work plans of different agencies; ( 5 ) MOPIC will coordinate and monitor the overall procurement plans; ( 6 ) Timely recording of procurement transactions will be ensured; ( 7 ) Capacity building of procurement departments will be done through training on Procurement Bylaw No. 8 / 2022 and e-procurement ( JONEPS ); ( 8 ) Complaint records will be uploaded and published on JONEPS; ( 9 ) Implementing agencies will appoint qualified technical specialized staff", + "ner_text": [ + [ + 846, + 896, + "named" + ] + ], + "validated": false, + "empirical_context": "3. The following risk mitigation measures are proposed: ( 1 ) The Bank team will review the annual budgetary allocations of implementing agencies; ( 2 ) Special annual reports on arrears will be submitted to the World Bank for monitoring purposes; ( 3 ) The implementing agencies will use an electronic system to manage their contracts, and it will be interconnected to the Government Financial Management Information System ( GFMIS ); ( 4 ) Procurement plans will be based on annual work plans of different agencies; ( 5 ) MOPIC will coordinate and monitor the overall procurement plans; ( 6 ) Timely recording of procurement transactions will be ensured; ( 7 ) Capacity building of procurement departments will be done through training on Procurement Bylaw No. 8 / 2022 and e-procurement ( JONEPS ); ( 8 ) Complaint records will be uploaded and published on JONEPS; ( 9 ) Implementing agencies will appoint qualified technical specialized staff", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 39, + "text": "Table A3-1: Access, enrollment, and completion rates in basic education in Sudan Sudan Urban Rural Net entry rate ( 6-year-olds ) * 82. 8 % 90. 4 % 79. 9 % NER ( 6-13-year-olds ) * 69. 1 % 85. 8 % 62. 6 % GER ( 6-13-year-olds ) * 73. 3 % 88. 0 % 67. 4 % Grade 4 survival rate * * 84. 7 % 97. 7 % 76. 8 % Grade 6 survival rate * * 66. 6 % 83. 3 % 56. 5 % Grade 8 survival rate * * 49. 3 % 68. 2 % 37. 8 % Completion rate ( 13-year-olds ) * 55. 0 % 58. 2 % 52. 6 % Source: authors \u2019 estimates based on the data from Sudan MICS 2014 * and School Census 2018 * * 3. About seven percent of people never attended school in 2014. Real access increased by eight percentage points from 85 percent in 2009 to 93 percent in 2014. Access at age six increased by 30 percentage points from 40 percent in 2009 to 70 percent in 2014. The increase is driven by an increase in access among the bottom-40 percent of the population. System demonstrated growth between 2009 and 2014 with late entry remaining constant at 11 years. Those who are not ever attended school at age 11 will never attend. 4. The poor retention rate in basic education comes from a lack of school demand, in particular, among the poorest. Economic difficulties and behavior such as early marriage, pregnancy, and economic hardships explain the fragility of school demand. The lack of supply ( overcrowded classrooms, \u2018 open-air \u2019 or temporary classrooms, and incomplete schools ) also negatively effects retention rates. According to the School Census data, 16 percent of students are enrolled in a school that does not provide full course of basic education cycle ( 8 grades ). In addition, these students are likely to drop out before completion.", + "ner_text": [ + [ + 536, + 554, + "named" + ] + ], + "validated": true, + "empirical_context": "2 % 52. 6 % Source: authors \u2019 estimates based on the data from Sudan MICS 2014 * and School Census 2018 * * 3. About seven percent of people never attended school in 2014.", + "type": "census", + "explanation": "In the context, it is explicitly mentioned as a source of information used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of information used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 53, + "text": "Name: Integrated Statistical Yearbook available at national level ( DLI6 ) Yes / No N Y Annual ( except Years 1 and 2 ) Project Secretariat MINEDUB / MINESEC Learning Assessment Technical Unit Description: Yearbook includes information on: enrollment, infrastructures, results, scorecards and comprised disaggregated data ( by region and gender ) The yearbook covers pre-primary through higher education, including technical and vocational education and training ( TVET ) Name: Number of schools included in the Performance - based Financing ( PBF ) school Number 20. 00 3000. 00 Annual Project Secretariat Report FBP Unit / MINEDUB", + "ner_text": [ + [ + 6, + 37, + "named" + ], + [ + 51, + 65, + "Integrated Statistical Yearbook <> data geography" + ], + [ + 303, + 321, + "Integrated Statistical Yearbook <> data type" + ], + [ + 367, + 403, + "Integrated Statistical Yearbook <> reference population" + ], + [ + 415, + 462, + "Integrated Statistical Yearbook <> reference population" + ] + ], + "validated": true, + "empirical_context": "Name: Integrated Statistical Yearbook available at national level ( DLI6 ) Yes / No N Y Annual ( except Years 1 and 2 ) Project Secretariat MINEDUB / MINESEC Learning Assessment Technical Unit Description: Yearbook includes information on: enrollment, infrastructures, results, scorecards and comprised disaggregated data ( by region and gender ) The yearbook covers pre-primary through higher education, including technical and vocational education and training ( TVET ) Name: Number of schools included in the Performance - based Financing ( PBF ) school Number 20. 00 3000.", + "type": "yearbook", + "explanation": "The yearbook is explicitly described as containing disaggregated data and information relevant for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes structured information on various educational metrics.", + "contextual_reason_agent": "The yearbook is explicitly described as containing disaggregated data and information relevant for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "157_417980PAD0P1041ly10IDAR200081006011", + "page": 46, + "text": "( c ) the end o f the project life, the PMT will submit an ICR. 5. A simplified electronic database will also be created at the level o f M O L G District Offices using Microsoft Office software to aggregate data from ADPs, CDD subproject proposals, JSCPDs Quarterly Progress Reports with OC assistance, and final evaluations o f implemented CDD subprojects. Designated M O L G District Office coordinators will be responsible for maintaining the electronic forms and coordinating with the PMT Community Development / Reporting Specialist. This information will feed into the MIS. Participatory Monitoring & Evaluation 6. In order to ensure the VNDP \u2019 s accountability to the communities, the project will introduce participatory M & E tools for citizens to be able to monitor progress in project implementation and evaluate results, as well as monitor the transparency and accountability o f their LGUs and JSCPDs. Participatory M & E will also allow the communities to express opinions, share information and reflect on findings in an open, transparent and inclusive manner.", + "ner_text": [ + [ + 576, + 579, + "named" + ] + ], + "validated": false, + "empirical_context": "Designated M O L G District Office coordinators will be responsible for maintaining the electronic forms and coordinating with the PMT Community Development / Reporting Specialist. This information will feed into the MIS. Participatory Monitoring & Evaluation 6.", + "type": "system", + "explanation": "'MIS' is mentioned as a system but not as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured collection of data.", + "contextual_reason_agent": "'MIS' is mentioned as a system but not as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 43, + "text": "The methodology monitors three aspects of degradation: ( a ) net primary productivity using normalized difference vegetation index ( NDVI ) with information from satellite imagery ( for example, Moderate Resolution Imaging Spectroradiometer ); ( b ) land cover using either a representative area approach with high-resolution imagery, Landsat and Sentinel imagery, or globally produced datasets; and ( c ) soil organic carbon using field sampling. The UNCCD Secretariat will support the CEP to establish baselines for the above three aspects and repeat the measurements at project midterm and completion. 90. The M & E system of the project will comprise both performance and impact monitoring. The project M & E system will include both annual outcome and result targets as well as periodic evaluations of impact on land degradation, using the three LDN indicators, and households \u2019 socioeconomic factors that will be compared with baseline evaluations to be carried out by the CEP IT in Years 1 and 2. The project will make particular efforts to integrate participatory monitoring methods, using ongoing advances in digital tools and data collection, thus giving communities the potential for timely decision - making, wider sharing of results, and greater ownership of investments. The project will design and maintain a website for wider dissemination of the results and progress.", + "ner_text": [ + [ + 195, + 240, + "named" + ] + ], + "validated": false, + "empirical_context": "The methodology monitors three aspects of degradation: ( a ) net primary productivity using normalized difference vegetation index ( NDVI ) with information from satellite imagery ( for example, Moderate Resolution Imaging Spectroradiometer ); ( b ) land cover using either a representative area approach with high-resolution imagery, Landsat and Sentinel imagery, or globally produced datasets; and ( c ) soil organic carbon using field sampling. The UNCCD Secretariat will support the CEP to establish baselines for the above three aspects and repeat the measurements at project midterm and completion.", + "type": "system", + "explanation": "However, it is actually a system or instrument used for capturing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is associated with satellite imagery data collection.", + "contextual_reason_agent": "However, it is actually a system or instrument used for capturing data rather than a structured collection of data itself.", + "contextual_signal": "mentioned as a system for capturing imagery, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 17, + "text": "The World Bank Development Response to Displacement Impacts Project in the Horn of Africa Phase II ( P178047 ) Page 12 of 80 18. While refugees enjoy equal or even superior access to basic services such as health, education and water compared to hosts, they lack livelihood opportunities and are highly dependent on dwindling humanitarian aid. A survey of refugees and host community members conducted in 2017 showed that refugees in Ethiopia are poorer on average than hosts, live in inferior housing and have less access to electricity and job opportunities. 26 The low level of refugee self-reliance is more acute for female-headed households with 13 percent of female-headed households having access to income sources other than aid, compared to 26 percent of households headed by men. 27 For urban refugees, livelihood opportunities are limited by regulatory barriers, levies imposed on informal businesses and the absence of start-up capital, especially with the negative impact of COVID-19 on the inflow of remittances. 19. Refugee-hosting regions in Ethiopia vary widely in terms of socio-economic characteristics. Most refugees in Ethiopia reside in camps in areas that border their country of origin. Nearly all the South Sudanese refugees have settled in Gambella and the Somali refugees in Somali region. Around half of the Eritrean refugees have settled in Tigray or Afar, though many have moved since the conflict erupted in the north.", + "ner_text": [ + [ + 346, + 391, + "named" + ], + [ + 405, + 409, + "survey of refugees and host community members <> publication year" + ], + [ + 434, + 442, + "survey of refugees and host community members <> data geography" + ], + [ + 621, + 645, + "survey of refugees and host community members <> reference population" + ], + [ + 1058, + 1066, + "survey of refugees and host community members <> data geography" + ], + [ + 1140, + 1148, + "survey of refugees and host community members <> data geography" + ], + [ + 1266, + 1274, + "survey of refugees and host community members <> data geography" + ], + [ + 1302, + 1315, + "survey of refugees and host community members <> data geography" + ], + [ + 1370, + 1376, + "survey of refugees and host community members <> data geography" + ], + [ + 1380, + 1384, + "survey of refugees and host community members <> data geography" + ] + ], + "validated": true, + "empirical_context": "While refugees enjoy equal or even superior access to basic services such as health, education and water compared to hosts, they lack livelihood opportunities and are highly dependent on dwindling humanitarian aid. A survey of refugees and host community members conducted in 2017 showed that refugees in Ethiopia are poorer on average than hosts, live in inferior housing and have less access to electricity and job opportunities. 26 The low level of refugee self-reliance is more acute for female-headed households with 13 percent of female-headed households having access to income sources other than aid, compared to 26 percent of households headed by men.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a survey that provides empirical data on the conditions of refugees and host communities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is a survey that provides empirical data on the conditions of refugees and host communities.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 19, + "text": "The existing frameworks mainly operate at the national level and usually are not disseminated at the sub-national levels nor enforced at the national and sub - national levels. Improving and enforcing the policy framework to address the existing education challenges and guiding the sector activities is fundamental in enhancing individual and organizational performance, as well as maintaining it over time to achieve the required results. Strengthening the physical capacity, such as suitable physical infrastructure and equipment, is equally important to implement the introduced change and encourage individuals. Currently, the existing physical and Information Technology ( IT ) infrastructure and office equipment in the education ministries, mainly at the sub-national levels, are insufficient. The technology of some systems, such as the EMIS, requires updating to facilitate dissemination to states. Improving the physical infrastructure and upgrading equipment are necessary to address the capacity gap fully. C. Relevance to Higher Level Objectives 25. The World Bank is re-engaging with South Sudan in the education sector after a gap of several years. As South Sudan rebuilds its education sector to offer quality education to children across the country, the proposed Project will support its efforts by helping put in place the essential building blocks for a strong system.", + "ner_text": [ + [ + 846, + 850, + "named" + ] + ], + "validated": false, + "empirical_context": "Currently, the existing physical and Information Technology ( IT ) infrastructure and office equipment in the education ministries, mainly at the sub-national levels, are insufficient. The technology of some systems, such as the EMIS, requires updating to facilitate dissemination to states. Improving the physical infrastructure and upgrading equipment are necessary to address the capacity gap fully.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system that requires updating, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to information technology in education.", + "contextual_reason_agent": "However, EMIS is mentioned as a system that requires updating, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "At the district level, GROW will provide technical assistance on specific areas ( Stakeholder engagement, grievance management, management of workers ) to government officers based in the districts and relevant government entities ( MGLSD, NEMA - Social unit, CDOs, Labour, Gender, Occupational Health and Safety ( OHS ), and all other government departments that manage social risk in the country ), and sub-county staff. Key focus will also be given to building capacity of MGLSD, PSFU and other relevant government institutions, including academia on social risk mitigation. 62. Subcomponent 4B: Policy innovation and evidence generation. This subcomponent will finance data collection efforts beyond the information gathered through the MIS and digital delivery platforms ), analysis and publication of data from project and non-project datasets on female entrepreneurship, climate resilience and WEE, establishment of a data portal, and research workshops and policy forums on female entrepreneurship and WEE. In addition, the learning agenda could also include the design and implementation of innovative pilot activities within the project to test what works to address the key constraints faced by beneficiaries, including refugees and women living in RHDs. This work will be designed and conducted in collaboration with the World Bank \u2019 s Africa Gender Innovation Lab as well as other local research institutes and development partners.", + "ner_text": [ + [ + 925, + 936, + "named" + ] + ], + "validated": false, + "empirical_context": "Subcomponent 4B: Policy innovation and evidence generation. This subcomponent will finance data collection efforts beyond the information gathered through the MIS and digital delivery platforms ), analysis and publication of data from project and non-project datasets on female entrepreneurship, climate resilience and WEE, establishment of a data portal, and research workshops and policy forums on female entrepreneurship and WEE. In addition, the learning agenda could also include the design and implementation of innovative pilot activities within the project to test what works to address the key constraints faced by beneficiaries, including refugees and women living in RHDs.", + "type": "portal", + "explanation": "However, the term 'data portal' refers to a platform for accessing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'data portal' suggests a collection of data.", + "contextual_reason_agent": "However, the term 'data portal' refers to a platform for accessing data rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 17, + "text": ". 6 3. 1 9. 2 Excluding HICsc 58 13 22 17 6 2. 9 5. 9 2. 9 9. 2 Horn of Africad 452 28 58 36 8 4. 3 3. 2 0. 9 3. 8 Source: Merchandise export diversification is the latest available year from UNCTAD Stat, United Nations Conference on Trade and Development. All other indicators are the latest available year from World Development Indicators, World Bank 17. Communicable diseases and childhood health conditions ( including malnutrition ) still dominate Djibouti \u2019 s burden of disease. They account for seven out of the top ten causes of women \u2019 s and girls \u2019 deaths ( Figure 3 ) ( six out of ten for both sexes ). Water-borne diarrheal diseases exacerbated by climate change, including unpredictable rainfall and rising temperatures account for 8 percent of under-5 deaths. Similarly, the incidence of 10 Notes: a ) Based on a \u201c dissimilarity index, \u201d constructed as follows: Total population, GNI per capita, official development assistance as a share of GNI, and merchandise export diversification where each is standardized by taking the z - score across countries. Each country of the 138 with sufficient data was compared to Djibouti, and the resulting sum of squared differences across the four standardized variables is the index. b ) Middle East and North Africa, World Bank classification c ) High-income countries ( HICs ), according to the World Bank FY22 classification d ) Djibouti, Eritrea, Ethiopia, and Somalia e ) For consistency across countries, this table employs nutrition estimates from the UNICEF-WHO-WB Joint Child Malnutrition Estimates, which indicate a substantially higher rate of stunting than found by the Djibouti 2019 SMART survey.", + "ner_text": [ + [ + 313, + 341, + "named" + ], + [ + 343, + 353, + "World Development Indicators <> publisher" + ], + [ + 454, + 462, + "World Development Indicators <> data geography" + ], + [ + 830, + 849, + "World Development Indicators <> data type" + ], + [ + 1131, + 1139, + "World Development Indicators <> data geography" + ], + [ + 1273, + 1283, + "World Development Indicators <> publisher" + ], + [ + 1352, + 1362, + "World Development Indicators <> publisher" + ], + [ + 1387, + 1395, + "World Development Indicators <> data geography" + ], + [ + 1637, + 1645, + "World Development Indicators <> data geography" + ] + ], + "validated": true, + "empirical_context": "8 Source: Merchandise export diversification is the latest available year from UNCTAD Stat, United Nations Conference on Trade and Development. All other indicators are the latest available year from World Development Indicators, World Bank 17. Communicable diseases and childhood health conditions ( including malnutrition ) still dominate Djibouti \u2019 s burden of disease.", + "type": "dataset", + "explanation": "In the context, it is explicitly mentioned as a source for the latest available year indicators, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of indicators.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source for the latest available year indicators, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "100_Lebanon-Health-PAD-PAD2358-06152017", + "page": 28, + "text": "The World Bank Lebanon Health Resilience Project ( P163476 ) Page 26 of 54 48. The MoPH, through the PMU \u2019 s two coordinators ( PHCC and hospital ), will be responsible for monitoring the daily progress of the project, focusing on improved accessibility of beneficiaries to the package of services, proper procurement, and capacity building of hospitals. The PMU will be responsible for preparing and submitting semiannual progress reports that, among other things, provide detailed reporting on services, procurement, and expenditures. It will also conduct mid-term and post - completion evaluations to gauge progress toward the PDO and assess the impact of the project on targeted beneficiaries. 49. The HIS system developed by the MoPH will be further refined and expanded under the project to all newly enrolled PHCCs to support the implementation and monitoring of the program. Data will be collected and used to: ( i ) supervise the performance of PHCCs; ( ii ) monitor the progress of beneficiary accessibility; ( iii ) monitor hospital improvements; and ( iv ) improve the provision of services on the basis of intermediate output and outcome data. The data will be verified directly by MoPH supervisory systems and external evaluation, and indirectly through triangulation with other data sources such as hospital claims. 50.", + "ner_text": [ + [ + 1314, + 1329, + "named" + ] + ], + "validated": false, + "empirical_context": "Data will be collected and used to: ( i ) supervise the performance of PHCCs; ( ii ) monitor the progress of beneficiary accessibility; ( iii ) monitor hospital improvements; and ( iv ) improve the provision of services on the basis of intermediate output and outcome data. The data will be verified directly by MoPH supervisory systems and external evaluation, and indirectly through triangulation with other data sources such as hospital claims. 50.", + "type": "data", + "explanation": "'Hospital claims' is mentioned as a source of information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'hospital claims' refers to a dataset because it involves data related to hospital services.", + "contextual_reason_agent": "'Hospital claims' is mentioned as a source of information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 59, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 55 of 64 resident of the commune of Balbala. Public health facilities with safe disposal of sharps waste Quarterly IVA Quarterly IVA and PBF report MOH Facilities receiving PBF payment within 30 days of submission of verification of results Semi - annual Accounting system Semi-annual internal project review by DGP MOH Average user experience score among PBF / DFF participating facilities User experience score is a zero if not collected. Quarterly IVA Quarterly IVA and PBF report MOH Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities with functioning community health workers per the norms set by the National Community Health Strategy Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities that have formally declared the abandonment of the practice of FGM Annual Official act by community leadership Annual internal project review by DGP ( combined with semi - annual review ) MOH Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) Semi - annual DHIS2 Semi-annual internal project review by DGP MOH Facilities that are connected to the National Logistics Management Information System ( eLMIS ) Semi - annual eLMIS Semi-annual internal project review by DGP MOH Health surveys conducted and results made public Survey results will lead to action plans after every Semi - annual DGP Semi-annual", + "ner_text": [ + [ + 1162, + 1168, + "named" + ] + ], + "validated": false, + "empirical_context": "Public health facilities with safe disposal of sharps waste Quarterly IVA Quarterly IVA and PBF report MOH Facilities receiving PBF payment within 30 days of submission of verification of results Semi - annual Accounting system Semi-annual internal project review by DGP MOH Average user experience score among PBF / DFF participating facilities User experience score is a zero if not collected. Quarterly IVA Quarterly IVA and PBF report MOH Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities with functioning community health workers per the norms set by the National Community Health Strategy Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities that have formally declared the abandonment of the practice of FGM Annual Official act by community leadership Annual internal project review by DGP ( combined with semi - annual review ) MOH Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) Semi - annual DHIS2 Semi-annual internal project review by DGP MOH Facilities that are connected to the National Logistics Management Information System ( eLMIS ) Semi - annual eLMIS Semi-annual internal project review by DGP MOH Health surveys conducted and results made public Survey results will lead to action plans after every Semi - annual DGP Semi-annual", + "type": "system", + "explanation": "However, DHIS-2 is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS-2 is a dataset because it is mentioned in the context of data entry and health facilities.", + "contextual_reason_agent": "However, DHIS-2 is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 94, + "text": "While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to more effectively monitor and evaluate data on refugee-related education challenges. Learning assessments will include modules to analyze refugee learning outcomes, and efforts will be made to include disaggregated data on refugees in statistical yearbooks. 49. Priority Area 4: Supporting the operational needs of public schools in host community areas affected by refugees. Refugee-related project activities, estimated at US $ 36 million, fall under Results Areas 1, 3, and 7. In line with the Government \u2019 s vision, it has been agreed that the project will target host community schools. As such, benefits will accrue to both refugee children and host communities simultaneously, as most refugee children are enrolled in regular schools ( see annex 6 for additional details ). Expected direct beneficiaries include about 300 schools, enrolling about 150, 000 pupils, of which 20, 000 are refugees. Education inputs ( for example, classrooms, teachers, teaching and learning materials, furniture ) in the four affected regions, Far North, North, East, and Adamawa, supported under the project are very limited.", + "ner_text": [ + [ + 109, + 113, + "named" + ] + ], + "validated": false, + "empirical_context": "While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to more effectively monitor and evaluate data on refugee-related education challenges. Learning assessments will include modules to analyze refugee learning outcomes, and efforts will be made to include disaggregated data on refugees in statistical yearbooks.", + "type": "system", + "explanation": "However, EMIS is described as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to monitoring and evaluating data.", + "contextual_reason_agent": "However, EMIS is described as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 19, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 15 of 66 23. Functional mechanisms to support evidence-based decision-making and management of the sector are needed. MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators. Quality, reliable data is fundamental to inform education decisions and policies and enhance transparency. Accordingly, strengthening the EMIS and the annual school survey process to provide quality education information, in a timely manner, at every administrative level is essential to support the functions of the education ministries. 24. The education governing ministries lack the policy tools and implementation capacity needed for education sector management. The General Education system is characterized by inequitable allocation of teachers across the states, unclear deployment drivers, and outdated salary structure.", + "ner_text": [ + [ + 619, + 664, + "named" + ] + ], + "validated": false, + "empirical_context": "MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 96, + "text": "Figure 3 illustrates the relationship between the Results Areas, and DLIs across the result chain \u2013 it shows that DLRs consist of both outputs ( particularly those that come in the early stages of the Program ) and intermediate outcomes, which come later in the program and include higher-level DLIs. 32. An impact evaluation program is embedded into monitoring and evaluation of the Program. Impact evaluation of the NJP \u2019 s actions can ( i ) assess the effectiveness of jobs programs in a labor market under considerable stress, and ( ii ) compare the effectiveness and cost-efficiency of different support modalities in a single consistent framework. Approximately US $ 600, 000 in trust fund resources have been secured to fund robust impact evaluation from the Jobs Multi-Donor Trust Fund ( parent Trust Fund number TF072322 ). Randomized controlled trials are envisaged to generate convincing evidence on the performance of the entrepreneurship grant and ALMP components. High quality administrative data available from the MOF will allow for a high-quality non-experimental evaluation of activities including support to the TSEZ ( synthetic control group ), and the value chains and broadband access components ( difference in difference approaches with pre-trends ).", + "ner_text": [ + [ + 978, + 1010, + "named" + ], + [ + 1290, + 1308, + "High quality administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Randomized controlled trials are envisaged to generate convincing evidence on the performance of the entrepreneurship grant and ALMP components. High quality administrative data available from the MOF will allow for a high-quality non-experimental evaluation of activities including support to the TSEZ ( synthetic control group ), and the value chains and broadband access components ( difference in difference approaches with pre-trends ).", + "type": "administrative data", + "explanation": "This is indeed a dataset as it is described as high-quality administrative data used for empirical evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of data used for evaluation.", + "contextual_reason_agent": "This is indeed a dataset as it is described as high-quality administrative data used for empirical evaluation.", + "contextual_signal": "mentioned as a source of data for evaluation", + "tags": [] + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 2, + "text": "( 2018 ) round of the Household Survey ( Enquete djiboutienne aupr\u00e8s des m\u00e9nages 4 ) EEP Eligible Expenditures of the Program EFA Education for All EGMA Early Grade Mathematics Assessment EMIS Education Management Information System EMP Environmental Management Plan ESCP Environmental and Social Commitment Plan ESMF Environment and Social Management System ESMP Environmental and Social Management Plans FM Financial Management FMR Financial Management Report", + "ner_text": [ + [ + 22, + 38, + "named" + ], + [ + 2, + 6, + "Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "( 2018 ) round of the Household Survey ( Enquete djiboutienne aupr\u00e8s des m\u00e9nages 4 ) EEP Eligible Expenditures of the Program EFA Education for All EGMA Early Grade Mathematics Assessment EMIS Education Management Information System EMP Environmental Management Plan ESCP Environmental and Social Commitment Plan ESMF Environment and Social Management System ESMP Environmental and Social Management Plans FM Financial Management FMR Financial Management Report", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey, which typically collects and organizes data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Household Survey' suggests a structured collection of data from households.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey, which typically collects and organizes data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 90, + "text": "These investments could be spent on, but not limited to, ( i ) infrastructure to access and use remote pastures, such as spot road improvements, stock watering points, shelters and stock-pens, and milk cooling equipment; ( ii ) small machinery to produce and harvest fodder; ( iii ) rehabilitation measures for degraded areas 84 As of September 2021. National Bank of Tajikistan, https: / / www. nbt. tj / en /. 85 Expert estimations based on historical data from the National Bank of Tajikistan and forecasts done by Economist Intelligence Unit Country report on Tajikistan ( 3rd and 4th quarters ). 86 Re-financing rate in Tajikistan from April 28, 2021. National Bank of Tajikistan, https: / / www. nbt. tj / en /. 87 The social discount rate used for the economic analysis is based on World Bank \u2019 s estimations, proposed by a standardized methodology. See Discounting Costs and Benefits in Economic Analysis of World Bank Projects, OPSPQ. May 9, 2016. \u201c Where no country-specific growth projections are available, we suggest using 3 percent as a rough estimate for expected long-term growth rate in developing countries. Given reasonable parameters for the other parameters for the other variables in the standard Ramsey formula linking discount rates to growth rates, this yields a discount rate of 6 percent. \u201d The discount rate is also in line with the discount rate in recently endorsed Strengthening Resilience of the Agriculture Sector Project In Tajikistan ( P175952 ), ANNEX 4: Economic and Financial Analysis and Greenhouse Gas Accounting. The joint World Bank / IMF Debt Sustainability Analysis ( DSA, May 2020 ) projects an average growth rate of 3. 8 percent in the coming decade.", + "ner_text": [ + [ + 443, + 458, + "named" + ] + ], + "validated": false, + "empirical_context": "tj / en /. 85 Expert estimations based on historical data from the National Bank of Tajikistan and forecasts done by Economist Intelligence Unit Country report on Tajikistan ( 3rd and 4th quarters ). 86 Re-financing rate in Tajikistan from April 28, 2021.", + "type": "data", + "explanation": "'Historical data' is not a structured collection of data but rather a general term for past information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'historical data' refers to a dataset due to its mention alongside data sources.", + "contextual_reason_agent": "'Historical data' is not a structured collection of data but rather a general term for past information.", + "contextual_signal": "mentioned only as a term, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "158_40156", + "page": 23, + "text": "Included in the M & E framework for the project is a set of indicators \u2013 as defined in the Results Framework and Monitoring ( Annex 3 ). As part of the M & E strategy development, IGAD will also work with Member States to identify indicators for CBMP that will be added in National M & E Frameworks. The regional HIV M & E strategy will constitute the 3rd of the Three Ones for IGAD. 77. The M & E system cannot be operational without funding. Therefore, a costed M & E work plan will be developed and updated on an annual basis as part of Component 3 of this project. One the regional M & E strategy has been developed this strategy will also be operationalized on an annual basis through the development of one regional and costed M & E work plan. IGAD will dedicate, for future HIV funding, a percentage of funding to the functioning of the M & E system \u2013 a minimum of 10 % of all HIV project funding will be dedicated by IGAD to sustain the functioning of the regional HIV M & E system. 78. Surveys and surveillance data will be used to track PDO-level project results.", + "ner_text": [ + [ + 1007, + 1024, + "named" + ], + [ + 180, + 184, + "surveillance data <> author" + ], + [ + 995, + 1002, + "surveillance data <> data type" + ], + [ + 1047, + 1072, + "surveillance data <> data description" + ] + ], + "validated": true, + "empirical_context": "78. Surveys and surveillance data will be used to track PDO-level project results.", + "type": "data", + "explanation": "In this context, 'surveillance data' is explicitly mentioned as being used to track project results, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'surveillance data' is a dataset because it refers to a collection of data used for tracking results.", + "contextual_reason_agent": "In this context, 'surveillance data' is explicitly mentioned as being used to track project results, confirming its role as a data source.", + "contextual_signal": "follows 'will be used to track'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP Management Information System Attendance data collected during each training session Implementing Partner Number of beneficiary households receiving Direct Income Support The number of total beneficiary HHs that are selected to participate in DIS under sub-component 1. 2, in accordance with the Project Operations Manual, This indicator will be measured, at a minimum, Registration and payment data from the SNSOP MIS Beneficiary data will be collected during registration and updated over the course of project implementation. Selected Implementing Partner", + "ner_text": [ + [ + 483, + 492, + "named" + ] + ], + "validated": false, + "empirical_context": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP Management Information System Attendance data collected during each training session Implementing Partner Number of beneficiary households receiving Direct Income Support The number of total beneficiary HHs that are selected to participate in DIS under sub-component 1. 2, in accordance with the Project Operations Manual, This indicator will be measured, at a minimum, Registration and payment data from the SNSOP MIS Beneficiary data will be collected during registration and updated over the course of project implementation. Selected Implementing Partner", + "type": "system", + "explanation": "However, the context indicates that 'SNSOP MIS' is described as a management information system but not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often stands for Management Information System, suggesting data handling.", + "contextual_reason_agent": "However, the context indicates that 'SNSOP MIS' is described as a management information system but not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "158_40156", + "page": 24, + "text": "\u0083 Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. \u0083 Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed prior to the initiation of implementation. The purpose of these rapid assessments will be to collect the baseline data on services supporting CBMPs, identify vulnerable populations ( CBMPs ), estimate the sizes of the vulnerable populations, the demographics of the local populations ( including nomads ), vulnerability factors, and interaction between the CBMPs and local populations. \u0083 IGAD Regional Mapping assessment: In the last couple of years IGAD implemented a World Bank Institutional Development Fund ( IDF ) grant on Strengthening the HIV / AIDS M & E Capacity in the Horn of Africa Cross-Border Regions. The main output of this grant was a regional mapping assessment of HIV / AIDS interventions in the cross-border areas, and the establishment of a regional M & E working group. The data generated from in-country reports will be updated regularly through this project and posted on the IGAD web site.", + "ner_text": [ + [ + 2, + 25, + "named" + ], + [ + 68, + 90, + "Health facility surveys <> data type" + ], + [ + 524, + 537, + "Health facility surveys <> data type" + ], + [ + 577, + 599, + "Health facility surveys <> reference population" + ], + [ + 685, + 702, + "Health facility surveys <> reference population" + ], + [ + 715, + 721, + "Health facility surveys <> reference population" + ], + [ + 807, + 811, + "Health facility surveys <> author" + ], + [ + 998, + 1033, + "Health facility surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "\u0083 Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. \u0083 Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed prior to the initiation of implementation.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves a systematic survey to collect data on HIV related services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured assessment of health facilities.", + "contextual_reason_agent": "This is indeed a dataset as it involves a systematic survey to collect data on HIV related services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 41, + "text": "To allow for informed beneficiary feedback, the citizen engagement interventions will be supported by sequenced communication actions through local radios and traditional community communication channels to disclose information and keep communities informed on the project progress and key information. The communication messaging will also inform the GRM procedures as well as the uptake locations and timeframe of resolutions. The project will support the development and strengthening of the CDD app to provide access to information on all interventions to a broader audience and support the project \u2019 s community-centered approach. The strengthening of social cohesion and community resilience in Casamance are the main objectives of the project through a multidimensional and inclusive approach. Citizen engagement will be captured in the result framework through monitoring Grievances received and addressed in the determined timeframe - % \u201c. In order to count for a CE mechanism the satisfaction survey has to be done at the latest by project midterm so that the feedback can be taken into account in the remaining project interventions. 100. Gender and youth sensitive interventions. To address stark gender and youth inequalities, the project has integrated gender - and youth-sensitive interventions into the project components through actively boosting their roles in public life and community decision making by supporting their participation in local development processes, empowering them to identify and prioritize investments, engaging them actively in local dialogues and social cohesion events under Component 1. In response to gender and youth-based disparities in economic opportunities and climate vulnerability, CEDP will target women and youth through a multi-faceted approach of livelihood, agribusiness, and labor-intensive activities ( through for example rural road maintenance ). Increasing women and youth \u2019 s roles in public life and decision making will be supported by women and youth \u2019 s participation in community", + "ner_text": [ + [ + 990, + 1009, + "named" + ], + [ + 701, + 710, + "satisfaction survey <> data geography" + ], + [ + 1751, + 1766, + "satisfaction survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Citizen engagement will be captured in the result framework through monitoring Grievances received and addressed in the determined timeframe - % \u201c. In order to count for a CE mechanism the satisfaction survey has to be done at the latest by project midterm so that the feedback can be taken into account in the remaining project interventions. 100.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned that the survey will be conducted to gather feedback, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satisfaction survey' implies a structured collection of responses.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned that the survey will be conducted to gather feedback, indicating its use as a data source.", + "contextual_signal": "mentioned as a survey to gather feedback", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 78, + "text": "Borrowed to start, operate, or expand a farm or business 10. 1 13. 7 3. 6 Saved at a financial institution 1. 6 3. 5 1. 9 Saved using a savings club or a person outside the family 17. 6 12. 3 - 5. 3 Saved to start, operate, or expand a farm or business 9. 7 15. 5 5. 8 Saved for education or school fees * 8. 8 19. 8 11. 1 Source: World Bank Findex Survey ( 2017 ). Note: * Findex 2014. 8. Entrepreneurship. The World Bank Enterprise Survey collects data from firms in the manufacturing and service industries around the world. In Chad, only 13. 1 percent of all firms have female participation in ownership and 9. 0 percent have a majority of female participation in ownership. As a benchmark, 29. 6 percent have female participation in ownership and 12. 3 percent have a majority of female participation in ownership in Sub-Saharan Africa. Moreover, only 1. 9 percent of permanent full - time production workers are female in Chad, compared to 19. 0 percent in the region. Table 3. 5.", + "ner_text": [ + [ + 412, + 440, + "named" + ], + [ + 331, + 341, + "World Bank Enterprise Survey <> publisher" + ], + [ + 358, + 362, + "World Bank Enterprise Survey <> publication year" + ], + [ + 531, + 535, + "World Bank Enterprise Survey <> data geography" + ], + [ + 822, + 840, + "World Bank Enterprise Survey <> data geography" + ], + [ + 928, + 932, + "World Bank Enterprise Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Entrepreneurship. The World Bank Enterprise Survey collects data from firms in the manufacturing and service industries around the world. In Chad, only 13.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly described as collecting data from firms for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that collects data from firms.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly described as collecting data from firms for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 63, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | 54 6. The Program aims to address challenges that prevents Jordan \u2019 s health system to be more efficient and effective to provide people-centric quality services. According to the Global Digital Health Index ( 2018 ), Jordan has developed a more mature digital health landscape ( with an overall score of 4 out of 5 ), compared to other neighboring countries, such as Iraq and Kuwait. However, due to multiple electronic information systems designed and operated in silos, data systems have been fragmented with inconsistent data quality. Other assessments also highlight areas for improvement, such as governance ( for example, a lack of standards across different systems and enforcement ), ICT infrastructure ( for example, limited coverage of ICT equipment and broadband coverage at health facilities ), and institutional and workforce capacity. Accordingly, the Program aims to address critical gaps by supporting the expansion, interoperability, and effective use of digital health information systems in Jordan. With Jordan \u2019 s mature digital health landscape, expected results from the Program will enable Jordan to establish a conducive governance environment for more transparent and effective integration of innovative technologies in the health sector. In addition, the Program builds on the legacy that Jordan had invested in during the last 10 years.", + "ner_text": [ + [ + 275, + 302, + "named" + ], + [ + 15, + 21, + "Global Digital Health Index <> data geography" + ], + [ + 154, + 160, + "Global Digital Health Index <> data geography" + ], + [ + 305, + 309, + "Global Digital Health Index <> publication year" + ], + [ + 313, + 319, + "Global Digital Health Index <> data geography" + ], + [ + 1106, + 1112, + "Global Digital Health Index <> data geography" + ], + [ + 1119, + 1125, + "Global Digital Health Index <> data geography" + ], + [ + 1209, + 1215, + "Global Digital Health Index <> data geography" + ], + [ + 1411, + 1417, + "Global Digital Health Index <> data geography" + ] + ], + "validated": true, + "empirical_context": "The Program aims to address challenges that prevents Jordan \u2019 s health system to be more efficient and effective to provide people-centric quality services. According to the Global Digital Health Index ( 2018 ), Jordan has developed a more mature digital health landscape ( with an overall score of 4 out of 5 ), compared to other neighboring countries, such as Iraq and Kuwait. However, due to multiple electronic information systems designed and operated in silos, data systems have been fragmented with inconsistent data quality.", + "type": "index", + "explanation": "It is indeed a dataset as it provides a structured collection of scores used for empirical analysis of digital health maturity.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as an index that scores the digital health landscape.", + "contextual_reason_agent": "It is indeed a dataset as it provides a structured collection of scores used for empirical analysis of digital health maturity.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 27, + "text": "It was agreed that the second phase of this reform should be shaped around a principle of school and district focus, so that reforms at the central level will be premised on the changing role that the central ministry will play in a system that is focused on school and district level delivery of learning programs to build the kind of participatory, outcome-based and student-centered learning that is required. 11. Quality: Jordan has now participated in three rounds of the Trends in International Mathematics and Science Study ( TIMSS ) and one round of the Programme for International Student Assessment ( PISA ). In both cases, Jordan performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages. As part of ERfKE I, MoE introduced a national assessment program using a PISA - like approach but focused specifically on the knowledge economy aspects of the national curriculum. It also conducted a series of more \u201c traditional \u201d national assessments of learning achievement in key subjects. Establishing and implementing these systems is a major stride forward in its own right and a significant achievement of the ERfKE I reform. However, these assessments have revealed that the majority of students still perform below desired achievement levels and that a significant number of students drop out of the system. The overall completion rate of secondary education is about 70 percent, with almost 30 percent of students dropping out after the 10th grade. The monitoring and evaluation processes established under ERfKE I are now beginning to relate this performance to a range of different education and non education variables to determine what mix of inputs and processes has yielded the greatest improvements in terms of quality learning outcomes. Quality remains a significant challenge for Jordan, but some mechanisms have been put in place to permit more systematic and rigorous evaluation of which investments are most likely to yield impact on learning outcomes.", + "ner_text": [ + [ + 477, + 530, + "named" + ], + [ + 426, + 432, + "Trends in International Mathematics and Science Study <> data geography" + ], + [ + 533, + 538, + "Trends in International Mathematics and Science Study <> acronym" + ], + [ + 634, + 640, + "Trends in International Mathematics and Science Study <> data geography" + ], + [ + 1934, + 1940, + "Trends in International Mathematics and Science Study <> data geography" + ] + ], + "validated": true, + "empirical_context": "11. Quality: Jordan has now participated in three rounds of the Trends in International Mathematics and Science Study ( TIMSS ) and one round of the Programme for International Student Assessment ( PISA ). In both cases, Jordan performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages.", + "type": "study", + "explanation": "This is a dataset as it is a structured collection of data used for empirical analysis in education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a study that collects and analyzes educational data.", + "contextual_reason_agent": "This is a dataset as it is a structured collection of data used for empirical analysis in education.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 56, + "text": "Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Beneficiary Feedback: Percentage of students satisfied with the digital skills for the agriculture program This indicator will track the percentage of students satisfied with the quality of Annual Feedback survey results MoHEST PIU will conduct feedback survey results annually MoHEST PIU", + "ner_text": [ + [ + 26, + 36, + "named" + ] + ], + "validated": false, + "empirical_context": "Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Beneficiary Feedback: Percentage of students satisfied with the digital skills for the agriculture program This indicator will track the percentage of students satisfied with the quality of Annual Feedback survey results MoHEST PIU will conduct feedback survey results annually MoHEST PIU", + "type": "organization", + "explanation": "However, it is identified as an organization (MoHEST PIU) and not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes an acronym that resembles a data source.", + "contextual_reason_agent": "However, it is identified as an organization (MoHEST PIU) and not a structured collection of data.", + "contextual_signal": "mentioned only as an organization, not as a data source", + "tags": [] + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 83, + "text": "The shared services centre located in Manila Philippines performs monthly financial management quality assurance reviews, subsequently the cleared data is uploaded to ICRC Corporate Global system - JD Edwards Financial Management System / Data Warehouse. 52. The FM function in FAO Somalia is headed by Finance and Administration Officer ( Corporate Support Services Department - CSSD ). The Department is made up of Finance ( 4 staff ), HR ( 4 staff ), Logistics ( 4 ) IT ( 3 staff ) and Operations & Budget Control. The CSSD work consultatively with technical teams, External Audit and Compliance team as well as Operations Department \u2013 which coordinates corporate planning and programs implementation including liaison with donors. The Financial Management function is managed through Financial Programs Management Information Systems ( FPMIS ) seamlessly integrated to provide financial data to FAO Oracle based Global Resource Management System. The organization has field offices in Mogadishu ( South Central Somalia ), Hargeisa ( Somaliland ) and Garowe ( Puntland ). FAO has over 1, 200 framework agreements with different organizations across Somalia ( NGOs, Governments and Community Based Organizations, Professional Associations ). 53. FAO and ICRC Finance & Administration Departments in close consultation with the project technical teams in the respective organizations will take leadership in the overall responsibility in the management of the project financial management function. The two organizations will take steps to ensure the project financial management activities are integrated into the existing financial management systems. The Financial Management organizational structures and the related control environment were reviewed and found to be adequate to support the implementation of the project activities. Budgeting 54. ICRC planning and budgeting process is bottom up and culminates in a resource mobilization annual budget \u2013 \u201c Planning for Results ( PfR ), \u201d which outlines the costing and prioritized programmatic areas of interventions. The project budget shall be prepared as stipulated in ICRC Financial Regulations, in particular Section ( 6 ) of the Regulations. The budgeting process is fully integrated into the SUN accounting system budget module with well-defined budget formulation, execution and amendment controls. All budget", + "ner_text": [ + [ + 788, + 837, + "named" + ] + ], + "validated": false, + "empirical_context": "The CSSD work consultatively with technical teams, External Audit and Compliance team as well as Operations Department \u2013 which coordinates corporate planning and programs implementation including liaison with donors. The Financial Management function is managed through Financial Programs Management Information Systems ( FPMIS ) seamlessly integrated to provide financial data to FAO Oracle based Global Resource Management System. The organization has field offices in Mogadishu ( South Central Somalia ), Hargeisa ( Somaliland ) and Garowe ( Puntland ).", + "type": "system", + "explanation": "However, it is described as a management information system, which indicates it is a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Systems' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a management information system, which indicates it is a system for managing information rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 25, + "text": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 22 of 93 publication of public finance data and the setup of the BOOST25 database on public budget and spending recently developed with World Bank support to ensure it is updated and published. \uf0b7 Subcomponent 1. 3: Deploying new payroll management. This subcomponent will support the deployment of the recently developed integrated HR / payroll management system ( SIGIPES II ) in MINEDUB and MINSANTE. This will require the completion of the ongoing update of the HR database for the selected ministries, acquisition of additional IT equipment, and training of the respective units in charge of HR / payroll in these ministries including change management support ( possibly using the RRA ). Finally, the project will support the use of SIGIPES II for HR analysis, the update of the Organizational Framework ( cadres organiques ), and strategic staffing planning for MINEDUB and MINSANTE. \uf0b7 Subcomponent 1. 4: Strengthening SOE portfolio management. This subcomponent will support the Government \u2019 s efforts to strengthen the corporate governance framework and Government oversight of the public enterprise sector.", + "ner_text": [ + [ + 470, + 480, + "named" + ] + ], + "validated": false, + "empirical_context": "3: Deploying new payroll management. This subcomponent will support the deployment of the recently developed integrated HR / payroll management system ( SIGIPES II ) in MINEDUB and MINSANTE. This will require the completion of the ongoing update of the HR database for the selected ministries, acquisition of additional IT equipment, and training of the respective units in charge of HR / payroll in these ministries including change management support ( possibly using the RRA ).", + "type": "system", + "explanation": "However, it is described as a system and not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as an integrated HR/payroll management system.", + "contextual_reason_agent": "However, it is described as a system and not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 47, + "text": "For Results area 2, TA will support capacity building activities, such as the creation of a multisectoral team at the woreda level, staffing of key specialists to support SPG implementation; provision of training for relevant staff; M & E including data collection and entry for DHIS2, UNISE, and EMIS; incentive mechanisms for the service delivery system; development of innovations for effective service delivery at federal and regional levels by focusing in selected woredas for demonstrations; and impact evaluation to assess the effectiveness of SPG interventions. 87. The capacity building interventions supported under the IPF component are described in more detail in Annex 8. The HCO includes a PAP to drive intermediate outputs linked to the achievement of the results outlined under each results area, and the IPF component ( subcomponent 2. 3 ) provides TA for the completion of activities in the PAP ( Annex 6 ). Key capacity building activities are summarized in section II. D above, and a more detailed description is provided in Annex 8. D. Capacity Building", + "ner_text": [ + [ + 297, + 301, + "named" + ], + [ + 470, + 477, + "EMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "For Results area 2, TA will support capacity building activities, such as the creation of a multisectoral team at the woreda level, staffing of key specialists to support SPG implementation; provision of training for relevant staff; M & E including data collection and entry for DHIS2, UNISE, and EMIS; incentive mechanisms for the service delivery system; development of innovations for effective service delivery at federal and regional levels by focusing in selected woredas for demonstrations; and impact evaluation to assess the effectiveness of SPG interventions. 87.", + "type": "system", + "explanation": "EMIS is indeed a data source as it is involved in data collection and entry processes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection and entry.", + "contextual_reason_agent": "EMIS is indeed a data source as it is involved in data collection and entry processes.", + "contextual_signal": "mentioned as a data source in the context of data collection and entry", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 57, + "text": "47 32. The sub-component will support the background analysis for the implementation of the database and its implementation. These include: ( a ) Design and implementation of key tools: updated poverty and malnutrition maps, development of community-based targeting criteria and processes, development of registration questionnaire, construction of proxy-means test score, the organization of the different committees involved in the registration, the implementation of the PMT survey, storing and analysis of the data as well as the preparation of beneficiary lists for the cash transfer program and its complementary activities. It will also support the acquisition of key equipment ( hardware, software, back-up equipment ). ( b ) Implementation of the targeting and registration in selected areas including the organization and support of the targeting committees, the implementation and processing of questionnaires and the preparation of the list of registered households as well as eligible households for the cash transfer program. ( c ) Information campaigns to explain the purposes and processes of the targeting and registration to commune and colline administration and local government staff and to households in the selected collines, potential additional activities to ensure that beneficiaries are aware of the requirements and supporting documents for national ID cards, ( d ) Quality controls: Spot checks, process evaluations, analysis of targeting efficiency to identify necessary adjustments. Sub-component 2. 2.", + "ner_text": [ + [ + 305, + 331, + "named" + ] + ], + "validated": false, + "empirical_context": "The sub-component will support the background analysis for the implementation of the database and its implementation. These include: ( a ) Design and implementation of key tools: updated poverty and malnutrition maps, development of community-based targeting criteria and processes, development of registration questionnaire, construction of proxy-means test score, the organization of the different committees involved in the registration, the implementation of the PMT survey, storing and analysis of the data as well as the preparation of beneficiary lists for the cash transfer program and its complementary activities. It will also support the acquisition of key equipment ( hardware, software, back-up equipment ).", + "type": "questionnaire", + "explanation": "However, the registration questionnaire itself is a tool for data collection, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection through a questionnaire.", + "contextual_reason_agent": "However, the registration questionnaire itself is a tool for data collection, not a structured collection of data.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 64, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 60 of 66 16. Under the proposed grant, Cash reconciliations shall be done on a daily basis and report signed by the preparer, checked, and approved by authorized persons. Both ministries will also be required to prepare periodic monthly bank reconciliations with the above controls as cash reconciliation; any discrepancies identified during reconciliation will be communicated to the management and resolved immediately. The POM shall be established to spell out procedures and policies for the Project. In addition, MoGEI and MoHEST shall maintain a separate Fixed Assets register for bank-financed assets, properly tagged and assigned a unique assets identification number. Project Fixed Assets such as vehicles ( if any ) shall be insured by a reputable insurance company after proper due diligence is done by the PIU and approved by the World Bank where it \u2019 s deemed necessary. MoGEI and MoHEST shall ensure that World Bank resources are utilized for the intended purposes by maintaining adequate internal control arrangements that are acceptable to the World Bank throughout project implementation. Funds Flow and Disbursement arrangements 17. Disbursement of the Grant will use advances, reimbursement, direct payments and payments under Special Commitments including full documentation or against statements of expenditure, as appropriate.", + "ner_text": [ + [ + 654, + 675, + "named" + ] + ], + "validated": false, + "empirical_context": "The POM shall be established to spell out procedures and policies for the Project. In addition, MoGEI and MoHEST shall maintain a separate Fixed Assets register for bank-financed assets, properly tagged and assigned a unique assets identification number. Project Fixed Assets such as vehicles ( if any ) shall be insured by a reputable insurance company after proper due diligence is done by the PIU and approved by the World Bank where it \u2019 s deemed necessary.", + "type": "register", + "explanation": "However, it is mentioned as a register for assets rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a register that could contain data about fixed assets.", + "contextual_reason_agent": "However, it is mentioned as a register for assets rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 60, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 57 of 89 metered and entered in the utility database. project M & E reports. and self-reporting mechanisms described in the PAD. Number of social institutions with basic sanitation and hygiene facilities installed Upgraded WASH facilities in schools and HCFs will need to meet criteria for basic access to sanitation under SDG 6. 2 for safety, privacy and safe sewage disposal. Sanitation facilities also need to be accessible to people with disabilities, and have allocated facilities for menstrual hygiene management for female students and teachers. Annual M & E reports. Data reported by construction companies and schools / HCF management upon acceptance of the upgraded facilities will be further verified through the random site visits of the PMU specialists to monitor functionality of WASH facilities and their ) & M arrangements.", + "ner_text": [ + [ + 126, + 142, + "named" + ], + [ + 4, + 14, + "utility database <> publisher" + ], + [ + 15, + 25, + "utility database <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 57 of 89 metered and entered in the utility database. project M & E reports.", + "type": "database", + "explanation": "This is a dataset as it is mentioned in the context of being metered and entered, indicating it serves as a data source for monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'utility database' suggests a structured collection of data related to water supply and sanitation.", + "contextual_reason_agent": "This is a dataset as it is mentioned in the context of being metered and entered, indicating it serves as a data source for monitoring and evaluation.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 20, + "text": "The fourth Result Area is strengthened education system management by focusing on supporting MOE and strengthening its capacity to manage an increasing number of schools and students, notably due to the expansion of early childhood education and to the enrollment of a large number of refugee children in Jordanian schools. The focus of this Result Area is to provide and enhance the tools and resources available to the MOE for decision making and implementation. These tools include information systems such as the operationalization of the GIS, which will allow the MOE to map school construction, expansion, and rehabilitation needs, and the strengthening of the existing OpenEMIS to allow MOE to analyze and make use of disaggregated and gender \u2010 sensitive data for decision making. This Result Area will also support the MOE in securing budget additionality to the sector in an efficient and effective manner to ensure that resources are available for undertaking the necessary reforms. 19 In an effort to shed light on gender dynamics in the education sector in Jordan, the impact evaluation will assess heterogeneous effects by student gender. 20 By \u201c private KG \u201d is meant: all non \u2010 public provision including for profit private KGs, community \u2010 based KGs, and NGO KGs.", + "ner_text": [ + [ + 543, + 546, + "named" + ] + ], + "validated": false, + "empirical_context": "The focus of this Result Area is to provide and enhance the tools and resources available to the MOE for decision making and implementation. These tools include information systems such as the operationalization of the GIS, which will allow the MOE to map school construction, expansion, and rehabilitation needs, and the strengthening of the existing OpenEMIS to allow MOE to analyze and make use of disaggregated and gender \u2010 sensitive data for decision making. This Result Area will also support the MOE in securing budget additionality to the sector in an efficient and effective manner to ensure that resources are available for undertaking the necessary reforms.", + "type": "system", + "explanation": "However, GIS is described as an information system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GIS is a dataset because it is associated with mapping and data analysis.", + "contextual_reason_agent": "However, GIS is described as an information system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 84, + "text": "The World Bank Cameroon Education Reform Support Project ( P160926 ) Page 81 of 148 14. Figure 2. 1 includes the results chain of the project. Figure 2. 1. Results Chain RESULTS AREAS AND OTHER KEY INITIATIVES INTERMEDIATE RESULTS INTERMEDIATE OUTCOMES OUTCOME IMPROVE EQUITABLE ACCESS TO QUALITY BASIC EDUCATION, WITH A FOCUS ON SELECTED DISADVANTAGED AREAS Improved Access, Quality, and Education System Management Percentage of primary schools ( with more than 100 pupils ) with at least 3 state paid teachers Improved distribution of teachers recruited by the state in primary public schools ( including focus on refugee - areas ) ( DLI 1 ) Share of primary-level teachers trained on new curricula training Increased capacities of teachers on the effective and efficient use of the new curriculum in pre-primary and primary schools ( DLI 2 ) Availability of essential textbooks ( based on the new curriculum ) student ratio at primary level Increased availability of essential textbooks in public primary schools ( DLI 3 ) Level of Pre - primary enrollment in rural areas Increased access to pre - school in rural areas through community preschool. according to standards ( DLI 4 ) Improved Education System Management Learning assessments regularly available, disclosed and used for system piloting Standardized student learning assessment for primary and secondary education in place ( DLI 5 ) Data regularly available, disclosed and used for system piloting Integrated EMIS functional and operational ( DLI", + "ner_text": [ + [ + 1476, + 1480, + "named" + ], + [ + 4, + 14, + "EMIS <> publisher" + ], + [ + 15, + 23, + "EMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Results Chain RESULTS AREAS AND OTHER KEY INITIATIVES INTERMEDIATE RESULTS INTERMEDIATE OUTCOMES OUTCOME IMPROVE EQUITABLE ACCESS TO QUALITY BASIC EDUCATION, WITH A FOCUS ON SELECTED DISADVANTAGED AREAS Improved Access, Quality, and Education System Management Percentage of primary schools ( with more than 100 pupils ) with at least 3 state paid teachers Improved distribution of teachers recruited by the state in primary public schools ( including focus on refugee - areas ) ( DLI 1 ) Share of primary-level teachers trained on new curricula training Increased capacities of teachers on the effective and efficient use of the new curriculum in pre-primary and primary schools ( DLI 2 ) Availability of essential textbooks ( based on the new curriculum ) student ratio at primary level Increased availability of essential textbooks in public primary schools ( DLI 3 ) Level of Pre - primary enrollment in rural areas Increased access to pre - school in rural areas through community preschool. according to standards ( DLI 4 ) Improved Education System Management Learning assessments regularly available, disclosed and used for system piloting Standardized student learning assessment for primary and secondary education in place ( DLI 5 ) Data regularly available, disclosed and used for system piloting Integrated EMIS functional and operational ( DLI", + "type": "system", + "explanation": "In the context, 'EMIS' is described as an integrated system that is functional and operational, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is mentioned in the context of data availability and system management.", + "contextual_reason_agent": "In the context, 'EMIS' is described as an integrated system that is functional and operational, indicating it serves as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 22, + "text": "The achievement of the DLIs will be financed through a mix of funding modalities including an IDA credit with co-financing from grant sources. The co-financing arrangements amongst the different DLIs are detailed under table A3. 3 of this document. IV. ASSESSMENT SUMMARY21 A. Technical Strategic rationale 52. Helping host countries share the burden of receiving Syrian refugees is a global strategic priority. The international community, together with MEHE, have been addressing this need through the RACE program which started in 2013. This next phase proposes to pivot from a strict focus on supply - side access to focus on demand-side issues as well as on quality and on systems strengthening. These last two are national, regional, and global priorities, since it is learning rather than simply \u2018 schooling \u2019 that will equip future generations with the knowledge and skills to join a productive workforce and drive economic growth and human development. It is equally critical to help Lebanon \u2019 s education system sustain the delivery of quality services going forward and ultimately enable it to emerge stronger from this crisis. Technical soundness 53. The technical assessment concurred with the prioritization of access-linked activities to increase supply. Investment in infrastructure is a MEHE priority and responds to an urgent need. Statistics provided by the Developing Rehabilitation Assistance to Schools and Teachers Improvement ( D-RASATI survey, 2011 ) and recently updated under the Second Education Development Project ( EDP II, 2014 ) indicate that among the 1, 262 existing public schools, approximately 75 percent need repairs. Of these, approximately 25 percent need major repairs and among these, 40 specific schools are considered structurally unsafe. Additionally, only 42 percent of schools are owned by MEHE, 21 Studies and reports cited in the World Bank technical assessment are not necessarily endorsed by MEHE.", + "ner_text": [ + [ + 1452, + 1467, + "named" + ], + [ + 993, + 1000, + "D-RASATI survey <> data geography" + ], + [ + 1469, + 1473, + "D-RASATI survey <> publication year" + ], + [ + 1554, + 1558, + "D-RASATI survey <> publication year" + ], + [ + 1727, + 1781, + "D-RASATI survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Investment in infrastructure is a MEHE priority and responds to an urgent need. Statistics provided by the Developing Rehabilitation Assistance to Schools and Teachers Improvement ( D-RASATI survey, 2011 ) and recently updated under the Second Education Development Project ( EDP II, 2014 ) indicate that among the 1, 262 existing public schools, approximately 75 percent need repairs. Of these, approximately 25 percent need major repairs and among these, 40 specific schools are considered structurally unsafe.", + "type": "survey", + "explanation": "The D-RASATI survey is explicitly mentioned as providing statistics used in the context, confirming it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistics.", + "contextual_reason_agent": "The D-RASATI survey is explicitly mentioned as providing statistics used in the context, confirming it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 48, + "text": "41 Total Financing As % of Total Indicative timeline for DLI achievement to Grade 11 DLI # 3 Teacher performance measured and evaluated 6 2. 7 % No evidence of impact First Impact Evaluation Study ( IES ) conducted Second IES conducted DLI # 4 Number of participating schools that implement formative and summative assessments for students in Grade 3 in reading and math 18. 4 8. 2 % Not currently monitored 50 180 DLI # 5 Proportion of participating schools with active community partnerships 15. 6 7. 0 % 0 % for second - shift schools 50 % 80 % DLI # 6 Timely and robust data available for evidence informed policymaking and planning. DLR # 6. 1 A data and information management framework developed and adopted by MEHE and CERD 3 1. 3 % Misalignment in data collection and data management functions across different MEHE and CERD units Expected to be completed by Y1 DLR # 6. 2 Annual data available from participating schools on disaggregated data on student 12 5. 4 % Data on student enrollment not available until end of school year Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st", + "ner_text": [ + [ + 1040, + 1063, + "named" + ], + [ + 934, + 952, + "Student enrollment data <> data type" + ] + ], + "validated": true, + "empirical_context": "2 Annual data available from participating schools on disaggregated data on student 12 5. 4 % Data on student enrollment not available until end of school year Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st", + "type": "data", + "explanation": "This is indeed a dataset as it refers to structured data collected on student enrollment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data on student enrollment.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected on student enrollment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 14, + "text": "Similarly, the private sector ( formal and informal ) and nongovernmental HFs operate in a largely unregulated manner despite the significant role they play. 21 In addition, the GoB lacks health information critical for planning, budgeting, and management purposes, such as data on the availability of essential inputs for service delivery. Data on service utilization at HFs are mostly collected manually using paper forms and data controls and quality assurance mechanisms are largely nonexistent due to budget constraints. The GoB also does not have a digital registry of health care providers with basic data to manage human resources, such as job titles or professional profiles including education, work experience, and in \u2010 service trainings. Similarly, the supply chain management process is based on paper systems, except for two vertical programs ( immunization and family planning ), often leading to delays in the delivery of drugs, extended periods of medicine stockouts at public HFs, and high incidence of expired drugs. 17. In Balochistan, children suffer from suboptimal learning outcomes and large disparities by gender. Despite significant efforts by the Secondary Education Department ( SED ), Balochistan performs poorly compared to the national average across all education outcomes.", + "ner_text": [ + [ + 555, + 596, + "named" + ] + ], + "validated": false, + "empirical_context": "Data on service utilization at HFs are mostly collected manually using paper forms and data controls and quality assurance mechanisms are largely nonexistent due to budget constraints. The GoB also does not have a digital registry of health care providers with basic data to manage human resources, such as job titles or professional profiles including education, work experience, and in \u2010 service trainings. Similarly, the supply chain management process is based on paper systems, except for two vertical programs ( immunization and family planning ), often leading to delays in the delivery of drugs, extended periods of medicine stockouts at public HFs, and high incidence of expired drugs.", + "type": "registry", + "explanation": "However, the context indicates it is not functioning as a data source since it is described as nonexistent.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a registry that could contain structured data about health care providers.", + "contextual_reason_agent": "However, the context indicates it is not functioning as a data source since it is described as nonexistent.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 50, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda ( P176747 ) Page 46 of 77 including childcare and those community and household members. the infrastructure facilities. Women beneficiaries ( percentage ) Women in RHD Refugee women Value of credit provided to women enterprises ( Amount ) This indicator measures the value of credit provided by the PFIs under the project, disaggregated by refugee status, district, age, and disability status. Continuous. PFI data. The PFIs will maintain databases of the value of the credit disbursed, disaggregated by refugee status, district, age, and disability status. The MGLSD to collect the data from the PFIs each month, and compile and report it. Women enterprises in RHDs Refugee-owned enterprises Beneficiaries of job-focused interventions Enterprise baseline survey, annual surveys from year 2. Surveys of enterprises. This indicator measures the total number of jobs of enterprises benefiting from the project. MGLSD to administer the surveys, and compile and report the data.", + "ner_text": [ + [ + 499, + 507, + "named" + ], + [ + 68, + 85, + "PFI data <> reference population" + ], + [ + 86, + 92, + "PFI data <> data geography" + ], + [ + 213, + 232, + "PFI data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Continuous. PFI data. The PFIs will maintain databases of the value of the credit disbursed, disaggregated by refugee status, district, age, and disability status.", + "type": "database", + "explanation": "In this context, 'PFI data' is indeed a dataset as it refers to structured information maintained in databases.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PFI data' refers to a dataset because it mentions maintaining databases of disbursed credit values.", + "contextual_reason_agent": "In this context, 'PFI data' is indeed a dataset as it refers to structured information maintained in databases.", + "contextual_signal": "maintain databases of the value of the credit disbursed", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 64, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 59 of 130 electricity access under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. reports. Average household size based on UBOS National Household Survey. People provided with access to electricity under the project with off - grid of which female The indicator will track the number of females benefiting from off-grid electricity access under the project. Share of female is 51 %, UBOS Women and Men in Uganda, Facts and Figures 2016. Quarterly Project implementati on progress reports. UBOS Women and Men in Uganda, Facts and Figures, 2016 Approved loan and grants applications UECCC, MEMD People provided with access to electricity with grid, mini-grid, off-grid in refugee - hosting districts The indicator will track the number of host communities and refugees beneficiaries of access to grid, mini-grid, and off-grid technologies under the project in District hosting refugees. Average household size of 4. 7 people, UBOS National Household Survey 2016 - Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports.", + "ner_text": [ + [ + 1073, + 1103, + "named" + ], + [ + 150, + 187, + "UBOS National Household Survey <> data description" + ], + [ + 220, + 229, + "UBOS National Household Survey <> publication year" + ], + [ + 554, + 560, + "UBOS National Household Survey <> data geography" + ], + [ + 580, + 584, + "UBOS National Household Survey <> publication year" + ], + [ + 660, + 666, + "UBOS National Household Survey <> data geography" + ], + [ + 1360, + 1378, + "UBOS National Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Average household size of 4. 7 people, UBOS National Household Survey 2016 - Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides empirical data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a survey providing data on household size.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides empirical data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "171_multi0page", + "page": 69, + "text": "would also provide the basis for the preparation of the project audit reports. Reports to assist the monitoring of physical progress of the project would include project performance indicators that would link the financial information and the progress in project implementation. Reports for monitoring progress in the acquisition of project facilities would include reports on the procurement of goods, works, and services. The procurement progress reports would address issues relating to the compliance with Bank procurement guidelines, highlight key procurement issues and provide information on complaints, inadequate performance, if any, and major disputes. Risk analysis The specific financial management risk to be considered during the implementation of this project is the provision of adequate amount of counterpart resources in a timely manner. The extent of this risk would appear manageable since the shortage of counterpart funding was not an implementation problem under the BogotA Urban Transport Project ( Loan 4021-CO ), also implemented by the SHD through the PCU. A second financial management risk relates to the fact that the proposed project will be the first pilot project in Colombia utilizing Report Based Disbursement using FMRs. Nevertheless the institutional strengthening that the SHD would undergo prior to loan effectiveness and the significant amount of technical assistance included in the project would substantially mitigate this risk.", + "ner_text": [ + [ + 162, + 192, + "named" + ] + ], + "validated": false, + "empirical_context": "would also provide the basis for the preparation of the project audit reports. Reports to assist the monitoring of physical progress of the project would include project performance indicators that would link the financial information and the progress in project implementation. Reports for monitoring progress in the acquisition of project facilities would include reports on the procurement of goods, works, and services.", + "type": "concept", + "explanation": "'Project performance indicators' are mentioned as part of reports and monitoring, but they do not function as a data source themselves.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'project performance indicators' is a dataset because it sounds like a structured collection of metrics.", + "contextual_reason_agent": "'Project performance indicators' are mentioned as part of reports and monitoring, but they do not function as a data source themselves.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 39 of 47 feeding practices that decrease the prevalence of child diarrhea ), this CBA identifies only a lower bound of project benefits. Table 1. 2. Discounted Project Costs of Health Component ( current US $ ) Years Present Value 2 % Discount Rate 4 % Discount Rate 2020 2, 478, 360 2, 430, 581 2021 3, 996, 990 3, 844, 363 2022 4, 445, 516 4, 193, 332 2023 3, 622, 024 3, 350, 689 2024 3, 326, 210 3, 017, 714 Total 17, 869, 100 16, 836, 680 9. Mortality avoided is estimated based on the number of additional health services provided under the project. As a first step, utilization rates of reproductive, maternal, and child health services over the project cycle \u2014 both with and without the project \u2014 are projected. Three types of services are considered: skilled birth attendance, measles vaccination, and family planning visits. Baseline utilization data for 2017 and 2018, in the target HFs came from the Balochistan DHIS.", + "ner_text": [ + [ + 912, + 937, + "named" + ], + [ + 837, + 861, + "Baseline utilization data <> data description" + ], + [ + 888, + 910, + "Baseline utilization data <> data description" + ], + [ + 942, + 946, + "Baseline utilization data <> reference year" + ], + [ + 951, + 955, + "Baseline utilization data <> reference year" + ], + [ + 1022, + 1040, + "Baseline utilization data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Three types of services are considered: skilled birth attendance, measles vaccination, and family planning visits. Baseline utilization data for 2017 and 2018, in the target HFs came from the Balochistan DHIS.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected data on service utilization for specific years.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected data on service utilization for specific years.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 60, + "text": "The MoF and BoF are responsible for the FM aspect at the federal and regional levels, respectively. At the woreda and zone levels a pool system is in place, where the WOFs and ZOFs, respectively, are responsible for all FM aspects of WASH sector offices. 25. Financial management manual: The Project will follow the FM manual developed under the WASHP, which largely follows the government \u2019 s accounting manual, depicting all accounting policies, procedures, internal control issues, financial reporting, fund flow arrangements, budgeting, and external audits. Hence, in view of the new developments and activities of the new phase and the lessons learnt under the WASHP, the FM manual will be revised within two months after the Project \u2019 s effectiveness. The Borrower must obtain a \u201c no objection \u201d to the revised FM manual from the World Bank. Training on the FM manual will be carried out within two months of its approval by the World Bank. 26. Accounting system: For normal treasury funds, the GoE uses the Integrated Budget and Expenditure ( IBEX ) or Integrated Financial Management Information System ( IFMIS ). The IBEX accounting system is now operational in almost all public bodies while the IFMIS is under testing at federal ministries.", + "ner_text": [ + [ + 1060, + 1110, + "named" + ] + ], + "validated": false, + "empirical_context": "26. Accounting system: For normal treasury funds, the GoE uses the Integrated Budget and Expenditure ( IBEX ) or Integrated Financial Management Information System ( IFMIS ). The IBEX accounting system is now operational in almost all public bodies while the IFMIS is under testing at federal ministries.", + "type": "system", + "explanation": "However, it is described as a system under testing and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is described as a system under testing and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 30, + "text": "Page 26 of 88 The World Bank Chad Agribusiness and Rural Transformation Project ( P179238 ) activities and will be assisted by a team of M & E specialists in the four regional coordination units. The project will use the Geo-Enabling Monitoring System ( GEMS ) developed by the World Bank using geo-enabled methods to undertake M & E, particularly data collection in areas difficult to reach due to insecurity or conflict. It will also use third party monitoring where needed, through UN agencies, national NGOs, or firms hired by the PCU, to collect just-in-time information via mobile apps / tablets, building on geo-tagging of activities. As part of the legacy from ProPAD, provision has also been made for the project to use the toll-free number which permits collecting feedback directly from beneficiaries. 49. A baseline survey will be conducted during the first year of the project to establish the RF reference data and verify targets. Beneficiaries will be surveyed subsequently in year 3 ( mid \u2010 term ) and year 6 ( project end ) as part of surveys covering both reference and treatment samples, to track changes in their livelihood conditions attributable to project performance. M & E reports will be issued every six months on physical implementation and results monitoring. C. Sustainability 50. Sustainability considerations have been integrated into all project components.", + "ner_text": [ + [ + 819, + 834, + "named" + ], + [ + 278, + 288, + "baseline survey <> publisher" + ], + [ + 798, + 811, + "baseline survey <> reference population" + ], + [ + 907, + 924, + "baseline survey <> data type" + ], + [ + 945, + 958, + "baseline survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "49. A baseline survey will be conducted during the first year of the project to establish the RF reference data and verify targets. Beneficiaries will be surveyed subsequently in year 3 ( mid \u2010 term ) and year 6 ( project end ) as part of surveys covering both reference and treatment samples, to track changes in their livelihood conditions attributable to project performance.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey conducted to collect reference data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data collected from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey conducted to collect reference data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 80, + "text": "Evidence from the Zambia scheme shows that between the baseline data and program evaluation, school enrollment for children between the ages of 5-1 8 increased significantly with the largest increase between 14-15 years. In terms of gender disparities, girls were disadvantaged compared to the boys, indicating that with a low transfer level, parents often send only one child and it is usually a boy. The Malawi program provided a protective function with respect to schooling: after one year, children in beneficiary households had doubled their enrollment rates compared to comparison households, with drop rates half as large as children in comparison households. The beneficiary group also had fewer school absences per month. 222. Expected benefits from the Beneficiary Development Program. Component 2 of the ISP aims to pilot the Beneficiary Development Program in 25 districts. By linking health education services as well as skills training to the cash transfer program, the BDP will help ensure that the SWF has the maximum possible impact that a well-functioning cash transfer program would typically have. In this sense, the BDP ensures that results from the well-known cash transfer programs in Latin America are replicated in Yemen as well. This is likely to be due to the following key aspects of the BDP: Focus on health and education for the poorest groups ( groups A and B under the PMT ).", + "ner_text": [ + [ + 55, + 68, + "named" + ] + ], + "validated": true, + "empirical_context": "Evidence from the Zambia scheme shows that between the baseline data and program evaluation, school enrollment for children between the ages of 5-1 8 increased significantly with the largest increase between 14-15 years. In terms of gender disparities, girls were disadvantaged compared to the boys, indicating that with a low transfer level, parents often send only one child and it is usually a boy.", + "type": "data", + "explanation": "In this context, 'baseline data' is indeed used as a data source for evaluating changes in school enrollment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' is a dataset because it refers to a specific collection of data used for comparison in the evaluation.", + "contextual_reason_agent": "In this context, 'baseline data' is indeed used as a data source for evaluating changes in school enrollment.", + "contextual_signal": "mentioned as a reference point for program evaluation", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "172_multi0page", + "page": 111, + "text": "Additional Annex II: Education Sector Analysis SIERRA LEONE: REHABILITATION OF BASIC EDUCATION Education Sector Analysis Based on the MICS ( 2000 ) survey, a preliminary analysis of the sector has been conducted. The data collection was done in 2000, and approximately 24, 000 individuals belonging to about 3, 900 households were interviewed The survey had national coverage ( 1, 198 in the North Region, 972 in the East Region, 815 in the South Region and 919 in the West Region ). Of the total number of households included in the sample, 2, 720 households were located in rural area, while 1, 184 were located in urban settings. In order to maximize the accuracy of the data given the large size of the sample, the raw data collected are not to be representative of the entire country, but the use of weights was used to obtain representative national estimates.", + "ner_text": [ + [ + 134, + 138, + "named" + ], + [ + 47, + 59, + "MICS <> data geography" + ], + [ + 141, + 145, + "MICS <> publication year" + ], + [ + 392, + 404, + "MICS <> data geography" + ], + [ + 417, + 428, + "MICS <> data geography" + ], + [ + 441, + 453, + "MICS <> data geography" + ], + [ + 469, + 480, + "MICS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Additional Annex II: Education Sector Analysis SIERRA LEONE: REHABILITATION OF BASIC EDUCATION Education Sector Analysis Based on the MICS ( 2000 ) survey, a preliminary analysis of the sector has been conducted. The data collection was done in 2000, and approximately 24, 000 individuals belonging to about 3, 900 households were interviewed The survey had national coverage ( 1, 198 in the North Region, 972 in the East Region, 815 in the South Region and 919 in the West Region ).", + "type": "survey", + "explanation": "In the context, 'MICS' is explicitly mentioned as the survey that provided data for the education sector analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MICS' is a dataset because it is referenced in the context of a survey used for analysis.", + "contextual_reason_agent": "In the context, 'MICS' is explicitly mentioned as the survey that provided data for the education sector analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 53, + "text": "Dam inflows. Because long precipitation and temperature records are scarcely available in and around the catchment, the used precipitation and temperature records for the period 1979-2009 were extracted from the Climate Forecast System Reanalysis ( CFSR ) dataset, a reanalysis product developed at the National Centre for Environmental Prediction ( NCEP ). CFSR data have an hourly temporal resolution and a spatial resolution of approximately 38 km. 31. The detailed hydrological catchment model was adapted to the upstream catchment on the basis of the CFSR data and cartographic maps information on topography, soil, land use, catchment boundaries and river network which were made available by the Lebanese CNRS. Following calibration, the full CFSR record was used to simulate 30 years of monthly flows in a Control Run, to be used as the reference for comparing the scenarios accounting for climatic changes. Two modified precipitation and temperature series, which represent the agreed climate change scenario, were then prepared by gradually reducing the annual amount of precipitation and increasing the temperature over a 30 year period. 32. The results of the simulations show that the combined effect of reduced precipitation and increased temperature leads to a reduction of 4 percent in Bisri Dam average annual inflow volume over its economic life time compared to the historical inflows of the last three decades. 33.", + "ner_text": [ + [ + 556, + 565, + "named" + ], + [ + 178, + 187, + "CFSR data <> reference year" + ], + [ + 303, + 356, + "CFSR data <> author" + ], + [ + 376, + 402, + "CFSR data <> data description" + ] + ], + "validated": true, + "empirical_context": "31. The detailed hydrological catchment model was adapted to the upstream catchment on the basis of the CFSR data and cartographic maps information on topography, soil, land use, catchment boundaries and river network which were made available by the Lebanese CNRS. Following calibration, the full CFSR record was used to simulate 30 years of monthly flows in a Control Run, to be used as the reference for comparing the scenarios accounting for climatic changes.", + "type": "data", + "explanation": "In the context, 'CFSR data' is explicitly used to simulate flows, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'CFSR data' is referenced as a source for hydrological modeling.", + "contextual_reason_agent": "In the context, 'CFSR data' is explicitly used to simulate flows, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for simulation", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 27, + "text": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 24 of 93 development of competency requirements for officials involved in procurement transactions. A strategy will be produced for eventual professionalization of the procurement function. The project will support the implementation of a national capacity - building strategy for public procurement upon its adoption. Also, the institutionalization of a training capacity will be explored including the massive open online courses ( MOOC ) option. \uf0b7 Subcomponent 3. 2: Enhancing the capacity of organizations in the procurement system to carry out their functions. This subcomponent will support enhanced clarity in the rules regarding procurement processes used in MINMAP and other executing agencies through a business process review and the creation of materials to guide staff in following the required procedures. The extent of compliance with rules will be monitored through the creation of processes and platforms, including electronic tracking systems as part of the e - procurement system under development, and the analysis of the collected data. This will include acquisition of IT equipment and some logistics goods. \uf0b7 Subcomponent 3. 3: Streamlining the procurement regulatory framework. This subcomponent will support the review and the implementation of a new procurement code and / or Central Purchasing Unit upon adoption by the Government. \uf0b7 Subcomponent 3. 4: Improving the management and monitoring of procurement performance.", + "ner_text": [ + [ + 1038, + 1065, + "named" + ] + ], + "validated": false, + "empirical_context": "This subcomponent will support enhanced clarity in the rules regarding procurement processes used in MINMAP and other executing agencies through a business process review and the creation of materials to guide staff in following the required procedures. The extent of compliance with rules will be monitored through the creation of processes and platforms, including electronic tracking systems as part of the e - procurement system under development, and the analysis of the collected data. This will include acquisition of IT equipment and some logistics goods.", + "type": "system", + "explanation": "However, it is described as a system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'electronic tracking systems' implies a collection of data related to procurement processes.", + "contextual_reason_agent": "However, it is described as a system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 1247, + 1252, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "organization", + "explanation": "However, WOFED is referenced as an organization involved in the data collection process, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned alongside data collection efforts.", + "contextual_reason_agent": "However, WOFED is referenced as an organization involved in the data collection process, not as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP Management Information System Attendance data collected during each training session Implementing Partner Number of beneficiary households receiving Direct Income Support The number of total beneficiary HHs that are selected to participate in DIS under sub-component 1. 2, in accordance with the Project Operations Manual, This indicator will be measured, at a minimum, Registration and payment data from the SNSOP MIS Beneficiary data will be collected during registration and updated over the course of project implementation. Selected Implementing Partner", + "ner_text": [ + [ + 68, + 103, + "named" + ] + ], + "validated": false, + "empirical_context": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP Management Information System Attendance data collected during each training session Implementing Partner Number of beneficiary households receiving Direct Income Support The number of total beneficiary HHs that are selected to participate in DIS under sub-component 1. 2, in accordance with the Project Operations Manual, This indicator will be measured, at a minimum, Registration and payment data from the SNSOP MIS Beneficiary data will be collected during registration and updated over the course of project implementation.", + "type": "system", + "explanation": "However, it is described as a management information system, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'data' in its name.", + "contextual_reason_agent": "However, it is described as a management information system, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 13, + "text": "In 2012, only 22 percent of rural health centers ( RHCs ) had most ( more than 75 percent ) of the key RMNCHN drugs, 5 percent had most of the family planning commodities, none had sufficient basic laboratory tests, and 39 percent had a functional ambulance. 18 Funding gaps for essential medicines and maintenance and repairs, inadequate capacity to quantify needs, and suboptimal supply chains are key factors leading to high stockouts of medicines and lack of functional equipment at public HFs. The limited open hours of PHC HFs represent 11 NIPS and ICF International. 2013. PDHS 2012 \u2010 13. UNHCR. 2011. Population Profiling, Verification and Response Survey of Afghans in Pakistan. 12 Wilhelm J. A. and S. Helleringer. 2019. \" Utilization of Non \u2010 Ebola Health Care Services during Ebola Outbreaks: a Systematic Review and Meta \u2010 Analysis. \" Journal of Global Health. 9 ( 1 ). https: / / www. ncbi. nlm. nih. gov / pmc / articles / PMC6344071 /; Chang H. J., N. Huang, C. H.", + "ner_text": [ + [ + 609, + 663, + "named" + ], + [ + 3, + 7, + "Population Profiling, Verification and Response Survey <> reference year" + ], + [ + 585, + 594, + "Population Profiling, Verification and Response Survey <> reference year" + ], + [ + 596, + 601, + "Population Profiling, Verification and Response Survey <> publisher" + ], + [ + 603, + 607, + "Population Profiling, Verification and Response Survey <> publication year" + ], + [ + 667, + 674, + "Population Profiling, Verification and Response Survey <> reference population" + ], + [ + 678, + 686, + "Population Profiling, Verification and Response Survey <> data geography" + ], + [ + 691, + 704, + "Population Profiling, Verification and Response Survey <> author" + ], + [ + 709, + 723, + "Population Profiling, Verification and Response Survey <> author" + ] + ], + "validated": true, + "empirical_context": "2011. Population Profiling, Verification and Response Survey of Afghans in Pakistan. 12 Wilhelm J.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific survey that collects data on the population of Afghans in Pakistan.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific survey that collects data on the population of Afghans in Pakistan.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 95, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 88 of 174 Generation capacity of renewable energy constructed or rehabilitated The indicator captures the total solar PV capacity that has been installed via mini grids or rehabilitated through solar PV hybridization. Semi - annually. NIGELEC and ANPER project database. Determined based on the reporting of the contractors responsible installing / hybridizing mini grids. NIGELEC / ANPER. Net greenhouse gas ( GHG ) emissions Project net greenhouse gas ( GHG ) emissions are calculated as an annual average of the difference between project gross ( absolute ) emissions aggregated over the economic lifetime of the project and the emissions of a baseline ( counterfactual ) scenario aggregated over the same time horizon. They are reported in metric tons of carbon dioxide equivalent per year. Annually NIGELEC ' s commercial department / ANPER and ANERSOL project databases. Based on data provided in the Owner ' s engineer report. 5. 73 million tCO2e are expected to be reduced during economic life of the project for an approximate annual reduction of 779, 215 tCO2e. MTR targets are the same as the end targeted because the indicator measures the annual average reductions over the entire economic analysis period.", + "ner_text": [ + [ + 928, + 933, + "named" + ] + ], + "validated": false, + "empirical_context": "They are reported in metric tons of carbon dioxide equivalent per year. Annually NIGELEC ' s commercial department / ANPER and ANERSOL project databases. Based on data provided in the Owner ' s engineer report.", + "type": "project", + "explanation": "'ANPER' is mentioned as a project, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ANPER' is a dataset because it is mentioned alongside 'project databases'.", + "contextual_reason_agent": "'ANPER' is mentioned as a project, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 32, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 27 of 117 dialogue on refugee education issues will also be supported under this component, to support implementation of Kenya \u2019 s existing refugee education commitments. Table 5: A Summary of Key Activities for the IPF Component Description of Area Activity # Key Activity description RA / PAP PEELP budget ( US $ 12 million ) Comments 1. Program management, policy dialogue, communication, monitoring and evaluation, safeguards and fiduciary, and verification. 1. 1 Program operational costs, including whole-of - Government refugee policy coordination PAP US $ 5 million39 Annual work plans are required of all implementing agencies and will be consolidated by PCU and approved by the NSC and the World Bank. Procurement: workshops and consultants. 1. 2 SIP / school grant SIP manual40; development of a disaster mitigation plan for schools affected by floods and drought; and additional capacity building measures on the SIPs, including incorporation actions from the disaster mitigation plan in the SIPs-including for camp - based refugee schools. RA 1 1. 3 Data management, including categorization of special needs learners by nature of disability and gender, and registration of refugee learners, including learners in host communities, in NEMIS. MoE develops guidelines for inclusion of refugee children in NEMIS and other education services.", + "ner_text": [ + [ + 1393, + 1398, + "named" + ], + [ + 198, + 203, + "NEMIS <> data geography" + ], + [ + 1100, + 1128, + "NEMIS <> reference population" + ], + [ + 1167, + 1242, + "NEMIS <> data description" + ], + [ + 1264, + 1280, + "NEMIS <> reference population" + ], + [ + 1332, + 1335, + "NEMIS <> author" + ], + [ + 1373, + 1389, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "3 Data management, including categorization of special needs learners by nature of disability and gender, and registration of refugee learners, including learners in host communities, in NEMIS. MoE develops guidelines for inclusion of refugee children in NEMIS and other education services.", + "type": "system", + "explanation": "NEMIS is indeed a data source as it is used for managing data related to learners, including registration and categorization.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is involved in the registration and categorization of learners.", + "contextual_reason_agent": "NEMIS is indeed a data source as it is used for managing data related to learners, including registration and categorization.", + "contextual_signal": "mentioned as a data source for registration and categorization of learners", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 20, + "text": "The two refugee projects share similar objectives and the same rationale for integrated and long-term support to refugees and host communities. The two operations are the World Bank \u2019 s first contributions to operationalizing the Sahel Alliance and to engaging in the Lake Chad region, which will include an FY2019 Lake Chad Regional Recovery Project. II. PROJECT DESCRIPTION A. Project Development Objective PDO Statement 22. The Project Development Objectives are to improve access of refugees and host communities to basic services, livelihoods, and safety nets, and strengthen country systems to manage refugees. PDO Level Indicators \u2022 Beneficiaries with improved access to community infrastructure ( health and education ) ( total, females, and refugees ) \u2022 Beneficiaries of social safety net programs ( total, females, and refugees ) \u2022 Beneficiaries in targeted areas included in the Unified Social Registry ( total, females, and refugees ) \u2022 Eligible refugees with identity documents issued by CNARR ( total and females ). 20 These projects include: Additional Financing Education Sector Reform Project Phase II ( P163740 ); Emergency Food and Livestock Crisis Response Project ( P151215 ); Mother and Child Health Services Strengthening Project ( P148052 ); and Climate Resilient Agriculture and Productivity Enhancement Project ( P162956 ).", + "ner_text": [ + [ + 890, + 913, + "named" + ], + [ + 171, + 181, + "Unified Social Registry <> publisher" + ], + [ + 640, + 653, + "Unified Social Registry <> reference population" + ], + [ + 763, + 776, + "Unified Social Registry <> reference population" + ], + [ + 842, + 855, + "Unified Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The Project Development Objectives are to improve access of refugees and host communities to basic services, livelihoods, and safety nets, and strengthen country systems to manage refugees. PDO Level Indicators \u2022 Beneficiaries with improved access to community infrastructure ( health and education ) ( total, females, and refugees ) \u2022 Beneficiaries of social safety net programs ( total, females, and refugees ) \u2022 Beneficiaries in targeted areas included in the Unified Social Registry ( total, females, and refugees ) \u2022 Eligible refugees with identity documents issued by CNARR ( total and females ). 20 These projects include: Additional Financing Education Sector Reform Project Phase II ( P163740 ); Emergency Food and Livestock Crisis Response Project ( P151215 ); Mother and Child Health Services Strengthening Project ( P148052 ); and Climate Resilient Agriculture and Productivity Enhancement Project ( P162956 ).", + "type": "registry", + "explanation": "The Unified Social Registry is explicitly mentioned as a source of data for beneficiaries in targeted areas.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that includes beneficiaries.", + "contextual_reason_agent": "The Unified Social Registry is explicitly mentioned as a source of data for beneficiaries in targeted areas.", + "contextual_signal": "mentioned as a data source for beneficiaries", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 41, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXII DLI allocation 17, 197, 500. 00 As a % of Total Financing Amount 5. 0 % \u27a2 5. 3: Enhanced digital literacy / skills ( Number ) 0 0 Prime Ministry endorses the curricular for digital training adopted by IPA, 1, 000 civil servants with certified digital literacy / skills 2, 000 civil servants with certified digital literacy / skills 3, 000 civil servants with certified digital literacy / skills 4, 000 civil servants with certified digital literacy / skills 0. 00 0. 00 3, 000, 000. 00 2, 000, 000. 00 2, 000, 000. 00 2, 000, 000. 00 DLI allocation 9, 000, 000. 00 As a % of Total Financing Amount 2. 57 % \u27a2 5. 4: Increase in the utilization of HRMIS and other digital platforms by public entities for competitive recruitment, performance-based promotion and digital upskilling. ( Percentage ) 0 0 18 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 36 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 54 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 72 % of recruitments", + "ner_text": [ + [ + 977, + 982, + "named" + ] + ], + "validated": false, + "empirical_context": "4: Increase in the utilization of HRMIS and other digital platforms by public entities for competitive recruitment, performance-based promotion and digital upskilling. ( Percentage ) 0 0 18 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 36 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 54 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 72 % of recruitments", + "type": "system", + "explanation": "However, HRMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with recorded data on recruitments and promotions.", + "contextual_reason_agent": "However, HRMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "168_252640updated0version", + "page": 88, + "text": "For this application, the plan i s to differentiate between transactions related to the projects and their funding source; this should make it possible to translate the objectives and activity of a project into financial and accounting terms. The concept that has been developed should permit monitoring of transactions for the duration o f the grant and, in particular: ( i ) by budget classification; ( ii ) by component and subcomponent; ( iii ) by use ( type o f establishment, site and district ); ( iv ) by category o f expenditure; and ( v ) in local and foreign currencies. The extraction o f multicriteria data i s planned; this facilitates a dynamic approach as required for the management o f project monitoring. It should also permit the analysis o f the results o f project monitoring and their interpretation, as well as retrieval o f information on actual transactions of the project stakeholders ( State, b A and Auditor ). All the procedures are programmed in the transactional mode. All system users have access to certain authorized transactions that allow them to enhance the information in the system.", + "ner_text": [ + [ + 601, + 619, + "named" + ] + ], + "validated": false, + "empirical_context": "The concept that has been developed should permit monitoring of transactions for the duration o f the grant and, in particular: ( i ) by budget classification; ( ii ) by component and subcomponent; ( iii ) by use ( type o f establishment, site and district ); ( iv ) by category o f expenditure; and ( v ) in local and foreign currencies. The extraction o f multicriteria data i s planned; this facilitates a dynamic approach as required for the management o f project monitoring. It should also permit the analysis o f the results o f project monitoring and their interpretation, as well as retrieval o f information on actual transactions of the project stakeholders ( State, b A and Auditor ).", + "type": "data", + "explanation": "'Multicriteria data' is described as a type of data extraction rather than a structured dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'multicriteria data' refers to a structured collection of data due to its mention in the context of project monitoring.", + "contextual_reason_agent": "'Multicriteria data' is described as a type of data extraction rather than a structured dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 139, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 127 and complexity of activity which will be indicated in the Procurement Plan in STEP. These prior-review contracts will be updated in the Procurement Plan annually or as necessary during implementation, based on the procurement capacity assessment during implementation support missions. 64. The World Bank will carry out regular procurement supervision missions on an annual basis and carry out procurement post-review on an annual basis. Contracts not subject to prior review will be subject to post - review by the World Bank as per procedures set forth in Annex II122 \u2013 \u201c Procurement Oversight \u201d of the Procurement Regulations. The sample contracts for the procurement post-reviews will be risk-based. The World Bank will undertake the post-reviews online accessing procurement documents and data from STEP. The Recipient will upload all procurement process documentation and information in STEP at each roadmap stage. At the completion of the contracts, the Recipient will upload completion documents including inspection, acceptance, delivery, and final payment evidenced in STEP. 65.", + "ner_text": [ + [ + 142, + 146, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 127 and complexity of activity which will be indicated in the Procurement Plan in STEP. These prior-review contracts will be updated in the Procurement Plan annually or as necessary during implementation, based on the procurement capacity assessment during implementation support missions.", + "type": "program", + "explanation": "'STEP' is not a dataset as it is referred to as a program related to procurement processes, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'STEP' is a dataset because it is mentioned in the context of procurement planning.", + "contextual_reason_agent": "'STEP' is not a dataset as it is referred to as a program related to procurement processes, not a structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 15, + "text": "( b ) Beneficiaries of safety net programs ( Core Indicator ), tracking in particular the number of female beneficiaries and the total number of NPTP beneficiaries ( programs such as NPTP, which offer education credits, fees waivers and health subsidies, are defined in the core indicators as \" other social assistance programs \" ). ( c ) NPTP beneficiaries from extremely poor households as a share of total NPTP beneficiaries, as a measure of the targeting accuracy of the program. ( d ) Percentage of SDC beneficiaries reporting improved quality of services provided by SDCs, measuring the centers ' improved capacity. Information for this indicator will come from opinion polls, beneficiary surveys and social audits. ( e ) Percentage of citizens reporting MOSA ' s improved responsiveness in delivering social assistance and development grants, measuring MOSA ' s improved capacity. This indicator will be measured through opinion polls and / or beneficiary surveys. III. PROJECT DESCRIPTION A. Project Components 20. The SPPP will have the following four components: ( 1 ) Social Development Centers ( SDCs ); ( 2 ) Community Social Development Program ( CSD ); ( 3 ) National Poverty Targeting Program ( NPTP ); and ( 4 ) Project Management ( PM ).", + "ner_text": [ + [ + 683, + 702, + "named" + ], + [ + 6, + 42, + "beneficiary surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "( d ) Percentage of SDC beneficiaries reporting improved quality of services provided by SDCs, measuring the centers ' improved capacity. Information for this indicator will come from opinion polls, beneficiary surveys and social audits. ( e ) Percentage of citizens reporting MOSA ' s improved responsiveness in delivering social assistance and development grants, measuring MOSA ' s improved capacity.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' is explicitly mentioned as a source of information for measuring improved quality of services, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' implies a structured collection of data from respondents.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' is explicitly mentioned as a source of information for measuring improved quality of services, confirming its role as a dataset.", + "contextual_signal": "follows 'information for this indicator will come from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 73, + "text": "The World Bank Social Entrepreneurship, Empowerment and Cohesion in Refugee and Host Communities in Turkey Project ( P171456 ) 64 contracts determined after the needs analysis, and procurement plan will be updated accordingly. 36. Procurement Tracking. The proposed project will use Systematic Tracking of Exchanges in Procurement ( STEP ), a planning and tracking system that provides data on procurement activities, establishes benchmarks, monitors delays, and measures procurement performance. Procurement planning will be done and updated through STEP whenever the contract packages are identified as a result of the needs analysis as mentioned above. Only the MoIT will be given STEP access in the project portal to safeguard the confidentiality of the contract information recorded by different contracting entities. Hence, the local authority beneficiaries who are implementing subprojects under the \u201c guided project support \u201d ( Subcomponent 2B ) will share the procurement information with the relevant TDA. The TDAs will ensure and maintain that this information is forwarded to the MoIT and processed in STEP by the PIU. 37. A list of the procurements performed by the beneficiaries under Subcomponent 1B will be recorded in a format agreed by the Bank and specified in the POM, and these records will be uploaded into STEP by the MoIT at least annually but not later than the closing date of the project. 38. Advance Procurement.", + "ner_text": [ + [ + 283, + 330, + "named" + ] + ], + "validated": false, + "empirical_context": "Procurement Tracking. The proposed project will use Systematic Tracking of Exchanges in Procurement ( STEP ), a planning and tracking system that provides data on procurement activities, establishes benchmarks, monitors delays, and measures procurement performance. Procurement planning will be done and updated through STEP whenever the contract packages are identified as a result of the needs analysis as mentioned above.", + "type": "system", + "explanation": "However, it is described as a planning and tracking system, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'tracking' which suggests data collection.", + "contextual_reason_agent": "However, it is described as a planning and tracking system, not a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 27, + "text": "The MOE has developed an integrated, comprehensive, flexible, and trusted educational management information system ( EMIS ) using OpenEMIS, which provides a system that is accessible countrywide, covering over 2 million students, 7, 300 schools, and 170, 000 educational staff. \u2022 World Bank diagnostics and international benchmarks also warrant the Program \u2019 s focus on transparency and accountability. Under international governance indicators, Jordan is on par with other upper middle-income countries, but it lags on voice and accountability. Regarding transparency, although Jordan was the first country in the region to legislate access to information, it is poorly rated under the Right to Information Index and ranks among the 15 percent least well-performing countries. According to an assessment of the whole of government citizen feedback platform, only 33 percent of citizens filing a grievance using the At Your Service online government grievance redress platform surveyed in 2022 had received any response. Opinion surveys reflect a pervasive perception of corruption in government and the private sector. According to a 2022 opinion survey by the Centre of Strategic Studies of the University of Jordan, 14 94 percent of Jordanians believe that the economy and administrative bodies are \u201c plagued by corruption, \u201d and 82 percent believe that the government is not serious or willing to fight corruption. \u201d 44.", + "ner_text": [ + [ + 131, + 139, + "named" + ] + ], + "validated": false, + "empirical_context": "The MOE has developed an integrated, comprehensive, flexible, and trusted educational management information system ( EMIS ) using OpenEMIS, which provides a system that is accessible countrywide, covering over 2 million students, 7, 300 schools, and 170, 000 educational staff. \u2022 World Bank diagnostics and international benchmarks also warrant the Program \u2019 s focus on transparency and accountability.", + "type": "system", + "explanation": "OpenEMIS is described as an educational management information system, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset because it is associated with educational data management.", + "contextual_reason_agent": "OpenEMIS is described as an educational management information system, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 23, + "text": "Washington, DC: World Bank. https: / / openknowledge. worldbank. org / handle / 10986 / 34712. 14 The survey results were published in June 2020 using data collected in 2018. The MTF defines access to electricity through seven attributes: capacity, availability, reliability, quality, affordability, formality, and health and safety. Tier 1 defines at least 4 hours availability per day including at least 1 hour per evening, with the capacity sufficient to power task lighting and phone charging / radio. Tier 2 electricity is available at least 4 hours per day, including at least 2 hours per evening, and capacity is sufficient to power low-load appliances. In Tier 3, Tier 4, and Tier 5, electricity is available at least 8 hours, 16 hours, and 23 hours, respectively, with higher standards for other attributes. 15 Off-grid solutions are not included. 16 According to the Sustainable Energy for All SDG7 tracker ( 2018 ), access rate in Niger was 20 percent in 2017; however, national data are used in the text.", + "ner_text": [ + [ + 981, + 994, + "named" + ], + [ + 942, + 947, + "national data <> data geography" + ], + [ + 966, + 970, + "national data <> reference year" + ], + [ + 1032, + 1050, + "national data <> usage context" + ] + ], + "validated": true, + "empirical_context": "15 Off-grid solutions are not included. 16 According to the Sustainable Energy for All SDG7 tracker ( 2018 ), access rate in Niger was 20 percent in 2017; however, national data are used in the text.", + "type": "data", + "explanation": "In this context, 'national data' is explicitly mentioned as being used in the text, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'national data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "In this context, 'national data' is explicitly mentioned as being used in the text, indicating it serves as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 77, + "text": "Project progress will be monitored based on the project results framework and where relevant and possible, progress against indicators will be disaggregated by internally displaced people / returnees ( IDP / R ) and gender. MoEC will hire an M & E Specialist to oversee the monitoring, reporting, and coordination of project monitoring activities implemented across components as well as with the implementing agencies of partner projects, including CIP, KMDP, and CCAP. Under Component 1, MoFA will be using the database for processing passport applications to track data on related indicators. The inquiry cases attended to by information centers and helplines will also be logged and monitored through an M & E system. Under Component 2, the CCAP MCCG has a functional M & E MIS platform, which will be updated and used for the EZ \u2010 Kar activities. Similarly, the M & E system used by CIP and KMDP will be used to track the indicators for Components 3 and 4 respectively. At the project level, MoEC will develop a simple MIS, that will be fed by data provided by the MISs of other implementing agencies. The simple project \u2010 level MIS will allow MoEC to consolidate and generate reports based on data fed by the implementing agencies \u2019 MISs and periodic reports to be submitted to MoEC. 15. MoEC on a sample basis will conduct site visits of the project activities in the municipalities to verify that IAs are reporting correctly. The reports from the IAs and site visits will be gathered on a monthly basis, which will be entered into the project Management Information System ( MIS ) that will be managed by MoEC. The MIS will be used to generate Quarterly Progress Reports ( QPRs ) that will be shared with the World Bank. 16. The project will conduct rapid assessments to capture qualitative details and unintended consequences of project implementation and recommend measures for course correction. Assessments will be carried out by the M & E Unit under Component 5, while the IAs can also conduct their own assessments under their respective components.", + "ner_text": [ + [ + 1543, + 1580, + "named" + ] + ], + "validated": false, + "empirical_context": "MoEC on a sample basis will conduct site visits of the project activities in the municipalities to verify that IAs are reporting correctly. The reports from the IAs and site visits will be gathered on a monthly basis, which will be entered into the project Management Information System ( MIS ) that will be managed by MoEC. The MIS will be used to generate Quarterly Progress Reports ( QPRs ) that will be shared with the World Bank.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system for managing information rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 83, + "text": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 79 of 111 groups will be led by women. Fodder production will target areas within acceptable distance from the homesteads, which ensures that women can actively participate. Financial Management 51. The ICRC headquarters is in Geneva with the ICRC Somalia operating directly under the ICRC Geneva - Financial Management Regulations June 2016 ( DIR2295REV \u2013 Appendix 1 ). The Somalia Delegation Financial management function is headed by Head of Finance and Administration supported by a team of five ( 5 ) Accountants based in in Nairobi with Finance and ten ( 10 ) Administration Assistants located in different field offices in Somalia. Additional Rules on Financial Management Delegation of Somalia \u2013 2016 are in place and they provide specific delegation of authority on financial commitments. Financial Management reporting responsibilities, staffing, functional responsibilities are documented in the various HR / FM policies and procedures. The Finance Department works in close consultation with the Logistics Department particularly in contracts management, commitment control, reconciliation of commitments balances in SUN Accounting System and the Purchasing and Stores Management System. External Finance is usually coordinated and channelled through the External Department in Geneva.", + "ner_text": [ + [ + 1217, + 1238, + "named" + ] + ], + "validated": false, + "empirical_context": "Financial Management reporting responsibilities, staffing, functional responsibilities are documented in the various HR / FM policies and procedures. The Finance Department works in close consultation with the Logistics Department particularly in contracts management, commitment control, reconciliation of commitments balances in SUN Accounting System and the Purchasing and Stores Management System. External Finance is usually coordinated and channelled through the External Department in Geneva.", + "type": "system", + "explanation": "However, the context indicates that it is a system used for accounting rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'System', which often implies data handling.", + "contextual_reason_agent": "However, the context indicates that it is a system used for accounting rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 69, + "text": "Specifically, it will include six core functions: ( a ) e-learning, including instructional material with concrete guidance for CECs, facilitators, and communal authorities on their role at different stages of the annual investment cycles, ( b ) participatory diagnostics, designed to support the mapping of available infrastructure and services and document prioritized needs by different groups, ( c ) sub-project cycle, connected to the project MIS, which allows to submit agreed investment proposal, track approvals, and monitor implementation, ( d ) community pulse surveys, consisting of high-frequency or on demand surveys to gain regular community insights to help project implementation, ( e ) community scorecards to allow citizens to assess sub-projects, and ( f ) a GRM through multiple channels ( analogue, feature phones, smartphones, web form ) for citizens to voice complaints, concerns or provide feedback on project activities. Collected data will also feed into the publicly accessible Casamance Knowledge Management Platform developed under the Component 4, to inform dialogue among practitioners, policymakers and the public. The deployment of these tools will take into consideration connectivity and literacy constraints.", + "ner_text": [ + [ + 1005, + 1044, + "named" + ] + ], + "validated": false, + "empirical_context": "Specifically, it will include six core functions: ( a ) e-learning, including instructional material with concrete guidance for CECs, facilitators, and communal authorities on their role at different stages of the annual investment cycles, ( b ) participatory diagnostics, designed to support the mapping of available infrastructure and services and document prioritized needs by different groups, ( c ) sub-project cycle, connected to the project MIS, which allows to submit agreed investment proposal, track approvals, and monitor implementation, ( d ) community pulse surveys, consisting of high-frequency or on demand surveys to gain regular community insights to help project implementation, ( e ) community scorecards to allow citizens to assess sub-projects, and ( f ) a GRM through multiple channels ( analogue, feature phones, smartphones, web form ) for citizens to voice complaints, concerns or provide feedback on project activities. Collected data will also feed into the publicly accessible Casamance Knowledge Management Platform developed under the Component 4, to inform dialogue among practitioners, policymakers and the public. The deployment of these tools will take into consideration connectivity and literacy constraints.", + "type": "platform", + "explanation": "However, the context indicates it is a platform for dialogue and does not function as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Knowledge Management Platform' which suggests a collection of information.", + "contextual_reason_agent": "However, the context indicates it is a platform for dialogue and does not function as a structured collection of data.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 120, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 115 of 117 within the provisions of PPADA, 2015 and Regulations 2020. Training on auditing in the NEMIS Platform and monitor progressively how many of the audits are carried out on this platform. Fiduciary Systems MoE Recurrent Yearly Audits conducted on the NEMIS Platform. Preparation for Kenya to participate in the 2025 Programme for International Student Assessment \u2019 s for Development ( PISA-D ). Technical DLI 6 MoE and KNEC Due Date 31-Dec - 2025 a ) admission / signing up; b ) instrument translation and adaptation of instrument; ( c ) pre-test; ( d ) main survey; and ( e ) data analysis, report writing and dissemination Strengthened supply chain for sanitary towels and accountability Technical DLI 5 MoE Due Date 29-Dec - 2023 A module in NEMIS showing distribution of sanitary towels by school and number of girls benefiting. Review and analyze NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Environmental and Social Systems DLI 6 MoE KISE Due Date 30-Jun-2023 Reports generated from NEMIS with granular data on special needs children by category of disability, age and gender. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies.", + "ner_text": [ + [ + 1193, + 1198, + "named" + ] + ], + "validated": false, + "empirical_context": "Review and analyze NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Environmental and Social Systems DLI 6 MoE KISE Due Date 30-Jun-2023 Reports generated from NEMIS with granular data on special needs children by category of disability, age and gender. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies.", + "type": "system", + "explanation": "However, NEMIS is described as a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "However, NEMIS is described as a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 21, + "text": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E. 27. National accounts are engaged in a modernization process requiring support but suffer from a lack of sufficient and comprehensive trade data. While many Sub-Saharan countries are still following the 1993 national accounts framework, Cameroon transitioned successfully to the 2008 system and has been producing trimestral accounts since 2015. However, the classic annual national accounts suffer from a lack of reliable agriculture statistics. The last agriculture and livestock census was undertaken in 1984 and annual surveys stopped in the early 1990s. The Ministry of Livestock, Fishery, and Animal Industry and the Ministry of Agriculture Rural Development currently rely on indirect sources to produce the necessary basic sector statistics. A new agricultural and livestock census was originally planned for 2017, but the cost is high compared to similar exercises in the region ( CFAF 23. 6 billion ). However, the AfDB and EU are exploring ways to contribute to the financing of this census in synergy with the population census.", + "ner_text": [ + [ + 20, + 39, + "named" + ], + [ + 631, + 639, + "labor force surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey.", + "type": "survey", + "explanation": "In the context, 'labor force surveys' are mentioned as a type of survey that collects data on employment, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'labor force surveys' is a dataset because it refers to a structured collection of data related to labor force statistics.", + "contextual_reason_agent": "In the context, 'labor force surveys' are mentioned as a type of survey that collects data on employment, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 53, + "text": "The project preparation team worked with the Poverty team and the National Statistical Institute ( ISTEEBU ) to build small-area extreme poverty maps ( based on the ECVMB 2013-14 in combination with the 2010 Census data ), which provided the basis for a more systematic geographical targeting at the commune-level and could inform the planned expansion of the safety net program and other programs using the registry. In each province, the four communes with the highest poverty ( from 21 The CNPS includes 11 ministers under the authority of the President 22 The country is divided in 18 provinces, 129 communes and 2, 638 collines. 23 The Amashiga program started in 2016 and aims to foster community based-nutrition, using the lessons of the Tubaramure pilot in Cankuso and Ruyigi. The program would distribute food to all households with pregnant women or children under two, and foster behavior change in terms of food consumption, preparation, production, water and sanitation and hygiene practices, and access to health.", + "ner_text": [ + [ + 203, + 219, + "named" + ] + ], + "validated": true, + "empirical_context": "The project preparation team worked with the Poverty team and the National Statistical Institute ( ISTEEBU ) to build small-area extreme poverty maps ( based on the ECVMB 2013-14 in combination with the 2010 Census data ), which provided the basis for a more systematic geographical targeting at the commune-level and could inform the planned expansion of the safety net program and other programs using the registry. In each province, the four communes with the highest poverty ( from 21 The CNPS includes 11 ministers under the authority of the President 22 The country is divided in 18 provinces, 129 communes and 2, 638 collines.", + "type": "census", + "explanation": "This is indeed a dataset as it is used to inform geographical targeting and is referenced as a source of data in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific census that provides data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is used to inform geographical targeting and is referenced as a source of data in the context.", + "contextual_signal": "mentioned as a data source for geographical targeting", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 58 of 74 working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are refugees and host communities. Refugees are defined as forcibly displaced HHs originating from a country other than South Sudan and registered as refugees in South Sudan by the UNHCR. Host communities are defined as local population groups living in counties with a high concentration of refugees. measured at a minimum on a quarterly basis SNSOP MIS updated over the course of the project. Payment data will also be periodically updated in the MIS Number of beneficiary households receiving Direct Income Support who have a female primary beneficiary ( Number ) Total number of beneficiary households under comopnent 1. 2 that have a primary beneficiary, as registered in the SNSOP MIS, who is a woman.", + "ner_text": [ + [ + 529, + 538, + "named" + ] + ], + "validated": false, + "empirical_context": "Host communities are defined as local population groups living in counties with a high concentration of refugees. measured at a minimum on a quarterly basis SNSOP MIS updated over the course of the project. Payment data will also be periodically updated in the MIS Number of beneficiary households receiving Direct Income Support who have a female primary beneficiary ( Number ) Total number of beneficiary households under comopnent 1.", + "type": "system", + "explanation": "However, the context indicates that 'SNSOP MIS' is described as a system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often stands for Management Information System, suggesting data management.", + "contextual_reason_agent": "However, the context indicates that 'SNSOP MIS' is described as a system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty. The project will also provide TA to INS to train and assist targeted ministries in integrating national statistics in the definition of sector strategies, programs, and public investment projects. 37 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank - financed project in the health sector. In total, the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 380, + 383, + "named" + ] + ], + "validated": false, + "empirical_context": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty.", + "type": "program", + "explanation": "LFS is mentioned as part of a system improvement rather than as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed LFS is a dataset because it is mentioned in the context of improving data collection methods.", + "contextual_reason_agent": "LFS is mentioned as part of a system improvement rather than as a structured collection of data itself.", + "contextual_signal": "mentioned as a system but not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 43, + "text": "procurement, and financial management and is headed by Director of the DGP DRHF: Directorate of Human Resources and Training responsible for the allocation, redeployment and strengthening of health human resources DPCI: Directorate of Planning and International Cooperation is responsible for continuing to discuss and channel the PTFs in collaboration with the DGP and the SG DSME: Directorate of Mother and Child responsible for technical design, monitoring and supervision of the implementation of quality of care for mother and child health DSNIS: Directorate for Data and Research which will support the implementation and development of data collection tools ( DHIS2 functionality, patient file, etc. ) DRFM: The Directorate of Financial and Material Resources responsible for financial and material monitoring at the central and decentralized levels, including procurement needs DRS: The Directorate for Regional Health responsible for primary and secondary health facilities DPS: Health promotion Directorate which will coordinate all interventions at the community level", + "ner_text": [ + [ + 667, + 672, + "named" + ] + ], + "validated": false, + "empirical_context": "procurement, and financial management and is headed by Director of the DGP DRHF: Directorate of Human Resources and Training responsible for the allocation, redeployment and strengthening of health human resources DPCI: Directorate of Planning and International Cooperation is responsible for continuing to discuss and channel the PTFs in collaboration with the DGP and the SG DSME: Directorate of Mother and Child responsible for technical design, monitoring and supervision of the implementation of quality of care for mother and child health DSNIS: Directorate for Data and Research which will support the implementation and development of data collection tools ( DHIS2 functionality, patient file, etc. ) DRFM: The Directorate of Financial and Material Resources responsible for financial and material monitoring at the central and decentralized levels, including procurement needs DRS: The Directorate for Regional Health responsible for primary and secondary health facilities DPS: Health promotion Directorate which will coordinate all interventions at the community level", + "type": "system", + "explanation": "However, DHIS2 is described as a functionality or system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection tools.", + "contextual_reason_agent": "However, DHIS2 is described as a functionality or system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "184_multi-page", + "page": 16, + "text": "The Planning Unit of the Ministry will be strengthened to monitor progress in reaching out to disadvantaged groups and test scores of students by socioeconomic background. Staff will carry out a random survey ( 5 to 10 % sample ) of students by socioeconomic background in 2001 to establish a baseline. To keep the survey simple, the socioeconomic background questions will be limited to easily identified categories such as day-laborers, civil servants, shopkeepers etc. The survey will be repeated in 2005 and 2110. D. PROJECT RATIONALE 1. Project alternatives considered and reasons for rejection Originally, the project was designed as a Sector Investment Loan, however, given the Government ' s commitment to the education sector, and the long-term assistance required, the APL was selected as the preferred lending instrument. The proposed APL is designed to support the Government ' s ambitious reform program which is expected to: ( a ) universalize basic education; ( b ) develop a more relevant curriculum for Djibouti ' s development needs while maintaining the advantages of an internationally accepted curriculum that they currently follow; ( c ) reduce gender and income inequities in enrollment; ( d ) increase system efficiency; ( e ) increase the role of the private sector in provision of education; ( f ) increase the role of communities in school management and maintenance; and ( g ) improve the quality of teachers. These are long-term objectives and need sustained financing over a longer period. In addition, some issues, for example reducing gender and income inequity in enrollment, will need an evolutionary approach as the results of studies, etc., under Phase I become available and can be used to develop specific actions in follow-up phases of the APL. The Government had also requested funding for a \" salary buffer fund \" which was not included in the project because of various legal and financial management implications.", + "ner_text": [ + [ + 195, + 208, + "named" + ], + [ + 4, + 33, + "random survey <> author" + ], + [ + 273, + 277, + "random survey <> reference year" + ], + [ + 425, + 437, + "random survey <> reference population" + ], + [ + 439, + 453, + "random survey <> reference population" + ], + [ + 455, + 466, + "random survey <> reference population" + ], + [ + 503, + 507, + "random survey <> publication year" + ], + [ + 1020, + 1028, + "random survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The Planning Unit of the Ministry will be strengthened to monitor progress in reaching out to disadvantaged groups and test scores of students by socioeconomic background. Staff will carry out a random survey ( 5 to 10 % sample ) of students by socioeconomic background in 2001 to establish a baseline. To keep the survey simple, the socioeconomic background questions will be limited to easily identified categories such as day-laborers, civil servants, shopkeepers etc. The survey will be repeated in 2005 and 2110.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that collects data on students' socioeconomic backgrounds for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'random survey' implies a structured collection of data collected from a sample.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that collects data on students' socioeconomic backgrounds for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 21, + "text": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 18 of 93 26. However, the periodicity of basic surveys and censuses is not observed, and quality could be improved. The general population censuses were conducted in 1976, 1987, and then 2005, while the international recommendation is to conduct a census survey every 10 years. The GoC is planning the next population census end of 2017 - early 2018, and preparatory work already started in 2016. Full financing for 2017 data collection and analysis is nearly secured. 20 The Cameroon household surveys ( Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages, ECAM ) were conducted in 1996, 2001, 2007, and 2014, with a complementing survey in 2016. 21 The last three ECAMs had similar methodologies enabling solid trend analysis. However, ECAMs are supposed to be conducted at least every five years according to INS standards, and the World Bank recommends a survey every three or four years to monitor more closely the impact of public policies. Also, the ECAMs could be improved ( a ) upstream in the design to integrate specific issues related to poverty in rural areas or northern regions where poverty is the highest, including more frequent data production, and ( b ) downstream with more in-depth analysis. Also, the system of labor statistics is weak.", + "ner_text": [ + [ + 225, + 252, + "named" + ], + [ + 271, + 275, + "general population censuses <> reference year" + ], + [ + 277, + 281, + "general population censuses <> reference year" + ], + [ + 292, + 296, + "general population censuses <> reference year" + ], + [ + 353, + 366, + "general population censuses <> data type" + ], + [ + 387, + 390, + "general population censuses <> author" + ], + [ + 581, + 589, + "general population censuses <> data geography" + ] + ], + "validated": true, + "empirical_context": "However, the periodicity of basic surveys and censuses is not observed, and quality could be improved. The general population censuses were conducted in 1976, 1987, and then 2005, while the international recommendation is to conduct a census survey every 10 years. The GoC is planning the next population census end of 2017 - early 2018, and preparatory work already started in 2016.", + "type": "census", + "explanation": "In this context, it is indeed a dataset as it refers to the systematic collection of population data over time.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'general population censuses' refers to structured collections of demographic data collected at specific intervals.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it refers to the systematic collection of population data over time.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 79, + "text": "The monthly financial positions sent by line ministries and independent institutions to the GBD / MoF present data in administrative, economic, program, funding, and geographical classifications. Finalization of the annual accounts is required by law by June of the following year, and this has been complied with during the recent years. The final accounts are published on the MoF website. Jordan has joined the IMF Special Data Dissemination Standards since January 2010. 25. The JIC and JSMO \u2019 s annual financial statements ( prepared in accordance with International Financial Reporting Standards ) are audited by an acceptable independent private sector firm in accordance with International Standards on Auditing. The auditor issued an unqualified \u2018 clean \u2019 opinion. The World Bank will rely on the JIC and JSMO \u2019 s existing auditing and reporting arrangements. 26. A PMU was established at MOPIC that will be responsible for Program coordination and implementation. Apart from this, the PMU will be entrusted with compiling the Program annual financial statements and providing any ad hoc financial reports as deemed necessary to follow on the Program financial activities. 27. Government Financial Management Information System ( GFMIS ). The new implementation of the GFMIS gives Jordan a tremendous advantage; it is the correct time to review and modernize the important systems of government information.", + "ner_text": [ + [ + 1186, + 1236, + "named" + ] + ], + "validated": false, + "empirical_context": "27. Government Financial Management Information System ( GFMIS ). The new implementation of the GFMIS gives Jordan a tremendous advantage; it is the correct time to review and modernize the important systems of government information.", + "type": "system", + "explanation": "However, the context describes it as a system for managing information, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, the context describes it as a system for managing information, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 65, + "text": "However, this indicator will focus on tracking the percentage Annual MoE, KNBS MoE-Annual educational Statistical booklets and KNBS surveys MoE", + "ner_text": [ + [ + 127, + 139, + "named" + ], + [ + 51, + 72, + "KNBS surveys <> data description" + ], + [ + 74, + 78, + "KNBS surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "However, this indicator will focus on tracking the percentage Annual MoE, KNBS MoE-Annual educational Statistical booklets and KNBS surveys MoE", + "type": "survey", + "explanation": "In the context, 'KNBS surveys' is explicitly mentioned as part of the tracking indicator, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'KNBS surveys' implies a collection of data gathered through surveys.", + "contextual_reason_agent": "In the context, 'KNBS surveys' is explicitly mentioned as part of the tracking indicator, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 46, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 31 Proportion of beneficiaries indicating satisfaction with infrastructure subprojects constructed or upgraded by the project ( Percentage ) Description Quantitative indicator counting proportion of beneficiaries indicating satisfaction with infrastructure subprojects constructed or upgraded by the project. Indictor is a composite of beneficiaries responding \u201c satisfied \u201d or \u201c very satisfied \u201d on a Likert scale. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey. Responsibility for Data Collection MINEMA. Kilometers of road upgraded ( Kilometers ) Description Quantitative indicator counting kilometers of roads upgraded ( completed ). Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. RTDA data fed to MINEMA. Responsibility for Data Collection RTDA and MINEMA. Market facilities and Integrated Craft Production Centers constructed or upgraded ( Number ) Description Quantitative indicator counting number of market facilities and ICPCs constructed or upgraded ( completed ). Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA.", + "ner_text": [ + [ + 651, + 657, + "named" + ] + ], + "validated": false, + "empirical_context": "Methodology for Data Collection Survey. Responsibility for Data Collection MINEMA. Kilometers of road upgraded ( Kilometers ) Description Quantitative indicator counting kilometers of roads upgraded ( completed ).", + "type": "organization", + "explanation": "However, MINEMA is identified as an organization responsible for data collection, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MINEMA is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "However, MINEMA is identified as an organization responsible for data collection, not a structured collection of data itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 118, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 113 of 117 Action Description DLI Respon sible Timing Completion measurement 2. Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender. 3. Complete the ongoing process to institutionalize the process and procedures for administration and management of scholarships by Jomo Kenyatta Foundation ( JKF ) Integrated in DLI # 4 MoE Year 2 Scholarship administration and management processes and procedures established at JKF. 4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A. MoE By Year 2, 2023 Complaints and grievances lodged through the NEMIS. Number of grievances satisfactorily addressed 5. Assess the outcomes of biogas pilots and analyze existing practices on use of biogas in schools N / A MoE Year 2 Costed action plan developed to scale up biogas projects.", + "ner_text": [ + [ + 1006, + 1011, + "named" + ] + ], + "validated": false, + "empirical_context": "N / A. MoE By Year 2, 2023 Complaints and grievances lodged through the NEMIS. Number of grievances satisfactorily addressed 5.", + "type": "system", + "explanation": "NEMIS is mentioned as a system but not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is associated with complaints and grievances data.", + "contextual_reason_agent": "NEMIS is mentioned as a system but not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 15, + "text": "Women have less access to secure livelihoods and are less likely to be paid for their work than men. A 2021 study found that by age 22, about 97 percent of men compared to 81 percent of women were active in the labor market. 18 In the agricultural sector, over half of women workers are unpaid, and in manufacturing this is 58 percent, compared to 40 percent of male workers. 19 Because women are less likely to own land, cultivate fewer crops and have less access to credit and extension services, women \u2019 s agricultural productivity is 36 percent less per hectare than their male counterparts. 20 The impacts of climate change and the conflict place even greater pressure on women \u2019 s already volatile livelihoods. 15. Prevailing gender-based violence ( GBV ) risks are elevated due to the conflict. According to the most recent Demographic and Health Survey ( 2016 ) for Ethiopia, 26 percent of women aged 15-49 have experienced physical or sexual violence. 21 The conflict exposes women to a range of GBV. 22 Although the government and humanitarian partners have increased support for GBV services, including 34 One-Stop Centers, accessibility remains limited. B. Sectoral and Institutional Context 16. Ethiopia is the third-largest refugee hosting country in Africa and the ninth largest worldwide.", + "ner_text": [ + [ + 831, + 860, + "named" + ], + [ + 863, + 867, + "Demographic and Health Survey <> publication year" + ], + [ + 874, + 882, + "Demographic and Health Survey <> data geography" + ], + [ + 898, + 914, + "Demographic and Health Survey <> reference population" + ], + [ + 1208, + 1216, + "Demographic and Health Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Prevailing gender-based violence ( GBV ) risks are elevated due to the conflict. According to the most recent Demographic and Health Survey ( 2016 ) for Ethiopia, 26 percent of women aged 15-49 have experienced physical or sexual violence. 21 The conflict exposes women to a range of GBV.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey providing empirical data used in the analysis of GBV risks.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on gender-based violence.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey providing empirical data used in the analysis of GBV risks.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 13, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 9 of 66 Figure 1: Refugee and Asylum Seeker Population in South Sudan, October 31, 2022 Note: This map was created using data that is older than the more recent refugee number data that is included in the text of the PAD. 6. South Sudan is highly vulnerable to climate change and the risks of climate change are increasing. The country is particularly prone to adverse climate hazards that include extreme temperatures, droughts, and extreme precipitation and flooding. Despite having one of the richest agricultural areas in Africa, with fertile soils and abundant water, frequent flooding, droughts, ongoing conflict, and the displacement of millions of persons has drastically reduced South Sudan \u2019 s food production, resulting in an estimated 6. 6 million people experiencing high levels of acute food insecurity. 10 Considering that 95 percent of the population depends on climate-sensitive natural resources \u2014 particularly rainfed subsistence agriculture \u2014 the importance of building a skills base and engaging girls and women in climate resilient agriculture is thus imperative. At the same time, mainstreaming climate change adaptation and mitigation skills in the education system is important and will help accelerate growth, create jobs, and prepare the country to combat the effects of climate change. 7.", + "ner_text": [ + [ + 254, + 273, + "named" + ], + [ + 64, + 75, + "refugee number data <> data geography" + ], + [ + 111, + 147, + "refugee number data <> reference population" + ], + [ + 151, + 162, + "refugee number data <> data geography" + ], + [ + 176, + 180, + "refugee number data <> publication year" + ], + [ + 318, + 329, + "refugee number data <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 9 of 66 Figure 1: Refugee and Asylum Seeker Population in South Sudan, October 31, 2022 Note: This map was created using data that is older than the more recent refugee number data that is included in the text of the PAD. 6.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific numerical data used for analysis regarding the refugee population.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific numerical data regarding refugees.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific numerical data used for analysis regarding the refugee population.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 69, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | 60 20. Targeted results may be impacted by the exogenous risks identified in the Program \u2019 s integrated risk assessment. Increasing security concerns triggered by the ongoing conflict may distract government attention and fiscal resources, while climate change is not expected to impact the achievement of results. M & E Capacity 21. The Program \u2019 s M & E framework leverages government monitoring systems, which ensures adequate measuring and reporting of progress toward the targeted results. Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "ner_text": [ + [ + 1105, + 1110, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "type": "system", + "explanation": "However, HRMIS is described as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is mentioned in the context of data generation for reporting purposes.", + "contextual_reason_agent": "However, HRMIS is described as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 48, + "text": "Further, the extensive teacher supports built into the project design through training, the coaching program, and technology-enabled feedback as well as opportunities for professional growth and increased transparency in teacher management are expected to further mitigate these risks. 111. The risk related to refugees and host communities is rated High. Although as part of the IDA18 eligibility process Niger \u2019 s protection framework was confirmed as adequate as elaborated earlier, there are a set of protection-related challenges that are important to highlight. These include ( a ) the lack of documentation of refugees and Nigerien nationals in the affected region; ( b ) the struggle of host populations and refugees alike to access key education services and support; and ( c ) the potential impact of refugee influx on regional stability, social cohesion, and local perceptions. These risks will be mitigated through intense community mobilization and sensitization, adoption of a holistic and sustainable approach to address the educational needs of refugees and host communities simultaneously, and the inclusion of refugee-related student data in the regular national data collection systems..", + "ner_text": [ + [ + 1128, + 1156, + "named" + ], + [ + 406, + 411, + "refugee-related student data <> data geography" + ] + ], + "validated": true, + "empirical_context": "These include ( a ) the lack of documentation of refugees and Nigerien nationals in the affected region; ( b ) the struggle of host populations and refugees alike to access key education services and support; and ( c ) the potential impact of refugee influx on regional stability, social cohesion, and local perceptions. These risks will be mitigated through intense community mobilization and sensitization, adoption of a holistic and sustainable approach to address the educational needs of refugees and host communities simultaneously, and the inclusion of refugee-related student data in the regular national data collection systems. .", + "type": "data", + "explanation": "This is indeed a dataset as it is mentioned in the context of being included in national data collection systems.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data related to refugees and students.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of being included in national data collection systems.", + "contextual_signal": "follows 'inclusion of' indicating it is part of a data collection effort", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 16, + "text": "In 2021, ID4D worked with NIDP to do a costing for a Fayda rollout, which came up with ( a ) registering adults ages 18 and older only ( US $ 283 million or US $ 3. 8 per registrant ), ( b ) registering adults and children ages 14 and older only ( US $ 308 million or US $ 3. 5 per registrant ), and ( c ) registering adults and children ages 5 and older only ( US $ 334 million or US $ 2. 8 per registrant ). The World Bank has also worked with NIDP to map use cases for Fayda, including identifying complementary World Bank engagements. During 2022 pilots, ID4D conducted an exit survey and focus group discussions among PSNP beneficiaries who registered for Fayda to get insights on any shortcomings of registration processes to fine-tune registration during scale-up. A conflict analysis and end-user survey have contributed to the design and risk mitigation measures for this project. 23 NBE. 2021. Requirements for Undertaking Account Based Transactions and Ensuring of Regulatory Limits Directive No. FIS / 04 / 2021. 24 World Bank. 2016. ID4D Country Diagnostic: Ethiopia. https: / / id4d. worldbank. org / country-action / id4d-diagnostics.", + "ner_text": [ + [ + 577, + 588, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank has also worked with NIDP to map use cases for Fayda, including identifying complementary World Bank engagements. During 2022 pilots, ID4D conducted an exit survey and focus group discussions among PSNP beneficiaries who registered for Fayda to get insights on any shortcomings of registration processes to fine-tune registration during scale-up. A conflict analysis and end-user survey have contributed to the design and risk mitigation measures for this project.", + "type": "survey", + "explanation": "However, 'exit survey' refers to a method of data collection rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'exit survey' is a dataset because it involves collecting data from participants.", + "contextual_reason_agent": "However, 'exit survey' refers to a method of data collection rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a method of data collection, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 77, + "text": "Refugee owned businesses, in particular, are often constrained by short or uncertain time horizons and access to finance. 5. There are few medium to large enterprises in RHDs that could stimulate the local product or labor market. The business census recorded a total of 165 enterprises in Arua district that employ more than 10 workers ( less than 1 firm per square kilometer ), compared to more than 3, 900 such firms in the Kampala region ( about 20 firms per square kilometer ), although business regulations, entry and exit are the same across districts. Not surprisingly, the market does not generate a huge demand for labor, reflected in low hours per week and low wages \u2013 particularly for refugees whose wages are 35 to 45 percent lower compared to hosts and have been highly vulnerable to COVID related shocks, 6. Businesses in both RHDs tend to be small retail businesses and service providers. Refugee and host community members tend to own the same types of businesses, mostly small shops selling basic, readily available consumer products, followed by barber shops, food stalls and drug stores. In West Nile refugee settlements, there are host community-owned businesses like drug stores and mobile money kiosks. The average monthly sales for refugee business respondents are USD 220, while host community business respondents generate monthly sales of USD 388 on average.", + "ner_text": [ + [ + 235, + 250, + "named" + ], + [ + 290, + 303, + "business census <> data geography" + ], + [ + 427, + 441, + "business census <> data geography" + ], + [ + 1256, + 1284, + "business census <> reference population" + ] + ], + "validated": true, + "empirical_context": "There are few medium to large enterprises in RHDs that could stimulate the local product or labor market. The business census recorded a total of 165 enterprises in Arua district that employ more than 10 workers ( less than 1 firm per square kilometer ), compared to more than 3, 900 such firms in the Kampala region ( about 20 firms per square kilometer ), although business regulations, entry and exit are the same across districts. Not surprisingly, the market does not generate a huge demand for labor, reflected in low hours per week and low wages \u2013 particularly for refugees whose wages are 35 to 45 percent lower compared to hosts and have been highly vulnerable to COVID related shocks, 6.", + "type": "census", + "explanation": "In the context, it is explicitly mentioned as recording a total of enterprises, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'business census' implies a structured collection of data about enterprises.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as recording a total of enterprises, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 20, + "text": "This may be exacerbated in situations of high insecurity as families may see marrying their young daughters to older men as a way to protect them and to improve access to natural and financial resources. In addition, proxy indicators in Niger for social norms appear to legitimize, condone, and promote GBV, with wife beating being seen as justified by 59. 6 percent of women ( DHS 2012 ), against a regional average of 45. 7 percent. In Niger, there are no laws on domestic violence or aggravated penalties for crimes against spouses or family members. However, the GoN has been taking measures in recent years, with support from development partners, to reduce gender inequality, such as child protection committees, family planning assistance to married adolescent girls, and improvement of educational attainment for girls to remain enrolled in school in the event of pregnancy or marriage. This was supported by the World Bank \u2019 s Development Policy Financing series 2019 \u2013 20 ( P173113 ). 5. The spillover of the crisis in Mali and the Boko Haram regional crisis is causing a significant displacement of people toward and within Niger. According to the United Nations High Commissioner for Refugees ( UNHCR ), Niger is home to more than 568, 410 displaced people ( August 2021 ).", + "ner_text": [ + [ + 378, + 381, + "named" + ], + [ + 237, + 242, + "DHS <> data geography" + ], + [ + 382, + 386, + "DHS <> publication year" + ], + [ + 972, + 981, + "DHS <> publication year" + ], + [ + 1135, + 1140, + "DHS <> data geography" + ], + [ + 1216, + 1221, + "DHS <> data geography" + ] + ], + "validated": true, + "empirical_context": "In addition, proxy indicators in Niger for social norms appear to legitimize, condone, and promote GBV, with wife beating being seen as justified by 59. 6 percent of women ( DHS 2012 ), against a regional average of 45. 7 percent.", + "type": "survey", + "explanation": "In this context, 'DHS' refers to the Demographic and Health Survey, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHS' refers to a dataset because it is associated with statistical data on social norms and gender-based violence.", + "contextual_reason_agent": "In this context, 'DHS' refers to the Demographic and Health Survey, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 87, + "text": "Firms \u2019 access to financial services, 2018 ( % firms ) Percent of firms with a checking or savings account Percent of firms with a bank loan / line of credit Source: World Bank Enterprise Surveys, 2018. 5. Financial inclusion in Chad is below SSA average and the lowest in CEMAC sub-region. In 2017, only 9 percent and 4 percent of Chad \u2019 s adult population had access to a formal bank account or to credit ( compared to 33 percent and 8. 4 percent in SSA respectively ). This is the lowest ratio within the CEMAC region. Banking penetration is lower than the average rate in the CEMAC sub-region ( 12 percent ). Access to financial services is almost inexistent outside of urban areas \u2013 and Chadian women have considerably less access to basic financial services than men. Reasons for such low levels include geographic distance to a financial institution ( for 30 percent of Chadian survey respondents ) as well as the cost of financial services ( 24 percent ) and lack of documentation ( 24 percent ) ( Figure A8. 3 ). 6. The banking sector is exposed to severe vulnerabilities, the greatest of which stem from its exposure to the public sector and to a few large private companies.", + "ner_text": [ + [ + 166, + 195, + "named" + ], + [ + 38, + 42, + "World Bank Enterprise Surveys <> publication year" + ], + [ + 166, + 176, + "World Bank Enterprise Surveys <> publisher" + ], + [ + 197, + 201, + "World Bank Enterprise Surveys <> publication year" + ], + [ + 229, + 233, + "World Bank Enterprise Surveys <> data geography" + ], + [ + 273, + 289, + "World Bank Enterprise Surveys <> data geography" + ], + [ + 332, + 336, + "World Bank Enterprise Surveys <> data geography" + ], + [ + 508, + 520, + "World Bank Enterprise Surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "Firms \u2019 access to financial services, 2018 ( % firms ) Percent of firms with a checking or savings account Percent of firms with a bank loan / line of credit Source: World Bank Enterprise Surveys, 2018. 5.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as the source of empirical data regarding firms' access to financial services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data for the analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as the source of empirical data regarding firms' access to financial services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 62, + "text": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 50 of 76 ANNEX 2: Gender Assessment 1. This assessment seeks to elaborate on gender considerations for the UDAP-GovNet. The assessment, ( a ) identifies the gaps between the experiences and status of women and men relevant to this project, ( b ) identifies activities to address these gaps, and ( c ) suggests suitable indicators to track progress. 2. This assessment is based on information from: ( a ) desk-based document review including gender literature, national-level gender analytics, regional policy and guidance notes, and international good practice guidelines; and ( b ) stakeholder / client consultations to identify the most relevant and targeted ways to address these gaps and design measurable indicators to track progress. The assessment accounted for data collection constraints in refugee and host communities. A survey will be conducted to inform the baseline on device ownership / digital skills during project implementation, with the aim to improve sex-disaggregated data collection that is currently unavailable, and help measure outcomes on digital literacy targeted through digital skills and online safety training. 3. The table below identifies how the project will contribute to closing gender gaps through project interventions. It highlights the biggest gaps that are being addressed by project actions and assigns a relevant and measurable indicator to each.", + "ner_text": [ + [ + 184, + 195, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 50 of 76 ANNEX 2: Gender Assessment 1. This assessment seeks to elaborate on gender considerations for the UDAP-GovNet. The assessment, ( a ) identifies the gaps between the experiences and status of women and men relevant to this project, ( b ) identifies activities to address these gaps, and ( c ) suggests suitable indicators to track progress.", + "type": "project", + "explanation": "However, it is mentioned only as a project, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'data' in its name.", + "contextual_reason_agent": "However, it is mentioned only as a project, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "012_BOSIB-9a6accb6-73d1-4bd1-8307-d41a339a51ab", + "page": 13, + "text": "Inclusion in Kenya. 9 UN University Institute for Water, Environment and Health ( 2022 ) Water Security in Africa: A Preliminary Assessment, Issue 13 https: / / inweh. unu. edu / water-security-in-africa-a-preliminary-assessment / 10 Kenya Population and Housing Census ( 2019 ). https: / / www. knbs. or. ke / publications /. 11 Kenya Population and Housing Census ( 2019 ). https: / / www. knbs. or. ke / publications /. 12 Ministry of Education. 2020. WASH in Schools Situation Analysis. Unpublished draft. 13 Alexander, Kelly T., et al. 2014. \u201c Water, Sanitation and Hygiene Conditions in Kenyan Rural Schools: Are Schools Meeting the Needs of Menstruating Girls? \u201d Water 6 ( 5 ): 1453 \u2013 1466. https: / / doi. org / 10. 3390 / w6051453.", + "ner_text": [ + [ + 234, + 269, + "named" + ], + [ + 13, + 18, + "Kenya Population and Housing Census <> data geography" + ], + [ + 234, + 239, + "Kenya Population and Housing Census <> data geography" + ], + [ + 272, + 276, + "Kenya Population and Housing Census <> publication year" + ], + [ + 330, + 335, + "Kenya Population and Housing Census <> data geography" + ] + ], + "validated": true, + "empirical_context": "unu. edu / water-security-in-africa-a-preliminary-assessment / 10 Kenya Population and Housing Census ( 2019 ). https: / / www.", + "type": "census", + "explanation": "This is indeed a dataset as it is a census that provides empirical data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is a census, which typically involves a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is a census that provides empirical data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 9, + "text": "structure affects, among other things, the quality of the country ' s public institutions for service delivery. As a consequence, citizens often turn to costlier alternative providers of services. In addition, the absence of a regionally balanced development strategy together with geographic disparities in human capital has created regional disparities in living conditions. Development efforts since the end of the Civil War ( 1975-1991 ) have mostly focused on urban reconstruction, while the rural northern region, which houses almost half of the poor population, continues to lag behind. 4. Although poverty rates are comparable to those of other middle-income countries, poverty remains significant and regional disparities in living conditions are acute. Based on the latest available household budget survey, it is estimated that nearly 27 percent of the Lebanese population, or 1 million people, were poor, living on less than US $ 4 per day, and 8 percent, or 300, 000 people, were extremely poor, living on less than US $ 2. 40 per day. 7 / 8 Poverty is significantly higher in some regions.", + "ner_text": [ + [ + 793, + 816, + "named" + ], + [ + 497, + 518, + "household budget survey <> data geography" + ], + [ + 864, + 883, + "household budget survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Although poverty rates are comparable to those of other middle-income countries, poverty remains significant and regional disparities in living conditions are acute. Based on the latest available household budget survey, it is estimated that nearly 27 percent of the Lebanese population, or 1 million people, were poor, living on less than US $ 4 per day, and 8 percent, or 300, 000 people, were extremely poor, living on less than US $ 2. 40 per day.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data used to estimate poverty rates in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on household budgets.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data used to estimate poverty rates in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 17, + "text": "The project supports these initiatives by investing in the RN1 catchment area between Maradi and Zinder, using a \u201c basins of integration \u201d approach51 to unlock the economic potential of the subregion. 43 Ministry of Transport of Niger. 2015. Study on Intermediary Means of Transport for Rural Logistics ( French ). 44 Open-air kilichi production is laborious and time consuming, taking two or three days or longer during rainy seasons. 45 Transporting onions can require time, taking up to 15 days, and may result in a 6 or 7 percent loss. 46 International Monetary Fund. 2022. Financial GoInclusion in Niger: Challenges and Opportunities. 47 The Food and Agriculture Organization Corporate Statistical Database reports that commercial banks \u2019 loan portfolio share or both agropastoral production and logistics amounted to only US $ 15. 61 million, equivalent to 1 percent of the total outstanding loan portfolio \u2014 the lowest in the UEMOA region. 48 WHO. 2023. Global Status Report on Road Safety 2023. Available at: https: / / cdn. who. int / media / docs / default-source / country-profiles / road-safety / road-safety-2023-ner. pdf? sfvrsn = dd3f54fa_3 & download = true 49 \" Similarly, the provisions of Article 15 of Law No. 2014-62 of November 5, 2014, regulate the age of all vehicles at the time of importation.", + "ner_text": [ + [ + 647, + 711, + "named" + ], + [ + 236, + 240, + "Food and Agriculture Organization Corporate Statistical Database <> reference year" + ], + [ + 572, + 576, + "Food and Agriculture Organization Corporate Statistical Database <> publication year" + ], + [ + 744, + 764, + "Food and Agriculture Organization Corporate Statistical Database <> data description" + ] + ], + "validated": true, + "empirical_context": "Financial GoInclusion in Niger: Challenges and Opportunities. 47 The Food and Agriculture Organization Corporate Statistical Database reports that commercial banks \u2019 loan portfolio share or both agropastoral production and logistics amounted to only US $ 15. 61 million, equivalent to 1 percent of the total outstanding loan portfolio \u2014 the lowest in the UEMOA region.", + "type": "database", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a database that provides statistical data used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Corporate Statistical Database' which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a database that provides statistical data used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 14, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 12 of 34 16. The project is targeting 520 vulnerable IDP participants with the aim of benefiting their households, so a broader group of 2, 100 total beneficiaries. The average household size in Azerbaijan is 4. 1 persons. C. PDO-Level Results Indicators 17. Achievement of the proposed Project Development Objective, will be measured through the following indicators: \u2022 Percentage of participants self-employed or employed by firms \u2022 Increase in income of households with individuals participating in the project \u2022 Percentage of registered participants completing training and receiving certificates \u2022 Beneficiaries of job-focused interventions, of which female ( core World Bank indicator ) \u2022 Percentage of beneficiaries taking a more active role in their communities disaggregated by gender and persons with disability 18. Baseline data on indicators will be collected to facilitate the measurement of project impact. Upon registration of participants for project support, data will be gathered to establish baseline conditions for each beneficiary and their household. Follow-up surveys will be conducted to compare baseline conditions to those after the completion of project activities. III. PROJECT DESCRIPTION A. Project Components 19. Component 1: Skills development.", + "ner_text": [ + [ + 1178, + 1195, + "named" + ] + ], + "validated": false, + "empirical_context": "Upon registration of participants for project support, data will be gathered to establish baseline conditions for each beneficiary and their household. Follow-up surveys will be conducted to compare baseline conditions to those after the completion of project activities. III.", + "type": "survey", + "explanation": "'Follow-up surveys' are mentioned as a method of data collection rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Follow-up surveys' refers to a dataset because surveys often collect structured data.", + "contextual_reason_agent": "'Follow-up surveys' are mentioned as a method of data collection rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a method of data collection, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 77, + "text": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 65 of 76 Project Total ( US $ million ) Development objectives and geographic scope UDAP-GovNet Synergies implementation commencing children and the interoperability of these systems between humanitarian managed and national systems, Uganda Intergovernmental Fiscal Transfer ( UgiFT ) - Additional Financing ( P172868 ) Status: Pending effectiveness Total 250 RSW / WHR 50 Improve the adequacy and equity of fiscal transfers and fiscal management of resources by Local Governments for health, education, community irrigation and water services across all RHDs The UgIFT operation includes integrated digitization of service delivery in education, health, water, and micro-irrigation. This includes developing strategies for digitization of sector services, results management / dashboard linked to Online Transfer Information Management System ( OTIMS ) and Online Performance Management System ( OPAMS ), electronic medical records system and so on. UDAP-GovNet will play a key role by providing the enabling infrastructure to support the digitization of health, education, and water services to strengthen management, efficiency, reporting and planning systems outlined under UgIFT at the district level, including in RHDs. UDAP-GovNet will collaborate to ensure interoperability between the activities in UDAP-GovNet and UgIFT.", + "ner_text": [ + [ + 935, + 971, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 65 of 76 Project Total ( US $ million ) Development objectives and geographic scope UDAP-GovNet Synergies implementation commencing children and the interoperability of these systems between humanitarian managed and national systems, Uganda Intergovernmental Fiscal Transfer ( UgiFT ) - Additional Financing ( P172868 ) Status: Pending effectiveness Total 250 RSW / WHR 50 Improve the adequacy and equity of fiscal transfers and fiscal management of resources by Local Governments for health, education, community irrigation and water services across all RHDs The UgIFT operation includes integrated digitization of service delivery in education, health, water, and micro-irrigation. This includes developing strategies for digitization of sector services, results management / dashboard linked to Online Transfer Information Management System ( OTIMS ) and Online Performance Management System ( OPAMS ), electronic medical records system and so on. UDAP-GovNet will play a key role by providing the enabling infrastructure to support the digitization of health, education, and water services to strengthen management, efficiency, reporting and planning systems outlined under UgIFT at the district level, including in RHDs.", + "type": "system", + "explanation": "However, it is described as a system for performance management, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which could imply data management.", + "contextual_reason_agent": "However, it is described as a system for performance management, not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "167_27761", + "page": 44, + "text": "for Planning ( training consultant ). These consultants will support the DGs in the coordinating functions and preparation o f periodic and annual progress reports from MOSA to the MOF, IDA and donors. These Directorates will be strengthened through technical assistance and training. An additional advisor i s envisaged to build capacity at the DG for Aid Administration and Family Rehabilitation located in the West Bank, who will assist the DG located in Gaza on the coordination of the project in the West Bank. A detailed job description of the DGs respective responsibilities i s given in the Operational Manual. The General Directorate for Financial and Administrative Affairs will manage procurement of goods and services as well as the financial management information system under the SSNRP. The MIS system installed in Gaza i s being replicated in the West Bank. MOSA local offices in the governorates ( muderiats ) will coordinate implementation with local MOH and MOEHE offices on compliance monitoring. Chart 3 shows the organizational diagram of MOSA, although only the concerned DGs in the WB & G responsible for SSNRP implementation. UNRWA will provide a list of its beneficiaries to MOSA, which MOSA will use to validate eligible beneficiaries. Verification of beneficiary compliance with monthly cash transfer conditions will be carried out by MOH and MOEHE, and the information will be forwarded to MOSA.", + "ner_text": [ + [ + 806, + 816, + "named" + ] + ], + "validated": false, + "empirical_context": "The General Directorate for Financial and Administrative Affairs will manage procurement of goods and services as well as the financial management information system under the SSNRP. The MIS system installed in Gaza i s being replicated in the West Bank. MOSA local offices in the governorates ( muderiats ) will coordinate implementation with local MOH and MOEHE offices on compliance monitoring.", + "type": "system", + "explanation": "However, the context indicates it is a management information system, not a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS system' suggests a structured collection of data.", + "contextual_reason_agent": "However, the context indicates it is a management information system, not a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 63, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 59 of 68 IRI # 11: Share of girls and women in TVET programs increased. Sub-component 2. 2 IRI # 12: Number of short-term training programs completed ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 13: Number of individuals who are certified through newly developed RPL procedures ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 14: Number of trainers undertaking initial training ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 15 Beneficiary, Stakeholder and other feedback Technology based beneficiary feedback Biannual Survey conducted by PMU Survey using technology DGETFP IRI # 16: Number of trainers engaged in CPD ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 Annual targets Data collected from the DGETFP.", + "ner_text": [ + [ + 734, + 744, + "named" + ], + [ + 4, + 14, + "PMU Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "2 IRI # 14: Number of trainers undertaking initial training ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 15 Beneficiary, Stakeholder and other feedback Technology based beneficiary feedback Biannual Survey conducted by PMU Survey using technology DGETFP IRI # 16: Number of trainers engaged in CPD ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 Annual targets Data collected from the DGETFP.", + "type": "survey", + "explanation": "The PMU Survey is explicitly mentioned as a biannual survey conducted to gather feedback, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "The PMU Survey is explicitly mentioned as a biannual survey conducted to gather feedback, indicating it functions as a data source.", + "contextual_signal": "described as a survey that collects data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 93, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 82 77. An integrated M & E system will be developed and implemented as part of the Project to support implementation and reporting. The MWE, NWSC, and other agencies are currently collecting sector performance data in different databases and monitoring platforms. The MWE utilizes the Water Supply Database, Water Supply Atlas, and the UPMIS. The MWE utilizes operations and financial tracking systems at its headquarters and district levels. The OPM utilized the Refugees Response Portal. The OPM is also currently developing a new refugee database and service provider performance monitoring and tracking system. The new integrated M & E system will interface with various systems to consolidate data storage and facilitate information management. Specific Project information related to procurement, disbursements, and environmental and safeguards implementation will also be integrated in the new M & E system. The development and implementation of this M & E system is expected to be centralized at the MWE. Climate Mitigation and Adaptation Co-benefits 78. An assessment of climate co-benefits for the Project was conducted for Components 1 to 3. A quantitative analysis was conducted to estimate the Project ' s impact on GHG emissions and to value the externality using the shadow price of carbon ( Annex 4 ).", + "ner_text": [ + [ + 415, + 420, + "named" + ], + [ + 426, + 429, + "UPMIS <> author" + ] + ], + "validated": true, + "empirical_context": "The MWE, NWSC, and other agencies are currently collecting sector performance data in different databases and monitoring platforms. The MWE utilizes the Water Supply Database, Water Supply Atlas, and the UPMIS. The MWE utilizes operations and financial tracking systems at its headquarters and district levels.", + "type": "database", + "explanation": "UPMIS is confirmed as a dataset since it is utilized for collecting sector performance data in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UPMIS is a dataset because it is mentioned alongside other databases used for collecting performance data.", + "contextual_reason_agent": "UPMIS is confirmed as a dataset since it is utilized for collecting sector performance data in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 41, + "text": "Biannual analysis of project management information by the PIU will inform project processes, allowing for timely actions and adjustments ( including learning how to better support females and minority groups ). Regular follow-ups by the frontline implementing agencies ( that is, sectoral hubs, apprenticeship CEMs, and entrepreneurship implementing agencies ) in the form of brief, agile tracer studies will be conducted, with on-the-ground support by a third party. Beneficiary surveys will be conducted by a third party annually to further measure achievement of results and inform adjustments that may be needed in project design and implementation arrangements. Periodic representative, sample-based, and unannounced monitoring visits are also expected to be conducted by third parties over the project period, to supplement internal monitoring and reporting. A midterm review will involve the project \u2019 s stakeholders to collectively review project results and implementation arrangements. 71. At the PIU, an M & E system will be set up to host and manage operational data for project indicators. The PIU will work with the general secretary in charge of technical and vocational education and trades and the Bureau of Planning and Statistics under the Permanent Secretary at MNESR to attend to links and ultimate integration between the project \u2019 s M & E system and the MNESR Education Management Information System. The M & E system will be supported by the World Bank \u2019 s Geo-Enabling for Monitoring and Supervision ( GEMS ) initiative, which leverages the use of portable ICT tools", + "ner_text": [ + [ + 1016, + 1028, + "named" + ] + ], + "validated": false, + "empirical_context": "71. At the PIU, an M & E system will be set up to host and manage operational data for project indicators. The PIU will work with the general secretary in charge of technical and vocational education and trades and the Bureau of Planning and Statistics under the Permanent Secretary at MNESR to attend to links and ultimate integration between the project \u2019 s M & E system and the MNESR Education Management Information System.", + "type": "system", + "explanation": "However, it is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves managing operational data.", + "contextual_reason_agent": "However, it is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 24, + "text": "17 Risk Level Mitigation Measure been impressive at the expense of quality. issues are not tackled in an effective manner, the entire system will lose its credibility. Overall Risk Rating H The risk ratings above factor in the mitigation measures. H: High M: Moderate L: Low F. Loan / Credit Conditions and Covenants 67. There are no conditions for Board presentation other than receipt by the World Bank of approval of negotiated documents by the recipient. 68. Conditions for effectiveness include: \uf0b7 The Subsidiary Agreement between the PLO and the PA has been executed on behalf of the PLO and the PA, as represented by the MOF. \uf0b7 MOEHE \u2019 s DSQ has appointed a TMT acceptable to the Bank. \uf0b7 MOEHE \u2019 s NIET has appointed a TMT acceptable to the Bank. \uf0b7 The PCU has been adequately staffed, including with experts in the areas of financial management, procurement and reporting, and is operational. \uf0b7 The OM has been finalized and adopted by the MOEHE. \uf0b7 The SC has been established. \uf0b7 MOEHE has provided baseline data in an acceptable form that would enable the MOEHE to assess performance of the \u201c decline in shortfall of qualified teachers at the primary level. \uf0b7 Issuing of the required legal opinion on the Trust Fund Grant Agreement and the Subsidiary Agreement.", + "ner_text": [ + [ + 1007, + 1020, + "named" + ] + ], + "validated": true, + "empirical_context": "\uf0b7 The SC has been established. \uf0b7 MOEHE has provided baseline data in an acceptable form that would enable the MOEHE to assess performance of the \u201c decline in shortfall of qualified teachers at the primary level. \uf0b7 Issuing of the required legal opinion on the Trust Fund Grant Agreement and the Subsidiary Agreement.", + "type": "data", + "explanation": "In this context, 'baseline data' is explicitly mentioned as data provided to assess performance, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' is a dataset because it refers to a collection of data used for assessment.", + "contextual_reason_agent": "In this context, 'baseline data' is explicitly mentioned as data provided to assess performance, confirming its role as a data source.", + "contextual_signal": "mentioned as data provided to assess performance", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 93, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 87 of 101 households ( 58 percent ) than in richest households ( 63 percent ). Figure 3. 2 also shows regional disparities in terms of PCR. For instance, the PCR rate is higher in Niamey ( 118 percent ), the capital city, while it is only 19 percent in Diffa. Figure 3. 2. PCR by Socioeconomic Status Source: Authors \u2019 estimations based on ECVMA 2014. 4. Learning outcomes in Niger are particularly low, results from the 2014 PASEC show that only 9 percent of Grade 6 children reach sufficient competency threshold in reading and the proportion is only 8 percent in mathematics. In addition, the project supports activities to create an enabling environment for inclusive learning to promote school retention and learning. An emphasis is placed on the provision of alternative forms of quality education for vulnerable and out-of-school children as about half of children ages 7 \u2013 16 are not in school, and the out-of-school incidence varies largely across regions and wealth backgrounds. Furthermore, children in Niger can expect to complete only 5. 3 years of preprimary, primary, and secondary school by 18 years. However, when the years of schooling are adjusted for quality of learning, this is only equivalent to 2. 6 years.", + "ner_text": [ + [ + 518, + 523, + "named" + ] + ], + "validated": false, + "empirical_context": "4. Learning outcomes in Niger are particularly low, results from the 2014 PASEC show that only 9 percent of Grade 6 children reach sufficient competency threshold in reading and the proportion is only 8 percent in mathematics. In addition, the project supports activities to create an enabling environment for inclusive learning to promote school retention and learning.", + "type": "project", + "explanation": "PASEC is mentioned in the context of learning outcomes but is not described as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed PASEC is a dataset because it is associated with educational results and statistics.", + "contextual_reason_agent": "PASEC is mentioned in the context of learning outcomes but is not described as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 144, + "text": "Altogether, these four regions ( North, Far North, East and Adamawa ) account for 66 percent of the poor households in the country ( even though they are home to only 38 percent of the total population ). Access to basic services is limited, and these regions are relatively isolated from the rest of the country. The presence of large numbers of refugees has exacerbated these pre-existing challenges. 3. The refugee crisis has reinforced existing territorial inequities and a rapid increase in poverty in northern Cameroon had been observed before the heightened insecurity in the region associated with Boko Haram activities. The deterioration of the security environment in northern Cameroon occurred in large part after data were collected for the 2014 ECAM. The poverty estimates for the northern regions should, therefore, be considered lower bounds, since the impact of an influx of refugees, the closure of markets, roads and frontiers, were largely not recorded by the ECAM. Because of the interruption of agricultural activities and trade, as well as population displacement and increased vulnerability and food insecurity in the Far North, approximately 2. 4 million people are considered food insecure and 250, 000 people are estimated to be suffering from acute malnutrition. 4.", + "ner_text": [ + [ + 979, + 983, + "named" + ], + [ + 60, + 67, + "ECAM <> data geography" + ], + [ + 507, + 524, + "ECAM <> data geography" + ], + [ + 753, + 757, + "ECAM <> publication year" + ] + ], + "validated": true, + "empirical_context": "The deterioration of the security environment in northern Cameroon occurred in large part after data were collected for the 2014 ECAM. The poverty estimates for the northern regions should, therefore, be considered lower bounds, since the impact of an influx of refugees, the closure of markets, roads and frontiers, were largely not recorded by the ECAM. Because of the interruption of agricultural activities and trade, as well as population displacement and increased vulnerability and food insecurity in the Far North, approximately 2.", + "type": "survey", + "explanation": "ECAM is indeed a dataset as it is explicitly mentioned in relation to data collection and analysis of poverty estimates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because ECAM is referenced in the context of data collection for poverty estimates.", + "contextual_reason_agent": "ECAM is indeed a dataset as it is explicitly mentioned in relation to data collection and analysis of poverty estimates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 16, + "text": "7 22. Moreover, investments in strengthening data systems ( both technical and financial ) need to continue to deepen the use of data for evidence \u2010 based decision making in the sector and further improve resource allocation. The MOE has successfully deployed an education management information system ( EMIS ), which is now hosting data on all schools and students in the system. Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country. Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize. In addition, the collection, analysis, and use of student learning data and disaggregated and gender \u2010 sensitive data are essential for monitoring, targeting pedagogical interventions, and improving teacher practices in the classroom. C. Relationship to the Country Partnership Framework and Rationale for Use of Instrument 23. Relationship to the CPF. The proposed operation is fully aligned with the Jordan Country Partnership Framework ( CPF ) discussed by the World Bank Group Board on July 14, 2016. The CPF covers the period FY17 \u2013 22 and highlights the economic, geopolitical, and social challenges that Jordan has been facing, particularly with the Syrian refugee crisis.", + "ner_text": [ + [ + 305, + 309, + "named" + ], + [ + 230, + 233, + "EMIS <> publisher" + ], + [ + 358, + 366, + "EMIS <> reference population" + ], + [ + 477, + 480, + "EMIS <> publisher" + ], + [ + 665, + 668, + "EMIS <> publisher" + ], + [ + 808, + 829, + "EMIS <> data description" + ], + [ + 834, + 875, + "EMIS <> data description" + ], + [ + 1160, + 1166, + "EMIS <> data geography" + ], + [ + 1257, + 1261, + "EMIS <> publication year" + ], + [ + 1369, + 1375, + "EMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Moreover, investments in strengthening data systems ( both technical and financial ) need to continue to deepen the use of data for evidence \u2010 based decision making in the sector and further improve resource allocation. The MOE has successfully deployed an education management information system ( EMIS ), which is now hosting data on all schools and students in the system. Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country.", + "type": "system", + "explanation": "EMIS is indeed a dataset as it is explicitly mentioned to host data on all schools and students in the system.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is described as an education management information system that hosts data.", + "contextual_reason_agent": "EMIS is indeed a dataset as it is explicitly mentioned to host data on all schools and students in the system.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 16, + "text": "It contributes to Pillar II ( Resilience ) by setting-up a cash transfer program targeted to the poorest and stimulating investments in their human capital and Pillar III ( Governance ) by setting-up the mechanisms for delivering cash transfers effectively, such as a transparent database of potential social assistance beneficiaries, a transparent and reliable payment system, and a basic monitoring and evaluation system. The proposed project is developed in close collaboration with the poverty and health teams on targeting and nutrition services respectively. For targeting, it uses analysis from the poverty assessment and poverty maps based on the recent 2013 / 14 household survey data. The Box 2: The National Social Protection Strategy ( PNPS ), 2015 Three key strategic and one cross-cutting objectives: \u2022 Increase access to basic social services such as health, water and sanitation, and education \u2022 Ensure food and basic income security both for those that can exit extreme poverty sustainably and for those that will remain vulnerable their whole life \u2022 Strengthen natural and social risks management: social protection \u2019 s role is to strengthen the resilience of vulnerable groups i. e. their capacity to better manage the risks they face, without resorting to damaging adaptation strategies \u2022 Cross-cutting objective: Contribute to decreasing young children \u2019 s chronic malnutrition.", + "ner_text": [ + [ + 606, + 641, + "named" + ] + ], + "validated": false, + "empirical_context": "The proposed project is developed in close collaboration with the poverty and health teams on targeting and nutrition services respectively. For targeting, it uses analysis from the poverty assessment and poverty maps based on the recent 2013 / 14 household survey data. The Box 2: The National Social Protection Strategy ( PNPS ), 2015 Three key strategic and one cross-cutting objectives: \u2022 Increase access to basic social services such as health, water and sanitation, and education \u2022 Ensure food and basic income security both for those that can exit extreme poverty sustainably and for those that will remain vulnerable their whole life \u2022 Strengthen natural and social risks management: social protection \u2019 s role is to strengthen the resilience of vulnerable groups i.", + "type": "assessment, maps", + "explanation": "However, these terms refer to assessments and visual representations rather than structured collections of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'poverty assessment' and 'poverty maps' suggest structured data related to poverty.", + "contextual_reason_agent": "However, these terms refer to assessments and visual representations rather than structured collections of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 68, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 63 Accounting Arrangements 13. Accounting policies and procedures. The current accounting standards in use in West and Central African Francophone countries for ongoing Bank-financed projects will be applicable. SYSCOHADA is the assigned accounting system in West and Central African Francophone countries. Project accounts will be maintained on an accrual basis, supported with appropriate records and procedures to track commitments and to safeguard assets. Annual financial statements will be prepared by the project implementing agency in accordance with SYSCOHADA and Bank requirements. Accounting and control procedures will be documented in the Administrative, Financial and Accounting Procedures Manual. 14. FM manuals. CFS will update and adapt the administrative, financial and accounting procedures manual it currently uses for the PFS. The updated procedures manual should be adopted before project effectiveness. 15. Accounting staff. The current FM team consists of an administrative and financial specialist and a senior accountant at the central level, as well as two assistant accountants at the regional level. One additional accountant and three assistant accountants will be hired. The accountant will be based in N \u2019 djamena while the assistant accountants will be based in the regional offices to be opened as part of the new project. 16. Accounting software.", + "ner_text": [ + [ + 297, + 306, + "named" + ] + ], + "validated": false, + "empirical_context": "The current accounting standards in use in West and Central African Francophone countries for ongoing Bank-financed projects will be applicable. SYSCOHADA is the assigned accounting system in West and Central African Francophone countries. Project accounts will be maintained on an accrual basis, supported with appropriate records and procedures to track commitments and to safeguard assets.", + "type": "system", + "explanation": "SYSCOHADA is mentioned as an accounting system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SYSCOHADA is a dataset because it is related to accounting and data management.", + "contextual_reason_agent": "SYSCOHADA is mentioned as an accounting system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 19, + "text": "The Theory of Change ( Table 2 ) is structured around the Program \u2019 s three RAs: \u2022 RA1 on improved service delivery through digitalization \u2022 RA2 on enhanced government effectiveness through digitalization \u2022 RA3 on transparency and accountability through digitalization. 25. The Program builds synergies across its results framework. The strengthening of trusted and people-centric DPI under RA1 will bolster the digitalization of the education and health sectors and competency-based management in the civil service in RA2 and RA3. Specifically, secondary education diplomas will be digitally verifiable using DPI, which will not only increase trust in their authenticity but also allow them to be shared easily in a people-centric way ( that is, with user consent and data minimization ). For core health systems, such as those that manage EMRs, their integration with trusted DPI will improve the protection of sensitive health data while facilitating safe data sharing capabilities. The Program \u2019 s results framework is further underpinned by three main cross-cutting dimensions across the three RAs: 1 ) People-centricity, that is, a focus on output - and outcome-level results at the interface between government and the people ( across e-services and in the education and health sectors ). 2 ) Data generation and use for performance monitoring and evaluation ( M & E ) to inform policymaking and implementation ( through the release of interactive statistical data and the use of health quality data ). 3 ) Direct and indirect benefits to Syrian refugees, since the Program supports enhanced refugee access to e-services and digital ID, the digitalized secondary education examination, and e-health services. Disaggregated statistical and administrative data will help provide evidence on socioeconomic indicators and inform policy dialogue.", + "ner_text": [ + [ + 841, + 845, + "named" + ] + ], + "validated": false, + "empirical_context": "Specifically, secondary education diplomas will be digitally verifiable using DPI, which will not only increase trust in their authenticity but also allow them to be shared easily in a people-centric way ( that is, with user consent and data minimization ). For core health systems, such as those that manage EMRs, their integration with trusted DPI will improve the protection of sensitive health data while facilitating safe data sharing capabilities. The Program \u2019 s results framework is further underpinned by three main cross-cutting dimensions across the three RAs: 1 ) People-centricity, that is, a focus on output - and outcome-level results at the interface between government and the people ( across e-services and in the education and health sectors ).", + "type": "system", + "explanation": "However, EMRs are mentioned as part of core health systems and not explicitly as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMRs are a dataset because they involve electronic records of health information.", + "contextual_reason_agent": "However, EMRs are mentioned as part of core health systems and not explicitly as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 40, + "text": "The ACC PIU will receive additional ESF training before effectiveness and will be supported by the Bank \u2019 s ESF team throughout implementation, as needed. E. Corporate Requirements: Citizen Engagement, Gender and Climate Change 103. Gender equity. Despite improvements in recent years, female labor force participation ( FLFP ) rates continue to be very low at 34 percent, compared to 73 percent for men. Agriculture is a major sector of work for economically active women, with 26 percent of female employment working in agriculture and women making up close to half the agricultural workforce. 61 Refugee women are more prone to early marriages and less likely to be economically active compared to Turkish women. Public activities including contacts with agricultural intermediaries, neighborhood foremen ( muhtar ), and local community is almost exclusively conducted by the men of the household, indicating that financial matters of refugee women 60 The project benefits from the Bank \u2019 s EHSG and the FAO Guidance Note: Child Labour in Agriculture in Protracted Crises, Fragile and Humanitarian Contexts. 61 Turkish Statistical Institute Household Labor Force Survey, 2018.", + "ner_text": [ + [ + 1144, + 1172, + "named" + ], + [ + 1114, + 1143, + "Household Labor Force Survey <> publisher" + ], + [ + 1174, + 1178, + "Household Labor Force Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Public activities including contacts with agricultural intermediaries, neighborhood foremen ( muhtar ), and local community is almost exclusively conducted by the men of the household, indicating that financial matters of refugee women 60 The project benefits from the Bank \u2019 s EHSG and the FAO Guidance Note: Child Labour in Agriculture in Protracted Crises, Fragile and Humanitarian Contexts. 61 Turkish Statistical Institute Household Labor Force Survey, 2018.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data used for empirical analysis regarding household labor.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a survey conducted by a statistical institute.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used for empirical analysis regarding household labor.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 65, + "text": "At the same time, this approach allows for further expansion through private connections in a subsequent phase, once the necessary hydraulic capacity is present in the network. Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities. In addition, the project team and REGIDESO organized a participatory workshop, in which experiences to date with standpost management in Burundi, Kenya, Senegal and Rwanda were presented and discussed. The workshop, which included community representatives, members of government, REGIDESO staff, World Bank representatives, and international invited speakers with direct experience in standpost management, also served to produce recommendations on the type of standpost management that would be most appropriate in the context of Bujumbura. 14. As seen in the previous section, the household survey generated baseline information regarding water supply, sanitation, electricity provision, and general socio-economic and demographic data about the 26 neighborhoods under study. It also asked respondents to express their preferences about the type of service they would like. Not surprisingly, a majority of respondents ( 63. 9 % ) would prefer to pay to have a private connection to the network.", + "ner_text": [ + [ + 346, + 362, + "named" + ], + [ + 592, + 599, + "household survey <> data geography" + ], + [ + 620, + 626, + "household survey <> data geography" + ], + [ + 987, + 996, + "household survey <> data geography" + ], + [ + 1248, + 1259, + "household survey <> reference population" + ], + [ + 1364, + 1375, + "household survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities. In addition, the project team and REGIDESO organized a participatory workshop, in which experiences to date with standpost management in Burundi, Kenya, Senegal and Rwanda were presented and discussed.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as part of the socio-economic feasibility study, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data collected from households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as part of the socio-economic feasibility study, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 47, + "text": "The World Bank Southern Niger Connectivity and Integration Project ( P179770 ) Page 37 of women to physically access obstetric care in the project area. Frequency At baseline, midterm, and after completion Data source A survey on rural accessibility will be used to collect mobility data on the impact of the newly rehabilitated and upgraded rural roads on people \u2019 s access to health services, particularly on women \u2019 s travel time to access obstetric care in the project areas. Methodology for Data Collection - Prepare a GEMS form to collect mobility data from a sample of women beneficiaries of maternity services. - Measure the reduction in travel time compared to the baseline value ( based on a GIS transportation model which relies on mix of modelled data and empirical data ). Responsibility for Data Collection General Directorate for Public Health ( DGSP ) / ministry in charge of public health and social affairs, in collaboration with some World Bank experts. Length of rehabilitated RN1 Maradi \u2013 Zinder section incorporating climate resilience measures ( Km ) Description This indicator measures the total length, in kilometers, of the RN1 road between Maradi and Zinder that has been rehabilitated taking into account climate resilience measures.", + "ner_text": [ + [ + 274, + 287, + "named" + ], + [ + 4, + 14, + "mobility data <> publisher" + ], + [ + 220, + 249, + "mobility data <> data type" + ], + [ + 576, + 617, + "mobility data <> reference population" + ], + [ + 821, + 858, + "mobility data <> author" + ], + [ + 953, + 963, + "mobility data <> publisher" + ], + [ + 1001, + 1016, + "mobility data <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Southern Niger Connectivity and Integration Project ( P179770 ) Page 37 of women to physically access obstetric care in the project area. Frequency At baseline, midterm, and after completion Data source A survey on rural accessibility will be used to collect mobility data on the impact of the newly rehabilitated and upgraded rural roads on people \u2019 s access to health services, particularly on women \u2019 s travel time to access obstetric care in the project areas. Methodology for Data Collection - Prepare a GEMS form to collect mobility data from a sample of women beneficiaries of maternity services.", + "type": "survey", + "explanation": "In the context, 'mobility data' is explicitly mentioned as being collected through a survey, confirming it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'mobility data' is a dataset because it refers to data collected from a survey.", + "contextual_reason_agent": "In the context, 'mobility data' is explicitly mentioned as being collected through a survey, confirming it functions as a data source.", + "contextual_signal": "follows 'data source' and described as collected from a survey", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 14, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 11 of 89 Listening to Tajikistan Survey21 indicated a strong seasonal correlation of share of households reporting water disruptions, even for households with no connection to municipal water systems, with no significant difference between the bottom 40 and top 60 percent of population, suggesting heavy reliance on open water sources \u2014 largely irrigation canals during the off-season. However, the number of reported days with water disruptions varies from two to six days between urban and rural populations accordingly. 22 12. The quality of water collected from open water sources is usually poor, correlating with incidences of waterborne illnesses, such as diarrhea, and increasingly worrisome in areas with high density of population, heavy reliance on untreated surface water for drinking, and experiencing rapid increase in temperatures. The Household WASH Survey confirmed this finding at the national level, with 16 percent of respondents reporting experiencing gastrointestinal disorders due to poor water quality. While official statistics on waterborne diseases and diseases associated with inadequate WSS are largely underestimated, representatives of local government, schools, and health clinics, as well as local leaders also identified the poor quality of drinking water as the main cause of diarrhea outbreaks, along with poor sanitation and hygiene conditions.", + "ner_text": [ + [ + 112, + 131, + "named" + ], + [ + 4, + 14, + "Tajikistan Survey21 <> publisher" + ], + [ + 112, + 122, + "Tajikistan Survey21 <> data geography" + ], + [ + 233, + 289, + "Tajikistan Survey21 <> reference population" + ], + [ + 490, + 536, + "Tajikistan Survey21 <> data description" + ], + [ + 625, + 675, + "Tajikistan Survey21 <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 11 of 89 Listening to Tajikistan Survey21 indicated a strong seasonal correlation of share of households reporting water disruptions, even for households with no connection to municipal water systems, with no significant difference between the bottom 40 and top 60 percent of population, suggesting heavy reliance on open water sources \u2014 largely irrigation canals during the off-season. However, the number of reported days with water disruptions varies from two to six days between urban and rural populations accordingly.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to report findings on water disruptions among households in Tajikistan.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical data on water disruptions.", + "contextual_reason_agent": "This is indeed a dataset as it is used to report findings on water disruptions among households in Tajikistan.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 12, + "text": "Despite relatively high per capita income of US $ 1, 862 in 2015, 30 percent of the population lives in poverty and 21 percent lives in extreme poverty according to the fourth ( 2018 ) round of household survey ( EDAM4 ). The same survey also found that: ( a ) 36 percent of Djibouti \u2019 s population is under 14 years old and 51 percent under 24; ( b ) its human development indicators indicate a life expectancy of 62 years and an infant mortality at birth rate to be 54 / 1, 000; and ( c ) the adult literacy rate is only 53 percent. In terms of employment opportunities, Djibouti \u2019 s national unemployment rate in 2017 stood at 47 percent for people aged 15 and older, and 22 percent for those aged 15-24, with significant variations in unemployment rates across gender, region and age2. And while there exists a slight positive correlation between employment rates and education levels, attaining higher levels of education does not guarantee more opportunities in the labor market. 3.", + "ner_text": [ + [ + 194, + 210, + "named" + ], + [ + 178, + 182, + "household survey <> publication year" + ], + [ + 275, + 283, + "household survey <> data geography" + ], + [ + 396, + 423, + "household survey <> data description" + ], + [ + 495, + 533, + "household survey <> data description" + ], + [ + 573, + 581, + "household survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Despite relatively high per capita income of US $ 1, 862 in 2015, 30 percent of the population lives in poverty and 21 percent lives in extreme poverty according to the fourth ( 2018 ) round of household survey ( EDAM4 ). The same survey also found that: ( a ) 36 percent of Djibouti \u2019 s population is under 14 years old and 51 percent under 24; ( b ) its human development indicators indicate a life expectancy of 62 years and an infant mortality at birth rate to be 54 / 1, 000; and ( c ) the adult literacy rate is only 53 percent.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as a source of data providing empirical analysis on poverty and demographics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data regarding households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of data providing empirical analysis on poverty and demographics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2. 5 years of implementation. The mid-term review will involve project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements, and outcomes. It will confirm the plans and processes for expansion of the project. 129. A Monitoring and Evaluation manual was produced under Merankabandi. The manual will be updated for the project, even though most of the monitoring and evaluation processes will be kept the same. The following table describes the main M & E instruments of the project. Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "ner_text": [ + [ + 1680, + 1683, + "named" + ], + [ + 1689, + 1707, + "IBM <> data type" + ], + [ + 1723, + 1733, + "IBM <> reference population" + ], + [ + 1736, + 1749, + "IBM <> reference population" + ] + ], + "validated": true, + "empirical_context": "Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "type": "survey", + "explanation": "In the context, 'IBM' is explicitly described as a survey that collects information, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'IBM' refers to a phone-based survey collecting data from households.", + "contextual_reason_agent": "In the context, 'IBM' is explicitly described as a survey that collects information, confirming its role as a data source.", + "contextual_signal": "described as a survey that collects information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "To project the 2019 utilization for each service, the average over the two baseline years is used, accounting for increases in utilization due to population growth by applying to this the mean annualized, district \u2010 specific population growth rate derived from the 1998 and 2017 Pakistan censuses. 69 It is assumed that the project benefits will materialize from the second year over the period FY21 \u2013 24 and that the magnitude of impacts is expected to depend on the type of intervention that a facility receives: service utilization of facilities that will be improved within their current level of care is assumed to increase by 30 percent between FY20 and FY24, an annualized increase of 6. 8 percent. For facilities being upgraded from BHU to RHC, a fourfold increase is assumed in utilization over the project cycle, or 41 percent annually \u2014 a conservative estimate, as the catchment area of RHCs is typically 10 times that of BHUs. 10. The project is estimated to save the lives of 168 mothers and children under five ( table 1. 3 ).", + "ner_text": [ + [ + 279, + 296, + "named" + ], + [ + 188, + 247, + "Pakistan censuses <> data description" + ], + [ + 265, + 269, + "Pakistan censuses <> reference year" + ], + [ + 274, + 278, + "Pakistan censuses <> reference year" + ], + [ + 279, + 287, + "Pakistan censuses <> data geography" + ] + ], + "validated": true, + "empirical_context": "To project the 2019 utilization for each service, the average over the two baseline years is used, accounting for increases in utilization due to population growth by applying to this the mean annualized, district \u2010 specific population growth rate derived from the 1998 and 2017 Pakistan censuses. 69 It is assumed that the project benefits will materialize from the second year over the period FY21 \u2013 24 and that the magnitude of impacts is expected to depend on the type of intervention that a facility receives: service utilization of facilities that will be improved within their current level of care is assumed to increase by 30 percent between FY20 and FY24, an annualized increase of 6.", + "type": "census", + "explanation": "In this context, the Pakistan censuses are explicitly referenced as a source of population growth data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because censuses are typically structured collections of demographic data.", + "contextual_reason_agent": "In this context, the Pakistan censuses are explicitly referenced as a source of population growth data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 36, + "text": "Yearly Project administrative data and MIS reports CFS Share of beneficiaries with information stored in the new social registry ( % ) Personally identifying data and socio - economic data are registered in the social registry for beneficiaries of the two programs ( in percentage of beneficiaries ) Yearly MIS and Social Registry CFS Beneficiaries of Safety Nets programs ( number ) This indicator measures the number of individual beneficiaries covered by safety nets programs supported by the Bank. Safety nets programs intend to provide social assistance ( kind or cash ) to poor and Yearly MIS reports CFS", + "ner_text": [ + [ + 113, + 128, + "named" + ], + [ + 135, + 162, + "social registry <> data description" + ], + [ + 231, + 244, + "social registry <> reference population" + ], + [ + 496, + 500, + "social registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "Yearly Project administrative data and MIS reports CFS Share of beneficiaries with information stored in the new social registry ( % ) Personally identifying data and socio - economic data are registered in the social registry for beneficiaries of the two programs ( in percentage of beneficiaries ) Yearly MIS and Social Registry CFS Beneficiaries of Safety Nets programs ( number ) This indicator measures the number of individual beneficiaries covered by safety nets programs supported by the Bank. Safety nets programs intend to provide social assistance ( kind or cash ) to poor and Yearly MIS reports CFS", + "type": "registry", + "explanation": "The social registry is explicitly mentioned as storing data for beneficiaries, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of personally identifying and socio-economic data.", + "contextual_reason_agent": "The social registry is explicitly mentioned as storing data for beneficiaries, indicating it functions as a data source.", + "contextual_signal": "described as a management information system that stores records", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 93, + "text": "However, as part of the development of the ERfKE II program, the MoE with the support of the Bank, commissioned a series of eight very detailed preparation studies that, along with other analytic work at the Bank, provide considerable insights into the potential value-added and returns from the various components of the ERfKE program components. The analysis concentrated on findings from three preparation studies and one piece of Bank analytic work in particular: Education Finance ( Georgina Rawle, 2008 ); School Planning ( Bruno Parolin, 2008 ); Teacher Utilization ( Rawlinson and Allak, 2008 ); and \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d ( World Bank, HDNED, 2008 ). 12 Investing in Non-personnel Recurrent Expenditures likely to Enhance Quality 17. Component 3 is the second largest of the five ERfKE II program components ( about US $ 50 million ) as well as a component with aspects clearly related to the improvement of education quality in a manner supported by both the international literature on investing in education quality and the assessment, albeit suggestive, by Rawle ( 2008 ) for Jordan. As Rawle ( 2008: 42 ) discusses, Jordan \u2019 s share of recurrent educational expenditure dedicated to personnel and salaries, while falling, is still high ( compared, for example, to the OECD average of 20 percent ). Recurrent expenditure overall is also low compared to infrastructure investment. Table 3 shows that across all education programs and levels personnel expenses ( mostly salaries ) account for about 14 percent of total recurrent spending, less than half of which is devoted explicitly to quality related activities. Component 3 focuses on Teaching and 12 To a lesser extent, we used the preparation studies on Decentralization, Pre-Service Teacher Training, Vocational Education Reform; and Early Childhood Education.", + "ner_text": [ + [ + 616, + 620, + "named" + ] + ], + "validated": false, + "empirical_context": "However, as part of the development of the ERfKE II program, the MoE with the support of the Bank, commissioned a series of eight very detailed preparation studies that, along with other analytic work at the Bank, provide considerable insights into the potential value-added and returns from the various components of the ERfKE program components. The analysis concentrated on findings from three preparation studies and one piece of Bank analytic work in particular: Education Finance ( Georgina Rawle, 2008 ); School Planning ( Bruno Parolin, 2008 ); Teacher Utilization ( Rawlinson and Allak, 2008 ); and \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d ( World Bank, HDNED, 2008 ). 12 Investing in Non-personnel Recurrent Expenditures likely to Enhance Quality 17.", + "type": "program", + "explanation": "'PISA' is mentioned as part of a study rather than as a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is often associated with educational data and assessments.", + "contextual_reason_agent": "'PISA' is mentioned as part of a study rather than as a structured collection of data or a dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2. 5 years of implementation. The mid-term review will involve project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements, and outcomes. It will confirm the plans and processes for expansion of the project. 129. A Monitoring and Evaluation manual was produced under Merankabandi. The manual will be updated for the project, even though most of the monitoring and evaluation processes will be kept the same. The following table describes the main M & E instruments of the project. Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "ner_text": [ + [ + 957, + 960, + "named" + ] + ], + "validated": false, + "empirical_context": "Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that collects and manages data.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 35, + "text": "As social cohesion is an important part and a higher-level objective that the project aims to contribute to, social cohesion related perception indicators will be designed and monitored through the project as well; while acknowledging that they will not form part of the project \u2019 s results framework ( as performance on social cohesion would not be attributable to only this project ), monitoring them will provide important information on the Casamance and could inform future research or operational responses in this regard. 78. Building on the innovative GIS based platform developed under PPDC, 23 the CEDP will further support deployment of a CDD app for real-time data collection and analysis which will enable project teams to use tools for in-field collection of structured digital data that automatically feed into a centralized M & E system and the Casamance Knowledge Management Platform. The integrated data will include key project indicators, based on tailor-made forms; photos, audio, videos; time and date stamps; and global positioning system coordinates that allow for automated geo-mapping of the information. Using these tools systematically allows the project to enhance the transparency and accuracy of M & E. C. Sustainability 79. Institutional sustainability. The CEDP will be implemented through existing institutions at the national, subnational, and local levels. The project will contribute to institutional sustainability by building the capacities of these 23 http: / / www. sig-ppdc. org /", + "ner_text": [ + [ + 861, + 900, + "named" + ] + ], + "validated": false, + "empirical_context": "78. Building on the innovative GIS based platform developed under PPDC, 23 the CEDP will further support deployment of a CDD app for real-time data collection and analysis which will enable project teams to use tools for in-field collection of structured digital data that automatically feed into a centralized M & E system and the Casamance Knowledge Management Platform. The integrated data will include key project indicators, based on tailor-made forms; photos, audio, videos; time and date stamps; and global positioning system coordinates that allow for automated geo-mapping of the information.", + "type": "platform", + "explanation": "However, it is mentioned as a platform and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Knowledge Management Platform' which suggests a collection of information.", + "contextual_reason_agent": "However, it is mentioned as a platform and not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [] + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 13, + "text": "This will increase demand for water, raising the potential for conflict and 3 According to Ethiopia \u2019 s 2007 Census. 4 World Bank Poverty and Equity Brief for Ethiopia, October 2021. 5 As of May 11, 2022, Ethiopia had registered 470, 760 COVID cases and 7, 510 fatalities: https: / / covid19. who. int / region / afro / country / et 6 World Bank analysis suggests that the poverty headcount in the 23. 5th percentile ( the national poverty rate ) increased by 11. 2 percent and for the bottom 40th percentile by 7. 7 percent between 2018 / 19 and October 2020. Inequality is estimated to have increased, with the Gini coefficient rising to 42 in October / November 2020. See Christina Wieser et al ( 2021 ) \u201c Poverty projections and profiling based on Ethiopia \u2019 s High Frequency Phone Surveys of households using a SWIFT-COVID-19 package \u201d World Bank: Washington DC. 7 2021 Humanitarian Response Plan. https: / / www. wfp. org / countries / ethiopia", + "ner_text": [ + [ + 104, + 115, + "named" + ] + ], + "validated": true, + "empirical_context": "This will increase demand for water, raising the potential for conflict and 3 According to Ethiopia \u2019 s 2007 Census. 4 World Bank Poverty and Equity Brief for Ethiopia, October 2021.", + "type": "census", + "explanation": "The 2007 Census is explicitly mentioned as a source of information in the context, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which is typically a structured collection of data.", + "contextual_reason_agent": "The 2007 Census is explicitly mentioned as a source of information in the context, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 53, + "text": "The DLI disburses US $ 1 million for each percentage point increase in the number of MOH facility sites which installed and operationalized the national EMR platform out of a total number of MOH health facility sites21 in Jordan ( as of calendar year [ CY ] 2023 ) within the limit of US $ 63 million. Description The DLI supports the installation and operationalization of the national EMR platform ( that is, Hakeem ) to produce electronic medical records across all MOH health facility sites. The operational status will include at least four core functions: 1. The ability to query and / or access a record22 in the system. 2. The ability to create and / or update a record in the system. 3. The ability to refer a patient to a different facility with a common facility identifier across the system. 4. The ability to generate administrative activity reports that demonstrate the utilization of the system. 5. The ability for patients to access and view personal medical records. Data source / Agency A delivery notice from the EHS concerning the installation of the EMR platform, and the confirmation notice from the MOH about the installed EMR being operational at supported facility sites. Verification Entity KACE. 21 MOH health facility sites refer to sites that are required to have the EMR platform in accordance with the MOH decision.", + "ner_text": [ + [ + 411, + 417, + "named" + ] + ], + "validated": false, + "empirical_context": "The DLI disburses US $ 1 million for each percentage point increase in the number of MOH facility sites which installed and operationalized the national EMR platform out of a total number of MOH health facility sites21 in Jordan ( as of calendar year [ CY ] 2023 ) within the limit of US $ 63 million. Description The DLI supports the installation and operationalization of the national EMR platform ( that is, Hakeem ) to produce electronic medical records across all MOH health facility sites. The operational status will include at least four core functions: 1.", + "type": "program", + "explanation": "'Hakeem' is mentioned as a national EMR platform, indicating it is a program rather than a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Hakeem' is a dataset because it is associated with electronic medical records.", + "contextual_reason_agent": "'Hakeem' is mentioned as a national EMR platform, indicating it is a program rather than a data source.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [] + }, + { + "filename": "182_multi0page", + "page": 6, + "text": "The CAS progress report presented to the Board on March 21, 2000 confirms that these themes remain fundamental and places greater emphasis on social underpinning of poverty reduction. The proposed project would support the CAS social protection sector objective to improve standards of living through community-based social services targeted to poor and vulnerable population groups. The proposed project would also contribute to the CAS objective of improving governance and institution building by strengthening the capacity of the: ( i ) Government to develop, monitor and evaluate social policy; ( ii ) the social protection administration to plan, coordinate and monitor social services; and ( iii ) local govenmuents / conmmunities, non-governmental institutions and civil society to deliver those services. The project would also: ( i ) strengthen the ability of the statistical authorities to collect reliable household level data, and the capacity of Government agencies to evaluate that data in order to establish and implement an - 3 -", + "ner_text": [ + [ + 918, + 938, + "named" + ] + ], + "validated": false, + "empirical_context": "The proposed project would also contribute to the CAS objective of improving governance and institution building by strengthening the capacity of the: ( i ) Government to develop, monitor and evaluate social policy; ( ii ) the social protection administration to plan, coordinate and monitor social services; and ( iii ) local govenmuents / conmmunities, non-governmental institutions and civil society to deliver those services. The project would also: ( i ) strengthen the ability of the statistical authorities to collect reliable household level data, and the capacity of Government agencies to evaluate that data in order to establish and implement an - 3 -", + "type": "data", + "explanation": "'Household level data' is mentioned as a type of data to be collected, not as a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'household level data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Household level data' is mentioned as a type of data to be collected, not as a specific dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 41, + "text": "Biannual analysis of project management information by the PIU will inform project processes, allowing for timely actions and adjustments ( including learning how to better support females and minority groups ). Regular follow-ups by the frontline implementing agencies ( that is, sectoral hubs, apprenticeship CEMs, and entrepreneurship implementing agencies ) in the form of brief, agile tracer studies will be conducted, with on-the-ground support by a third party. Beneficiary surveys will be conducted by a third party annually to further measure achievement of results and inform adjustments that may be needed in project design and implementation arrangements. Periodic representative, sample-based, and unannounced monitoring visits are also expected to be conducted by third parties over the project period, to supplement internal monitoring and reporting. A midterm review will involve the project \u2019 s stakeholders to collectively review project results and implementation arrangements. 71. At the PIU, an M & E system will be set up to host and manage operational data for project indicators. The PIU will work with the general secretary in charge of technical and vocational education and trades and the Bureau of Planning and Statistics under the Permanent Secretary at MNESR to attend to links and ultimate integration between the project \u2019 s M & E system and the MNESR Education Management Information System. The M & E system will be supported by the World Bank \u2019 s Geo-Enabling for Monitoring and Supervision ( GEMS ) initiative, which leverages the use of portable ICT tools", + "ner_text": [ + [ + 469, + 488, + "named" + ], + [ + 1608, + 1626, + "Beneficiary surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Regular follow-ups by the frontline implementing agencies ( that is, sectoral hubs, apprenticeship CEMs, and entrepreneurship implementing agencies ) in the form of brief, agile tracer studies will be conducted, with on-the-ground support by a third party. Beneficiary surveys will be conducted by a third party annually to further measure achievement of results and inform adjustments that may be needed in project design and implementation arrangements. Periodic representative, sample-based, and unannounced monitoring visits are also expected to be conducted by third parties over the project period, to supplement internal monitoring and reporting.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' are explicitly mentioned as a method to measure achievement of results, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' imply a structured collection of data gathered from participants.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' are explicitly mentioned as a method to measure achievement of results, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 31, + "text": "The MEP and MES have recently developed plans and strategies to overcome HR deficiencies, 31 with support from World Bank Projects. 32 Building on the recommendations formulated in the validated HR strategy, the component will include ( a ) training of regional and local authorities in teacher management; ( b ) building capacity of HR directorates with planning and management tools as well as professional development opportunities; ( c ) strengthening the personnel database and staff capacity to use it; ( d ) completing the census for all teaching and administrative staff in both ministries to ensure adequate control and supervision; ( e ) reviewing and updating standards and legal texts on teacher deployment, mobility, and utilization; ( f ) implementing an online platform to manage the allocation and deployment process; ( g ) raising awareness and training of staff on the new HR legal texts; and ( h ) supporting the deployment and retention measures in refugee - hosting areas. Subcomponent 3. 3. Monitoring, Evaluation, and Accountability 48. This subcomponent will strengthen monitoring and evaluation ( M & E ) and will enhance accountability measures. It will ( a ) strengthen the EMIS to allow for timely, reliable, and disaggregated education data and the establishment of community-based monitoring mechanisms.", + "ner_text": [ + [ + 1201, + 1205, + "named" + ] + ], + "validated": false, + "empirical_context": "This subcomponent will strengthen monitoring and evaluation ( M & E ) and will enhance accountability measures. It will ( a ) strengthen the EMIS to allow for timely, reliable, and disaggregated education data and the establishment of community-based monitoring mechanisms.", + "type": "system", + "explanation": "However, EMIS is described as a system for monitoring and evaluation, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to education data collection.", + "contextual_reason_agent": "However, EMIS is described as a system for monitoring and evaluation, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 89, + "text": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 84 of 87 ANNEX 6: Outcomes of a Survey on Ability and Willingness of Rural Households to Pay for Electricity Services 1. To inform the design of the project subcomponent aiming to electrify households through SHSs, a survey on ability and willingness of Chad rural households to pay for electricity services was conducted in the first half of 2021. Due to time and budget limitations, as well as security constraints, the survey was implemented in the rural areas of three Chadian provinces that were selected with the objective of obtaining representative data that can be extrapolated to the rest of the rural areas of the country. The poverty incidence, together with homogeneity / differences between provinces, played a key role in the stratification of the sample. Table 6. 1. summarizes information on the three selected provinces and sample size, while figure 6. 1. shows a Chad map with the names of provinces. Table 6. 1. Sample Size by Province Province Poverty Incidence ( % ) Sample Size Gu\u00e9ra 60. 0 248 Kanem 27. 7 241 Logone Occidental 43. 5 239 Total 728 Figure 6. 1. Map of Chad Source: Cartography Unit, the World Bank. 2.", + "ner_text": [ + [ + 100, + 185, + "named" + ], + [ + 4, + 14, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> publisher" + ], + [ + 15, + 19, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> data geography" + ], + [ + 411, + 415, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> publication year" + ], + [ + 541, + 558, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> data geography" + ], + [ + 706, + 723, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> data description" + ], + [ + 950, + 954, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> data geography" + ], + [ + 1194, + 1204, + "Survey on Ability and Willingness of Rural Households to Pay for Electricity Services <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 84 of 87 ANNEX 6: Outcomes of a Survey on Ability and Willingness of Rural Households to Pay for Electricity Services 1. To inform the design of the project subcomponent aiming to electrify households through SHSs, a survey on ability and willingness of Chad rural households to pay for electricity services was conducted in the first half of 2021.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of data gathered from a survey conducted to inform project design.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it describes a survey that collects data on households' ability and willingness to pay for electricity services.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data gathered from a survey conducted to inform project design.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 24, + "text": "However, considering the conflict damage and needs estimates at appraisal and the project \u2019 s financial envelope, it will initially prioritize support to the, Afar, Amhara, Benishangul-Gumuz, Oromia, and Tigray regions. 53 These regions have been highly impacted by the recent conflict, and are currently being assessed by the government and the World Bank via a Conflict Impact Assessment and Recovery and Reconstruction Planning exercise. These regions also host large numbers of IDPs, and are highly vulnerable to the impacts of climate change. During implementation, other regions will be considered based on resource availability and needs. The methodology to select Woredas within the regions will be articulated in the Project Operations Manual ( POM, expected by effectiveness ), taking into account: ( i ) the results of the conflict damage 51 The term \u201c basic services \u201d refers to education, health, WASH, agriculture, and other public services, mobile and / or permanent. 52 Multi-sectoral services are defined as at least two of the following: medical, psychosocial, police / security, and legal support. This indicator is calculated as the number of GBV cases that receive at least two services ( including referrals ) out of the total number of GBV cases that access services. This will be further disaggregated by point of entry for services. At the service provider level and as part of a process evaluation, analysis data from available information management tools will also give a deeper understanding of services available, unavailable, or declined by survivors. As not all survivors need or want to access more than one service, it is not expected to see a percentage increase once about 75 percent of survivors are accessing at least two services services. 53 In alphabetical order. Support will be provided as equitable share of the resources based on the assessment of damages.", + "ner_text": [ + [ + 1425, + 1438, + "named" + ] + ], + "validated": false, + "empirical_context": "This will be further disaggregated by point of entry for services. At the service provider level and as part of a process evaluation, analysis data from available information management tools will also give a deeper understanding of services available, unavailable, or declined by survivors. As not all survivors need or want to access more than one service, it is not expected to see a percentage increase once about 75 percent of survivors are accessing at least two services services.", + "type": "data", + "explanation": "'Analysis data' is not a dataset but rather refers to the data being analyzed, without indicating a structured collection or source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'analysis data' refers to a structured collection of data used for empirical analysis.", + "contextual_reason_agent": "'Analysis data' is not a dataset but rather refers to the data being analyzed, without indicating a structured collection or source.", + "contextual_signal": "mentioned only as data being analyzed, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "159_38147core", + "page": 11, + "text": "Technical Audit: This would independently monitor whether construction complies with agreed technical standards. It would audit a 5 % sample o f beneficiaries in all refugee camps and i t s recommendations would feed into ongoing construction and subsequent rounds o f project supervision. It would review civil works in the water and environment mitigation components as well. 30. Housing. Assessment Survey: The IDP land profile has changed significantly in recent months due to the rapid regularization o f property title. A Housing Assessment Survey would revalidate the household profile, the classification o f house and land ownership in each refugee camp and the precise construction needs in the case o f a partly-completed house. It would build upon the results o f the UNHCR supervised census, the EA and the Social Assessment ( SA ). The survey would help technical officers to certify payment to eligible households, supervise construction and facilitate implementation roll out. 6", + "ner_text": [ + [ + 528, + 553, + "named" + ], + [ + 166, + 179, + "Housing Assessment Survey <> data geography" + ], + [ + 575, + 592, + "Housing Assessment Survey <> data description" + ], + [ + 780, + 785, + "Housing Assessment Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Assessment Survey: The IDP land profile has changed significantly in recent months due to the rapid regularization o f property title. A Housing Assessment Survey would revalidate the household profile, the classification o f house and land ownership in each refugee camp and the precise construction needs in the case o f a partly-completed house. It would build upon the results o f the UNHCR supervised census, the EA and the Social Assessment ( SA ).", + "type": "survey", + "explanation": "This is a dataset as it is explicitly described as a Housing Assessment Survey that aims to collect and validate data on household profiles and property ownership.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects household profiles and classifications.", + "contextual_reason_agent": "This is a dataset as it is explicitly described as a Housing Assessment Survey that aims to collect and validate data on household profiles and property ownership.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 20, + "text": "In 2019, the Djiboutian government developed the Politique Nationale de Formation Professionelle ( or the National Vocational Training Policy ) which now provides a more comprehensive assessment of the TVET sector and the reforms that will be needed to improve the functioning of this subsector. However, the training policy fall short of distinguishing interventions and targeted activities between different social groups including youth, women, refugees, and other vulnerable groups. 24. The GoD has established the Direction G\u00e9n\u00e9rale de l \u2019 Enseignement Technique et de la Formation Professionnelle ( DGETFP ) or the General Directorate of Technical Education and Professional Training. This was established in 2012 and was expected to be responsible for development and the conduct of the country \u2019 s policies on technical education and vocational training. This office was restructured in 2018 and was expected to help the planning, budgeting, and implementing TVET programs, carry out all monitoring and evaluation functions, function 22 The UNHCR and the World Bank have worked systematically to include persons of concern in the COVID-19 response efforts in a number of low - and middle-income countries. The two agencies also undertook a set of telephone surveys to look at the socio-economic impacts of the COVID-19 policies adopted around the world and children \u2019 s mental health. 23 This decree was signed in September 2020 and thereby expanding access to skills development opportunities for refugee populations.", + "ner_text": [ + [ + 1255, + 1272, + "named" + ], + [ + 448, + 456, + "telephone surveys <> reference population" + ], + [ + 1049, + 1054, + "telephone surveys <> publisher" + ], + [ + 1063, + 1073, + "telephone surveys <> publisher" + ], + [ + 1112, + 1130, + "telephone surveys <> reference population" + ], + [ + 1288, + 1335, + "telephone surveys <> data description" + ], + [ + 1432, + 1436, + "telephone surveys <> publication year" + ], + [ + 1506, + 1525, + "telephone surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "This office was restructured in 2018 and was expected to help the planning, budgeting, and implementing TVET programs, carry out all monitoring and evaluation functions, function 22 The UNHCR and the World Bank have worked systematically to include persons of concern in the COVID-19 response efforts in a number of low - and middle-income countries. The two agencies also undertook a set of telephone surveys to look at the socio-economic impacts of the COVID-19 policies adopted around the world and children \u2019 s mental health. 23 This decree was signed in September 2020 and thereby expanding access to skills development opportunities for refugee populations.", + "type": "survey", + "explanation": "In this context, 'telephone surveys' are explicitly mentioned as a method used to gather data on socio-economic impacts, confirming their role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'telephone surveys' is a dataset because it refers to a structured method of data collection.", + "contextual_reason_agent": "In this context, 'telephone surveys' are explicitly mentioned as a method used to gather data on socio-economic impacts, confirming their role as a data source.", + "contextual_signal": "described as a method used to gather data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "170_multi0page", + "page": 13, + "text": "In the short term, NaCSA integration into emerging national policy and institutional frameworks such as the National Recovery Strategy, the PRSP and the Medium Term Expenditure Framework, is anticipated. 3. Benefits and target population: Project interventions have been designed to reduce the risk of renewed conflict, restore confidence in the government and create the foundation for democratic and sustainable local development. The Community Development Program will finance social and economic infrastructure and support social capital building activities to facilitate the restoration of basic social services such as health and education and provide an incentive for teachers, health workers and displaced persons to return to their communities. The Rural Public Works and Shelter programs will provide employment for demobilized soldiers and unemployed youth, housing for displaced persons and feeder roads to stimulate local economic activities. The innovative activities including training and technical support will strengthen local government capacity to plan, contract, manage and sustain investments in local development and engage a wide array of stakeholders in participatory processes that contribute to sustainable local development. Targeting will be consistent with the Government ' s 2002-2003 National Recovery Strategy and the March 3, 2002 Transitional Support Strategy. Resources will be directed to ( a ) newly accessible areas that have not received any support in more than a decade; and ( b ) remote areas that have received little, if any support from the ongoing IDA-financed CRRP or other similar projects. The results of the living standards measurement survey currently underway will be available at the end of 2003 and will be used to review the validity of existing targeting modalities. Target Populations: Target groups include demobilized soldiers and unemployed youth, refugees, IDPs, female-headed households, child laborers, orphans, primary school dropouts, - 8 -", + "ner_text": [ + [ + 1659, + 1694, + "named" + ], + [ + 826, + 867, + "living standards measurement survey <> reference population" + ], + [ + 1746, + 1750, + "living standards measurement survey <> publication year" + ], + [ + 1867, + 1908, + "living standards measurement survey <> reference population" + ], + [ + 1910, + 1918, + "living standards measurement survey <> reference population" + ], + [ + 1920, + 1924, + "living standards measurement survey <> reference population" + ], + [ + 1926, + 1950, + "living standards measurement survey <> reference population" + ], + [ + 1952, + 1966, + "living standards measurement survey <> reference population" + ], + [ + 1968, + 1975, + "living standards measurement survey <> reference population" + ], + [ + 1977, + 2000, + "living standards measurement survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Resources will be directed to ( a ) newly accessible areas that have not received any support in more than a decade; and ( b ) remote areas that have received little, if any support from the ongoing IDA-financed CRRP or other similar projects. The results of the living standards measurement survey currently underway will be available at the end of 2003 and will be used to review the validity of existing targeting modalities. Target Populations: Target groups include demobilized soldiers and unemployed youth, refugees, IDPs, female-headed households, child laborers, orphans, primary school dropouts, - 8 -", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that will provide results used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on living standards.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that will provide results used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 137, + "text": "It also represents a public good. Establishing an integrated EMIS covering all four education ministries under the ERSP is expected to address information asymmetries in the sector as well as improve sector management. Expected Development Impact 10. The project supports the Government in achieving its goals in the education sector as outlined in the Cameroon Vision document \u2018 Cameroun emergent \u00e0 l \u2019 horizon 2035 \u2019 and described in the Government ESS ( 2013 \u2013 2020 ). These goals include ( a ) promoting access and equity, ( b ) improving quality and relevance, ( c ) strengthening sector governance and management, ( d ) adopting sustainable financing mechanisms, ( e ) strengthening institutional aspects of education, and ( e ) developing modalities for M & E. 11. Aligned with the Government \u2019 s strategical goals, the project will contribute to improving equitable access to quality pre-primary, primary, and lower secondary education with a focus on ZEPs. In the long-term, these outcomes are expected to contribute to increasing the number of years of schooling and, consequently, increased probability of employment and higher labor earnings for workers. Additional benefits are expected to come from systemwide reforms, related to system management, accountability, and M & E. Costs 12. Cameroon \u2019 s spending on education is quite low by international standards ( figure 5. 1 ). Total public spending in support of education was equivalent to approximately 3 percent of GDP and 13. 6 percent of total Government expenditures in 2015. As a share of Government expenditures, education spending is much lower compared to the other African countries.", + "ner_text": [ + [ + 61, + 65, + "named" + ] + ], + "validated": false, + "empirical_context": "It also represents a public good. Establishing an integrated EMIS covering all four education ministries under the ERSP is expected to address information asymmetries in the sector as well as improve sector management. Expected Development Impact 10.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to information management in education.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 45, + "text": "Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation, MIS database. Data collected by BRD, MINEMA and BDF. Responsibility for Data Collection MINEMA, BRD, BDF Improved environmental management in the target areas People benefitting from enhanced resilience of terrestrial and aquatic systems ( Number of people ) Description Quantitative indicator counting number of beneficiaries in the catchment area where environmental management activities under component 3 have been implemented. Data is disaggregated by gender, youth ( 16-30 years, in line with GoR guidelines ) and status ( refugee / host community member ). The youth target of 26 % is based on the youth population in the five RHDs 2022 census ). The beneficiary number includes the camp-based refugee population and people living in the villages surrounding the five camps. Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation. MIS database with population statistics for project sites cross - tabulated with hectarage benefiting from improved terrestrial and aquatic systems. Responsibility for Data Collection MINEMA Monitoring & Evaluation Plan: Intermediate Results Indicators by Components Access to Services and Socio-economic Investments Climate-resilient infrastructure subprojects for basic services ( education, health, water and sanitation ) completed in refugee hosting districts ( Number ) Description Quantitative indicator counting number of infrastructure subprojects completed in refugee hosting districts. Data is disaggregated by type of sub-project ( education, health, water and sanitation ). Climate resilience is defined by compliance with GoR standards. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA", + "ner_text": [ + [ + 745, + 761, + "named" + ] + ], + "validated": true, + "empirical_context": "Data is disaggregated by gender, youth ( 16-30 years, in line with GoR guidelines ) and status ( refugee / host community member ). The youth target of 26 % is based on the youth population in the five RHDs 2022 census ). The beneficiary number includes the camp-based refugee population and people living in the villages surrounding the five camps.", + "type": "census", + "explanation": "This is indeed a dataset as it provides empirical data on the youth population used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on the youth population used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "034_Lebanon-Beirut-Housing-Rehabilitation-and-Cultural-and-Creative-Industries-Recovery", + "page": 13, + "text": "World Bank, Washington, DC. \u00a9 World Bank. https: / / openknowledge. worldbank. org / handle / 10986 / 34401 License: CC BY 3. 0 IGO 17 Socio-economic vulnerability: The project will prioritize the poorest and the most vulnerable households affected by the blast ( e. g. low - income, FHH, refugees ), based on a socioeconomic field survey. The vulnerability criteria considers social ( i. e. presence of elderly, female headed households, people with disabilities, refugees, building located in an area of higher social vulnerability ) and economic vulnerability ( i. e. household receiving rental support, presence of CCI, level of income ). 18 World Bank Group; European Union; United Nations. ( 2020 ). Beirut Rapid Damage and Needs Assessment. Washington, DC.: World Bank Group. 19 The terms \" housing \", \" units \", \" apartments \" are used equally throughout the document to refer to the individual residential unit. The term \" building \" refers to the urban infrastructure that may contain one or more than one housing unit and can be complemented by other uses, such as commercial. 20 The 87, 552 damaged residential building represented about 51 percent of the 171, 887 housing units assessed. 21 Combined low-income apartment buildings and low-income, single-family housing asset typologies.", + "ner_text": [ + [ + 312, + 338, + "named" + ], + [ + 0, + 10, + "socioeconomic field survey <> publisher" + ], + [ + 12, + 26, + "socioeconomic field survey <> data geography" + ], + [ + 30, + 40, + "socioeconomic field survey <> publisher" + ], + [ + 289, + 297, + "socioeconomic field survey <> reference population" + ], + [ + 439, + 463, + "socioeconomic field survey <> reference population" + ], + [ + 465, + 473, + "socioeconomic field survey <> reference population" + ], + [ + 646, + 656, + "socioeconomic field survey <> publisher" + ], + [ + 698, + 702, + "socioeconomic field survey <> publication year" + ], + [ + 748, + 762, + "socioeconomic field survey <> data geography" + ], + [ + 765, + 775, + "socioeconomic field survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "g. low - income, FHH, refugees ), based on a socioeconomic field survey. The vulnerability criteria considers social ( i.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data gathered through a field survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects socioeconomic data.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data gathered through a field survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 25, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 21 of 47 ( FMS ), and an environmental and social safeguards specialist / officer. 47 The PMUs will be fully authorized to implement the planned activities approved by the Project Steering Committee ( PSC ). 46. A Project Coordination Committee ( PCC ) will be set up to coordinate project implementation and a PSC will be set up to provide strategic guidance and oversight. The PCC, co \u2010 chaired by Secretaries Health and Secondary Education, will meet quarterly. The PSC, chaired by the Additional Chief Secretary, will meet biannually ( see figure 2 ). Figure 2. Institutional and Implementation Arrangements B. Results Monitoring and Evaluation Arrangements 47. Building on the results chain, the M & E framework identified indicators to track project implementation progress and impact. The PDO \u2010 level health indicators are taken from the RMNCHN indicators in the DHIS, while digitization and integration of various HMIS is an intermediate indicator. The education indicators are taken from the EMIS. Where possible, relevant indicators will be disaggregated by gender. Discussions with the GoB and the UNHCR have confirmed, however, that beneficiary data by nationality will not be routinely collected or publicly released. 48.", + "ner_text": [ + [ + 999, + 1003, + "named" + ] + ], + "validated": false, + "empirical_context": "Building on the results chain, the M & E framework identified indicators to track project implementation progress and impact. The PDO \u2010 level health indicators are taken from the RMNCHN indicators in the DHIS, while digitization and integration of various HMIS is an intermediate indicator. The education indicators are taken from the EMIS.", + "type": "system", + "explanation": "However, HMIS is referred to as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is mentioned in the context of tracking indicators and project implementation.", + "contextual_reason_agent": "However, HMIS is referred to as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 120, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 115 of 117 within the provisions of PPADA, 2015 and Regulations 2020. Training on auditing in the NEMIS Platform and monitor progressively how many of the audits are carried out on this platform. Fiduciary Systems MoE Recurrent Yearly Audits conducted on the NEMIS Platform. Preparation for Kenya to participate in the 2025 Programme for International Student Assessment \u2019 s for Development ( PISA-D ). Technical DLI 6 MoE and KNEC Due Date 31-Dec - 2025 a ) admission / signing up; b ) instrument translation and adaptation of instrument; ( c ) pre-test; ( d ) main survey; and ( e ) data analysis, report writing and dissemination Strengthened supply chain for sanitary towels and accountability Technical DLI 5 MoE Due Date 29-Dec - 2023 A module in NEMIS showing distribution of sanitary towels by school and number of girls benefiting. Review and analyze NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Environmental and Social Systems DLI 6 MoE KISE Due Date 30-Jun-2023 Reports generated from NEMIS with granular data on special needs children by category of disability, age and gender. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies.", + "ner_text": [ + [ + 336, + 350, + "named" + ] + ], + "validated": false, + "empirical_context": "Training on auditing in the NEMIS Platform and monitor progressively how many of the audits are carried out on this platform. Fiduciary Systems MoE Recurrent Yearly Audits conducted on the NEMIS Platform. Preparation for Kenya to participate in the 2025 Programme for International Student Assessment \u2019 s for Development ( PISA-D ).", + "type": "system", + "explanation": "However, the NEMIS Platform is described as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is associated with audits and monitoring data.", + "contextual_reason_agent": "However, the NEMIS Platform is described as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 49, + "text": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 44 of 54 on a regular basis in the last quarter Percentage of vaccination sites with functional cold chain The project will track the continuous functionality of the cold supply chain to ensure that vaccines are - at all times - maintained at optimal condition until being administered to beneficiaries 3 months MOHE and TPMA reports TPM MOHE / TPMA Percentage of reported serious AEFI cases for which investigations were initiated within 48 hours This indicator will measure the percentage of reported serious Adverse Events Following Immunization ( AEFI ) post COVID-19 vaccinations that have been reported to the Iraqi MOHE surveillance system, GRM and other channels that have been addressed and investigated within 48 hours of reporting to the total number of reported AEFIs. The aim is to measure the adequate and timely response and investigation to the reported AEFIs reported post COVID-19 vaccinations. 3 months Iraq MOHE surveillance system, GRM data, MOHE incident reporting and media sources.", + "ner_text": [ + [ + 682, + 712, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 44 of 54 on a regular basis in the last quarter Percentage of vaccination sites with functional cold chain The project will track the continuous functionality of the cold supply chain to ensure that vaccines are - at all times - maintained at optimal condition until being administered to beneficiaries 3 months MOHE and TPMA reports TPM MOHE / TPMA Percentage of reported serious AEFI cases for which investigations were initiated within 48 hours This indicator will measure the percentage of reported serious Adverse Events Following Immunization ( AEFI ) post COVID-19 vaccinations that have been reported to the Iraqi MOHE surveillance system, GRM and other channels that have been addressed and investigated within 48 hours of reporting to the total number of reported AEFIs. The aim is to measure the adequate and timely response and investigation to the reported AEFIs reported post COVID-19 vaccinations.", + "type": "system", + "explanation": "However, the context indicates that it is a system mentioned for reporting and investigation purposes, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'surveillance system', which could imply data collection.", + "contextual_reason_agent": "However, the context indicates that it is a system mentioned for reporting and investigation purposes, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 12, + "text": "Foundational ID systems11 are broadly recognized as key enablers for inclusive digitalization and development. For people, the ability to establish and verify their identity is often a prerequisite for access to services and economic opportunities, such as social protection, healthcare, education, financial services, and employment. Proof of legal identity is also the basis for exercising rights, such as property ownership, and nationality. For governments and businesses, ID systems can serve as a platform for more effective and efficient service delivery by enabling the unique identification and verification of persons. Importantly, ID systems can promote greater inclusion by de-risking and reducing the costs of 8 UNHCR ' s Ethiopia Update on the Total Number of Refugees and Asylum Seekers as of August 31, 2023. 9 In Tigray, new internal displacement data has been reported, including 1, 021, 798 IDPs ( 250, 468 households ) in 643 sites across six zones ( excluding 20 woredas / districts hard to reach due to security or environmental factors ). 10 IOM. 2023. Ethiopia National Displacement Report 16 - Site Assessment Round 33 and Village Assessment Survey Round 16: Nov 2022 - Jun 2023. https: / / reliefweb. int / report / ethiopia / ethiopia-national-displacement-report-16-site-assessment-round-33-and-village-assessment-survey-round - 16-november-2022-june-2023. 11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "ner_text": [ + [ + 1657, + 1677, + "named" + ], + [ + 819, + 823, + "population registers <> publication year" + ], + [ + 1632, + 1651, + "population registers <> data type" + ] + ], + "validated": true, + "empirical_context": "11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "type": "registry", + "explanation": "In the context, 'population registers' are mentioned as a type of foundational ID system, which indicates they serve as a data source for identity verification.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'population registers' is a dataset because it refers to a structured collection of identity information.", + "contextual_reason_agent": "In the context, 'population registers' are mentioned as a type of foundational ID system, which indicates they serve as a data source for identity verification.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 36, + "text": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 32 of 44 component 1. 2 e data Community counselors trained to lead community sessions Number of community counselors ( m\u00e8res conseill\u00e8res ) trained to deliver community sessions as part of the accompanying measures of the project Quarterly Project administrativ e data Routine monitoring SEAS Beneficiaries satisfied with community sessions Percentage of beneficiaries of the cash transfer program who participate in community sessions as part of the accompanying measures that are satisfied with the sessions Twice Survey Survey at middle and end of project SEAS PNSF beneficiary households with biometric data in the social registry Percentage of PNSF beneficiary households with biometric data either directly in the social registry or in a database linked to the social registry Quarterly Project administrativ e data Routine monitoring SEAS PNSF beneficiary households paid within 15 days of scheduled payment date Percentage of beneficiaries paid within 15 days of date specified in POM Quarterly Project administrativ e data Routine monitoring SEAS People in the social registry that received national identity cards with the support of the project Number of people for whom the project facilitated obtaining a national identity card or birth certificate Quarterly Project administrativ e Routine monitoring SEAS PNSF complaints registered electronically and resolved by the time of the next cash transfer payment Percentage of complaints i ) registered in PNSF MIS and ii ) with a resolution Quarterly Project administrativ e data Routine monitoring SEAS", + "ner_text": [ + [ + 681, + 695, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 32 of 44 component 1. 2 e data Community counselors trained to lead community sessions Number of community counselors ( m\u00e8res conseill\u00e8res ) trained to deliver community sessions as part of the accompanying measures of the project Quarterly Project administrativ e data Routine monitoring SEAS Beneficiaries satisfied with community sessions Percentage of beneficiaries of the cash transfer program who participate in community sessions as part of the accompanying measures that are satisfied with the sessions Twice Survey Survey at middle and end of project SEAS PNSF beneficiary households with biometric data in the social registry Percentage of PNSF beneficiary households with biometric data either directly in the social registry or in a database linked to the social registry Quarterly Project administrativ e data Routine monitoring SEAS PNSF beneficiary households paid within 15 days of scheduled payment date Percentage of beneficiaries paid within 15 days of date specified in POM Quarterly Project administrativ e data Routine monitoring SEAS People in the social registry that received national identity cards with the support of the project Number of people for whom the project facilitated obtaining a national identity card or birth certificate Quarterly Project administrativ e Routine monitoring SEAS PNSF complaints registered electronically and resolved by the time of the next cash transfer payment Percentage of complaints i ) registered in PNSF MIS and ii ) with a resolution Quarterly Project administrativ e data Routine monitoring SEAS", + "type": "data", + "explanation": "'Biometric data' is mentioned as part of a broader context but does not function as a structured collection of data on its own.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'biometric data' is a dataset because it refers to a specific type of information collected.", + "contextual_reason_agent": "'Biometric data' is mentioned as part of a broader context but does not function as a structured collection of data on its own.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "195_multi-page", + "page": 24, + "text": "Annex 2 Page 4 of 1 Issues Health information systems are necessary because currently it is impossible to perform the proper epidemiological and public health analysis on a national level. It is also impossible to have significant decentralization of management authority without appropriate information infrastructure. No systems are in place at the hospital level which would support efforts to run hospitals more efficiently by measuring their efficiency ( patient flow management, inventory management, cost effectiveness and quality of physicians, as well as utilization ). If the Government intends to decentralize decision making and to hold organizations responsible for their budget, then information systems are needed. The few existing management information systems are managed and operated independently, with no data sharing across institutions, except for the minimal data set submitted to the MOH at periodic intervals, usually on paper. The network and database systems to be introduced under the project will encourage data sharing and reduction of duplicate resources. Project Inputs Financing would be provided for equipment, technical assistance, software developmentlpurchase, and training. Specifically, the project will support the collection, analysis, distribution, and storage of epidemiological data; equip hospital managers with tools for improving quality, efficiency, and physician effectiveness, and provide an infrastructure that will support decentralization.", + "ner_text": [ + [ + 27, + 53, + "named" + ] + ], + "validated": false, + "empirical_context": "Annex 2 Page 4 of 1 Issues Health information systems are necessary because currently it is impossible to perform the proper epidemiological and public health analysis on a national level. It is also impossible to have significant decentralization of management authority without appropriate information infrastructure.", + "type": "system", + "explanation": "However, the context indicates that 'Health information systems' are described as necessary infrastructure rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information systems' which can imply data collection.", + "contextual_reason_agent": "However, the context indicates that 'Health information systems' are described as necessary infrastructure rather than a structured collection of data.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 132, + "text": "The World Bank Cameroon Education Reform Support Project ( P160926 ) Page 129 of 148 Monitoring and Evaluation 67. Results monitoring at the PDO level. For the first indicator ( distribution of teachers ), national statistical yearbooks will be used. For the other PDO level indicators, General Inspectorate of MINEDUB and DEMP will supervise the production of data, which will be subject to independent verification. 68. Results monitoring for intermediates indicators and DLIs. Each result, area will have a specific M & E mechanism ( table 3. 11 ). This system will need to be sufficiently robust to verify results, disburse against agreed indicators ( DLIs ), monitor funding to ensure that it is being used for intended purposes, prepare regular reports, and evaluate the reforms of the project. To ensure the expected progress, a baseline for each indicator has been defined. Table 3. 11.", + "ner_text": [ + [ + 206, + 236, + "named" + ] + ], + "validated": true, + "empirical_context": "Results monitoring at the PDO level. For the first indicator ( distribution of teachers ), national statistical yearbooks will be used. For the other PDO level indicators, General Inspectorate of MINEDUB and DEMP will supervise the production of data, which will be subject to independent verification.", + "type": "yearbook", + "explanation": "In this context, national statistical yearbooks are explicitly mentioned as a source of data for monitoring indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because yearbooks typically compile and present statistical data in a structured format.", + "contextual_reason_agent": "In this context, national statistical yearbooks are explicitly mentioned as a source of data for monitoring indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 25, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 21 of 86 ensure transparency, accountability, and digital inclusion in the delivery. Beneficiaries will be selected through a combination of targeting instruments. First, the poorest communes in the country will be selected making use of existing poverty maps, hazard exposure maps and climate change vulnerability maps. The collines29 within these communes, given that their poverty profiles are very similar, will be selected randomly through a lottery. This will be done to avoid any political interference in the selection of the collines. Secondly, all households in the selected collines will be registered in the country \u2019 s social registry and a PMT score will be calculated for all of them. Beneficiary quota will be established for each colline and the project will select the poorest households in each colline according to the PMT until reaching the proposed quota. Finally, a community validation process will be carried out to allow communities to correct inclusion and exclusion errors that might occur in the targeting process. The targeting process might be adapted in urban and refugee areas if necessary. Beneficiaries will receive Burundi Francs ( BIF ) 36, 00030 per month ( approx. US $ 18 ). This amount is equivalent to 20 percent of the household consumption of an average poor household, which is aligned to international standards. 49.", + "ner_text": [ + [ + 315, + 335, + "named" + ] + ], + "validated": false, + "empirical_context": "Beneficiaries will be selected through a combination of targeting instruments. First, the poorest communes in the country will be selected making use of existing poverty maps, hazard exposure maps and climate change vulnerability maps. The collines29 within these communes, given that their poverty profiles are very similar, will be selected randomly through a lottery.", + "type": "map", + "explanation": "However, 'hazard exposure maps' are not described as a data source but rather as tools for selection criteria.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'maps' can represent structured data visually.", + "contextual_reason_agent": "However, 'hazard exposure maps' are not described as a data source but rather as tools for selection criteria.", + "contextual_signal": "mentioned only as a tool for selection, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 4, + "validated": 3, + "not_validated": 1 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 63, + "text": "Administrative Records and Trainer Files DGETFP IRI # 17: Share of trainers who have recent ( < 3 years ago ) industry experience Sub-component 2. 2 Quarterly Each participating institution would have to maintain a data set that will be updated quarterly given the introduction", + "ner_text": [ + [ + 215, + 223, + "named" + ], + [ + 67, + 75, + "data set <> reference population" + ] + ], + "validated": true, + "empirical_context": "Administrative Records and Trainer Files DGETFP IRI # 17: Share of trainers who have recent ( < 3 years ago ) industry experience Sub-component 2. 2 Quarterly Each participating institution would have to maintain a data set that will be updated quarterly given the introduction", + "type": "dataset", + "explanation": "This is indeed a dataset as it is described as a structured collection of data that will be updated quarterly.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'data set' that institutions must maintain and update.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a structured collection of data that will be updated quarterly.", + "contextual_signal": "mentioned as a data set that will be updated quarterly", + "tags": [] + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 82, + "text": "The World Bank Formal Employment Creation Project ( P171766 ) Page 78 of 86 ANNEX 4: Additional Sectoral Background 1. The World Bank \u2019 s enterprise surveys have been collected to understand what firms experience in the private sector. It follows a global methodology and provides a wide range of business environment topics including access to finance indicators. 2. To estimate the effects of access to finance constraints on firms \u2019 employment growth, both subjective and objective measures of the investment climate ( that is, access to finance ) were used. The regression results are presented in tables 4. 1 to 4. 4 based on the models where employment growth is the dependent variable and the independent variables are those measuring access to finance constraints ( such as industry, sector, firm \u2019 s ownership and firm \u2019 s age ). 3. The primary variable of interest is having access to finance obstacle, which is the access to finance constraint perceived by the firms as the top obstacle for business environment. If the access to finance is the top obstacle by the firms for employment growth, it will have a negative sign.", + "ner_text": [ + [ + 138, + 156, + "named" + ], + [ + 4, + 14, + "enterprise surveys <> publisher" + ], + [ + 123, + 133, + "enterprise surveys <> publisher" + ], + [ + 196, + 201, + "enterprise surveys <> reference population" + ], + [ + 249, + 255, + "enterprise surveys <> data geography" + ], + [ + 335, + 363, + "enterprise surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Formal Employment Creation Project ( P171766 ) Page 78 of 86 ANNEX 4: Additional Sectoral Background 1. The World Bank \u2019 s enterprise surveys have been collected to understand what firms experience in the private sector. It follows a global methodology and provides a wide range of business environment topics including access to finance indicators.", + "type": "survey", + "explanation": "In this context, 'enterprise surveys' are explicitly mentioned as collected data to understand firm experiences, confirming their role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'enterprise surveys' are typically structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, 'enterprise surveys' are explicitly mentioned as collected data to understand firm experiences, confirming their role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "088_UGANDA-PAD-04272018", + "page": 16, + "text": "The Program scope will also be expanded to better align with NDP II goals of wealth creation as well as the World Bank ' s twin goals on ending extreme poverty and boosting shared prosperity. Various elements have been introduced into the design of the AF to strengthen participating MLGs impact on promoting local economic development ( LED ) and job creation. Local firms in the formal sector face considerable constraints in establishing and sustaining their businesses, limiting prospects for the creation of more and better jobs. For example, according to World Bank Enterprise Survey Data for Uganda ( 2013 ), the main constraints include infrastructure deficits and access to land; regulatory barriers and corruption; and access to finance20. LGs have a role in helping or hindering the alleviation of these constraints to support private sector development and, consequently, job creation. The recent study undertaken by the World Bank / Ministry of Local Government ( MoLG ) on LED21 highlighted that LGs are currently doing little in this direction, with their main relationship with the private sector centering on tax collection and requests for donations. The study outlined some of the constraints faced by the private sector which are within the mandate of LGs. These fell under the four broad categories of infrastructure deficits, regulatory barriers, absence of enterprise support and institutional capacity gaps within LGs. 23.", + "ner_text": [ + [ + 561, + 594, + "named" + ], + [ + 108, + 118, + "World Bank Enterprise Survey Data <> publisher" + ], + [ + 561, + 571, + "World Bank Enterprise Survey Data <> publisher" + ], + [ + 599, + 605, + "World Bank Enterprise Survey Data <> data geography" + ], + [ + 608, + 612, + "World Bank Enterprise Survey Data <> publication year" + ], + [ + 645, + 687, + "World Bank Enterprise Survey Data <> data description" + ], + [ + 689, + 723, + "World Bank Enterprise Survey Data <> data description" + ], + [ + 933, + 943, + "World Bank Enterprise Survey Data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Local firms in the formal sector face considerable constraints in establishing and sustaining their businesses, limiting prospects for the creation of more and better jobs. For example, according to World Bank Enterprise Survey Data for Uganda ( 2013 ), the main constraints include infrastructure deficits and access to land; regulatory barriers and corruption; and access to finance20. LGs have a role in helping or hindering the alleviation of these constraints to support private sector development and, consequently, job creation.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as 'World Bank Enterprise Survey Data' which is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in the term and is associated with a recognized survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as 'World Bank Enterprise Survey Data' which is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 60, + "text": "Semi-Annual Woreda Project Coordination Teams Aggregation of Woreda-level data on IDP beneficiaries as per the relevant Kebele \u2019 s IDP figures MoF FPCU Share of reported GBV cases who receive access to multi-sectoral response services Multi-sectoral services are defined as at least two of the following: medical, Baseline and semi-annual Data from available information Aggregation and averaging of results. The results will MoWSA FPIU", + "ner_text": [ + [ + 61, + 78, + "named" + ], + [ + 12, + 18, + "Woreda-level data <> data geography" + ], + [ + 82, + 99, + "Woreda-level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Semi-Annual Woreda Project Coordination Teams Aggregation of Woreda-level data on IDP beneficiaries as per the relevant Kebele \u2019 s IDP figures MoF FPCU Share of reported GBV cases who receive access to multi-sectoral response services Multi-sectoral services are defined as at least two of the following: medical, Baseline and semi-annual Data from available information Aggregation and averaging of results. The results will MoWSA FPIU", + "type": "data", + "explanation": "This is indeed a dataset as it refers to aggregated data on IDP beneficiaries at the woreda level, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific level of data aggregation related to beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to aggregated data on IDP beneficiaries at the woreda level, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 54, + "text": "Frequency Quarterly Data source UNICEF Methodology for Data Collection UNICEF to provide data / TPM to verify Responsibility for Data Collection UNICEF; PMU Percentage of completeness of reporting by facilities Description Percentage of facilities that submit complete reports within the required deadline. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / PMU Percentage of states that conducted quarterly coordination meetings with a review of data and documented with minutes including action items and follow-up Description Percentage of State \u2019 s quarterly health service delivery coordination meetings for the health sector held with a review of data included in the meeting and documented with minutes which include action items and follow-up on action items. Meetings are to be held quarterly in each state. Four meetings are expected each year per state. CHDs and implementing partners will be participated in the review Frequency Quarterly Data source MoH / WHO Methodology for Data Collection WHO to provide data / TPM to verify", + "ner_text": [ + [ + 339, + 344, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Quarterly Data source UNICEF Methodology for Data Collection UNICEF to provide data / TPM to verify Responsibility for Data Collection UNICEF; PMU Percentage of completeness of reporting by facilities Description Percentage of facilities that submit complete reports within the required deadline. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / PMU Percentage of states that conducted quarterly coordination meetings with a review of data and documented with minutes including action items and follow-up Description Percentage of State \u2019 s quarterly health service delivery coordination meetings for the health sector held with a review of data included in the meeting and documented with minutes which include action items and follow-up on action items. Meetings are to be held quarterly in each state.", + "type": "system", + "explanation": "However, DHIS2 is described as a system rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned as a data source for reporting.", + "contextual_reason_agent": "However, DHIS2 is described as a system rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 65, + "text": "Regarding the implication of the fiscal context for the PforR and any impact of the PforR on the fiscal outlook, the Program expenditure framework is fully informed by the current context of fiscal consolidation; as such, it should not be impacted by the fiscal outlook of further consolidation. It only amounts to a very small percentage of current and projected public expenditures ( less than one percent ). It essentially consists of recurrent expenditures ( to the exclusion of subsidies ), which are projected to increase slightly in the next five years ( that is, by 19 percent in nominal terms ). It is not expected that further fiscal consolidation should impact it for three reasons: ( 1 ) It is narrowly limited in proportion to budget expenditures; ( 2 ) It does not call for a significant increase in appropriations; and ( 3 ) It does not include sizeable capital spending or subsidies, which are the variables commonly used for fiscal consolidation in Jordan. It also consists of budget expenditures deemed strategic for the implementation of the government \u2019 s economic strategy and reform agenda. It does not require any additional spending than what is already budgeted ( and extrapolated beyond the medium-term 29 Global Affairs Canada, 2021, Assessment of the Ministry of Health ( MOH ) technical and infrastructure needs, and human resource capacities related to Health Information System ( HIS ) management at the primary and secondary healthcare level.", + "ner_text": [ + [ + 1383, + 1408, + "named" + ] + ], + "validated": false, + "empirical_context": "It also consists of budget expenditures deemed strategic for the implementation of the government \u2019 s economic strategy and reform agenda. It does not require any additional spending than what is already budgeted ( and extrapolated beyond the medium-term 29 Global Affairs Canada, 2021, Assessment of the Ministry of Health ( MOH ) technical and infrastructure needs, and human resource capacities related to Health Information System ( HIS ) management at the primary and secondary healthcare level.", + "type": "system", + "explanation": "However, it is mentioned as a system for managing health information, not as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Health Information System' suggests a structured collection of health-related data.", + "contextual_reason_agent": "However, it is mentioned as a system for managing health information, not as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 218, + 223, + "named" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for various educational management purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in relation to data utilization for managing primary education.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for various educational management purposes.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 29, + "text": "An overview of project M & E tasks is provided below. Table 5: M & E Tasks Actor Tasks MINEMA \u2022 Data collection and monitoring on component one and component three activities \u2022 Regular reporting on all project components \u2022 Conduct annual surveys by contracting consultants / firms \u2022 Evaluate activities, inform implementation and suggest modifications as needed. \u2022 Use P-MIS for monitoring, data collection and collation / analysis for project components, and reporting to the public and the World Bank. BRD \u2022 Data collection, monitoring and reporting on component two \u2022 Support evaluation of economic opportunity activities. \u2022 Report regularly to MINEMA based on project-specific reporting requirements. Districts \u2022 Support data collection, monitoring and reporting on progress on component one activities, with District field specialist ( project ) support. \u2022 Use existing mechanisms, district planning procedures, and MEIS, to the extent possible, to ensure efficient implementation of project M & E. \u2022 Report regularly to MINEMA based on project-specific reporting requirements. Implementing Partners, Technical Agencies \u2022 Include necessary resources in project implementation to support M & E", + "ner_text": [ + [ + 369, + 374, + "named" + ] + ], + "validated": false, + "empirical_context": "Table 5: M & E Tasks Actor Tasks MINEMA \u2022 Data collection and monitoring on component one and component three activities \u2022 Regular reporting on all project components \u2022 Conduct annual surveys by contracting consultants / firms \u2022 Evaluate activities, inform implementation and suggest modifications as needed. \u2022 Use P-MIS for monitoring, data collection and collation / analysis for project components, and reporting to the public and the World Bank. BRD \u2022 Data collection, monitoring and reporting on component two \u2022 Support evaluation of economic opportunity activities.", + "type": "system", + "explanation": "P-MIS is mentioned as a management information system, not as a structured collection of data or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed P-MIS is a dataset because it is associated with data collection and analysis.", + "contextual_reason_agent": "P-MIS is mentioned as a management information system, not as a structured collection of data or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "184_multi-page", + "page": 16, + "text": "13 Monitoring and Evaluation Monitoring will be done according to the development indicators given in the attachment to Annex 1. The project will strengthen the capacity of CNOSEGE, and the Planning Unit of the Ministry so that monitoring reports on the implementation of the reform can include key progress and impact indicators. Currently the Planning unit generates statistical data on all aspects of the education sector, however this can be further strengthened to monitor progress on key reform objectives such as access, equity and quality. In addition, during the donors round-table UNESCO offered support to develop an Education Management Information System ( EMIS ). If this is not in place by the end of Phase I of the APL, this would be a priority item for Phase II. Evaluation of the impact of the reforms will be done by CNOSEGE by recruiting experts in this field and an initial evaluation will be done at the end of Phase I. Particular areas of impact assessment will be student performance and success in reaching out to disadvantaged groups. Normally, student performance would be measured by overall test results but as the pool of students widens to include students from less advantaged socioeconomic groups, there will be a downward pressure on test scores.", + "ner_text": [ + [ + 628, + 667, + "named" + ] + ], + "validated": false, + "empirical_context": "Currently the Planning unit generates statistical data on all aspects of the education sector, however this can be further strengthened to monitor progress on key reform objectives such as access, equity and quality. In addition, during the donors round-table UNESCO offered support to develop an Education Management Information System ( EMIS ). If this is not in place by the end of Phase I of the APL, this would be a priority item for Phase II.", + "type": "system", + "explanation": "However, it is mentioned as a system that supports data management rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system that supports data management rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 13, + "text": "Hence, lack of access to healthcare in communities with the highest concentration of refugees in Beka \u2019 a, North and Wadi Khalid is mainly attributed to the limited number of PHCCs and the inability of local communities to pay for private clinics. 10 The 14 Network PHCCs that served the Beka \u2019 a governorate before the Syrian crisis are today hosting an additional 35 percent of Syrian refugees. Moreover, UN agencies and international donor organizations contracted the majority of existing PHCCs for the delivery of care to Syrian refugees. As a result, the gap between increased demand and existing supply is deepening the vulnerability of the Lebanese in these areas as competition for health services and resources continues. 20. The unprecedented rise in demand for PHC services associated with the limited supply is crowding Lebanese out of hospital services and is compromising access to affordable healthcare. Comparing the MoPH utilization data11 for the first six months in 2013 with that in 2014, shows that while the number of Syrian patients attending PHCCs increased by 7. 1 percent, the number of Lebanese patients attending the same PHCCs decreased by 16. 6 percent. This is also the case with the number of visits to PHCCs, where the numbers increased by 33 percent for Syrians and decreased by 28. 9 percent for Lebanese. There is an undocumented evidence to suggest that Lebanese are dissatisfied with the long waiting time and lack of financial support for PHC visits, similar to Syrian refugees. This situation is significantly compromising access of Lebanese citizens to healthcare, putting pressure on the delivery and quality of services. In the medium - to long-term, the impact of delayed healthcare for Lebanese, particularly for the vulnerable, could result in increased costs and overall levels of morbidity in the future. 10 International Medical Corps Lebanon, Syrian Refugee Response, January-June, 2013. 11 Ministry of Public Health, Primary Care Department, September 2014.", + "ner_text": [ + [ + 934, + 957, + "named" + ], + [ + 117, + 128, + "MoPH utilization data11 <> data geography" + ], + [ + 380, + 395, + "MoPH utilization data11 <> reference population" + ], + [ + 986, + 990, + "MoPH utilization data11 <> reference year" + ], + [ + 1004, + 1008, + "MoPH utilization data11 <> publication year" + ], + [ + 1041, + 1056, + "MoPH utilization data11 <> reference population" + ], + [ + 1114, + 1131, + "MoPH utilization data11 <> reference population" + ], + [ + 1216, + 1241, + "MoPH utilization data11 <> data description" + ], + [ + 1942, + 1967, + "MoPH utilization data11 <> author" + ], + [ + 2004, + 2008, + "MoPH utilization data11 <> publication year" + ], + [ + 2025, + 2043, + "MoPH utilization data11 <> usage context" + ] + ], + "validated": true, + "empirical_context": "The unprecedented rise in demand for PHC services associated with the limited supply is crowding Lebanese out of hospital services and is compromising access to affordable healthcare. Comparing the MoPH utilization data11 for the first six months in 2013 with that in 2014, shows that while the number of Syrian patients attending PHCCs increased by 7. 1 percent, the number of Lebanese patients attending the same PHCCs decreased by 16.", + "type": "data", + "explanation": "This is indeed a dataset as it provides structured information on patient attendance at primary healthcare centers, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific utilization data from the Ministry of Public Health (MoPH).", + "contextual_reason_agent": "This is indeed a dataset as it provides structured information on patient attendance at primary healthcare centers, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 19, + "text": "Participants in civic engagement and social cohesion training will be tested on their knowledge and tracked to assess whether they are more active in their communities as a result of the training. Special emphasis will be placed on assessing the difference in project benefits between male and female participants and for persons with disabilities. The PIU will prepare semi-annual reports to provide a summary of implementation progress on project activities and cross-cutting functions ( FM, Procurement, and Environmental and Social Risk Management ) of the project. 36. Baseline data that is collected on income and other household characteristics will be structured in such a way to allow for comparison to national poverty lines and levels of income. While such data comparisons will not be incorporated into the results framework, they will be used to assess the extent to which the project is helping to move beneficiaries out of poverty and into situations of greater economic self-reliance. C. Sustainability 37. As the Government is in the process of adjusting its approach to support IDPs as some of them prepare to relocate, or consider future relocation back to homeland territories, the proposed activities will assist the government in developing and implementing a revised approach to IDPs that can be scaled up in the future. As the response to the recent conflict is still unfolding, IDP support is likely to evolve and be the focus of increased support from the Government and international development partners, including the Bank. The grant is expected to contribute directly to improving the wellbeing and livelihood prospects of IDPs, as well as to strengthening government capacity to plan for effective livelihood support programs for IDPs.", + "ner_text": [ + [ + 574, + 587, + "named" + ], + [ + 322, + 347, + "Baseline data <> reference population" + ], + [ + 353, + 356, + "Baseline data <> author" + ], + [ + 712, + 755, + "Baseline data <> data description" + ] + ], + "validated": true, + "empirical_context": "36. Baseline data that is collected on income and other household characteristics will be structured in such a way to allow for comparison to national poverty lines and levels of income. While such data comparisons will not be incorporated into the results framework, they will be used to assess the extent to which the project is helping to move beneficiaries out of poverty and into situations of greater economic self-reliance.", + "type": "data", + "explanation": "This is indeed a dataset as it is structured data collected for analysis and comparison to national poverty lines.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Baseline data' is a dataset because it refers to collected information on income and household characteristics.", + "contextual_reason_agent": "This is indeed a dataset as it is structured data collected for analysis and comparison to national poverty lines.", + "contextual_signal": "described as collected data for comparison", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 76, + "text": "Creation of a New Sector Specific Data Management Systems ( UW3. 2 million ) 23. In the absence o f an integrated system for managing forest resource information, it i s difficult for MECNT to make informed decisions, elaborate policies, and manage the forest sector. To remedy this situation, MECNT will develop an integrated system for the management o f forest resources,: ( i ) a geospatial information management system ( SyGIS ), including an interactive forestry atlas updated regularly to allow monitoring, especially o f forest title limits and logging roads; and ( ii ) a forest management information system ( SIGEF ) based on a sub-system o f forest accounting, which collects financial data linked to the forest sector, and a sub-system o f physical log tracking, which physically validates data collected on the ground as well as the enforcement of regulations. 24. To achieve this, the project will support: ( i ) a team o f international and national experts specialized in conceiving, experimenting, and deploying the two systems on the ground; ( ii ) information seminars and workshops; ( iii ) provision o f office equipment, furniture, and logistics; and ( iv ) operational 64", + "ner_text": [ + [ + 427, + 432, + "named" + ] + ], + "validated": false, + "empirical_context": "In the absence o f an integrated system for managing forest resource information, it i s difficult for MECNT to make informed decisions, elaborate policies, and manage the forest sector. To remedy this situation, MECNT will develop an integrated system for the management o f forest resources,: ( i ) a geospatial information management system ( SyGIS ), including an interactive forestry atlas updated regularly to allow monitoring, especially o f forest title limits and logging roads; and ( ii ) a forest management information system ( SIGEF ) based on a sub-system o f forest accounting, which collects financial data linked to the forest sector, and a sub-system o f physical log tracking, which physically validates data collected on the ground as well as the enforcement of regulations. 24.", + "type": "system", + "explanation": "However, it is described as a system for managing information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'information management system' in its name.", + "contextual_reason_agent": "However, it is described as a system for managing information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 38, + "text": "The monitoring system has a two-pronged approach: 1 ) the internal data collection and analysis by the EcoSec team through regular field assessment visits and market survey that is conducted in 14 regions on a monthly basis; 2 ) the exchange with the relevant stakeholders such as SRCS, communities, local associations and NGOs, different governmental authorities at the field and central level, the UN led Food / Nutrition and Shelter clusters at the regional and central levels, including specialized agencies and projects such as FSNAU and Famine Early Warning Network ( FEWS NET ). ICRC will submit to the WB technical reports on the project activities and progress. The bi-annual report includes the interim reporting format that is currently being used to report to other donors. In addition, ICRC will also provide an end of project report including information on achievements and impacts, which will be based on M & E tools used by ICRC. Use of a limited Third-Party Technical Review ( TPTR ) to complement ICRC M & E due diligence on the project results and processes will also be considered. 76. Currently FAO M & E conducts multiple impact assessment studies for the project they implement: Baseline Surveys, Post-Distribution Assessments and Impact Assessments. The Baseline Surveys, conducted regularly, will employ a hybrid approach that uses FAO Field Monitors ( currently about 15 across Somalia ) and independent consulting firm as a Service Provider which is contracted and overseen by FAO. The use of", + "ner_text": [ + [ + 543, + 571, + "named" + ] + ], + "validated": false, + "empirical_context": "The monitoring system has a two-pronged approach: 1 ) the internal data collection and analysis by the EcoSec team through regular field assessment visits and market survey that is conducted in 14 regions on a monthly basis; 2 ) the exchange with the relevant stakeholders such as SRCS, communities, local associations and NGOs, different governmental authorities at the field and central level, the UN led Food / Nutrition and Shelter clusters at the regional and central levels, including specialized agencies and projects such as FSNAU and Famine Early Warning Network ( FEWS NET ). ICRC will submit to the WB technical reports on the project activities and progress.", + "type": "project", + "explanation": "However, it is mentioned as a project and not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Network' in its name, suggesting a collection of data.", + "contextual_reason_agent": "However, it is mentioned as a project and not as a data source in the context.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 21, + "text": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E. 27. National accounts are engaged in a modernization process requiring support but suffer from a lack of sufficient and comprehensive trade data. While many Sub-Saharan countries are still following the 1993 national accounts framework, Cameroon transitioned successfully to the 2008 system and has been producing trimestral accounts since 2015. However, the classic annual national accounts suffer from a lack of reliable agriculture statistics. The last agriculture and livestock census was undertaken in 1984 and annual surveys stopped in the early 1990s. The Ministry of Livestock, Fishery, and Animal Industry and the Ministry of Agriculture Rural Development currently rely on indirect sources to produce the necessary basic sector statistics. A new agricultural and livestock census was originally planned for 2017, but the cost is high compared to similar exercises in the region ( CFAF 23. 6 billion ). However, the AfDB and EU are exploring ways to contribute to the financing of this census in synergy with the population census.", + "ner_text": [ + [ + 180, + 202, + "named" + ], + [ + 631, + 639, + "informal sector survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E.", + "type": "survey", + "explanation": "However, the context indicates it is irregular and not emphasized for employment issues, suggesting it is not a reliable data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects data.", + "contextual_reason_agent": "However, the context indicates it is irregular and not emphasized for employment issues, suggesting it is not a reliable data source.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 71, + "text": "This situation calls for innovative solutions that can enhance the ability of the project team to systematically conduct accountability and M & E activities remotely. To this effect, the project will utilize the Geo \u2010 Enabling Initiative for Monitoring and Supervision ( GEMS ), which uses geo \u2010 referenced data collected by mobile devices ( e. g. smartphones or tablets ) to monitor and verify project implementation. GEMS is an open source program that can be easily customized through simple training workshops provided by the World Bank task team to project implementing agencies. It can be linked with the existing MIS of implementing agencies and the EZ \u2010 Kar website. Accountability: 9. Accountability is enhanced through better use of formal country systems and community \u2010 based accountability interventions of the project. Formal accountability mechanisms include: ( i ) Third Party Monitoring ( TPM ), ( ii ) Procurement controls \u2010 National Procurement Authority ( NPA ), ex \u2010 post reviews, ( iii ) FM \u2013 strengthened internal controls. Community or citizens accountability mechanisms could include: ( i ) CCAP accountability mechanisms, ( ii ) Citizens Feedback Model ( CFM ). Citizens \u2019 Feedback Model ( CFM ): 10. As part of the citizen feedback mechanism, an SMS \u2010 based feedback collection system will be piloted for Component 1. It will reach out to passport applicants on their mobile phones and seek their feedback on ( a ) the time required in receiving the passport; ( b ) the fees paid for the service; and ( c ) general satisfaction with the service. This pilot is informed by successful implementation in several countries. After reviewing the results of this intervention, it could be replicated for other services under the project. Third Party Monitoring ( TPM ): 11. EZ \u2010 Kar project is eligible for Third Party Monitoring ( TPM ) by the ARTF Supervisory Agents under the overall anti \u2010 corruption efforts of CMU. Components of the project which embody high level of implementation risk will be monitored via third party. TPM could be utilized whenever the risk associated with project implementation is", + "ner_text": [ + [ + 1273, + 1311, + "named" + ] + ], + "validated": false, + "empirical_context": "Citizens \u2019 Feedback Model ( CFM ): 10. As part of the citizen feedback mechanism, an SMS \u2010 based feedback collection system will be piloted for Component 1. It will reach out to passport applicants on their mobile phones and seek their feedback on ( a ) the time required in receiving the passport; ( b ) the fees paid for the service; and ( c ) general satisfaction with the service.", + "type": "system", + "explanation": "However, it is not a dataset itself but rather a system designed to collect feedback.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves collecting feedback data through an SMS system.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a system designed to collect feedback.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 25, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 21 of 47 ( FMS ), and an environmental and social safeguards specialist / officer. 47 The PMUs will be fully authorized to implement the planned activities approved by the Project Steering Committee ( PSC ). 46. A Project Coordination Committee ( PCC ) will be set up to coordinate project implementation and a PSC will be set up to provide strategic guidance and oversight. The PCC, co \u2010 chaired by Secretaries Health and Secondary Education, will meet quarterly. The PSC, chaired by the Additional Chief Secretary, will meet biannually ( see figure 2 ). Figure 2. Institutional and Implementation Arrangements B. Results Monitoring and Evaluation Arrangements 47. Building on the results chain, the M & E framework identified indicators to track project implementation progress and impact. The PDO \u2010 level health indicators are taken from the RMNCHN indicators in the DHIS, while digitization and integration of various HMIS is an intermediate indicator. The education indicators are taken from the EMIS. Where possible, relevant indicators will be disaggregated by gender. Discussions with the GoB and the UNHCR have confirmed, however, that beneficiary data by nationality will not be routinely collected or publicly released. 48.", + "ner_text": [ + [ + 947, + 951, + "named" + ], + [ + 873, + 902, + "DHIS <> data description" + ], + [ + 922, + 939, + "DHIS <> data type" + ] + ], + "validated": true, + "empirical_context": "Building on the results chain, the M & E framework identified indicators to track project implementation progress and impact. The PDO \u2010 level health indicators are taken from the RMNCHN indicators in the DHIS, while digitization and integration of various HMIS is an intermediate indicator. The education indicators are taken from the EMIS.", + "type": "system", + "explanation": "DHIS is mentioned as a source for health indicators, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because DHIS is referenced in the context of health indicators and data tracking.", + "contextual_reason_agent": "DHIS is mentioned as a source for health indicators, confirming its role as a data source.", + "contextual_signal": "mentioned as a source for health indicators", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 33, + "text": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "ner_text": [ + [ + 845, + 884, + "named" + ] + ], + "validated": false, + "empirical_context": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 107, + "text": "99 given at budget preparation, it would difficult to trace the budget and expenditure during reporting. It also will be difficult for capturing Program-related transactions and reporting on them. ARRA uses the Gregorian calendar as fiscal year, which is different from EFY and needs alignment during the Program implementation. 8. Budget monitoring. The budget control of the proposed four entities is satisfactory. Before effecting payment, the transaction requested for payment is checked on whether it is included in the approved budget and whether sufficient uncommitted budget balance is available. The EIC and MoLSA use the IBEX budget control module, IPDC uses Excel spreadsheet, and ARRA uses \u2018 FAST \u2019 system to track expenditures against the budget. In addition, a quarterly report ( comparison of budget with actual expenditure ) is produced and performance is monitored by the management. With regard to the annual budget utilization, except IPDC, a good budget performance was registered by the other entities in the past three years. Capacity constraints, contractors \u2019 delay in completing works on time, and the regions \u2019 delay in effecting right-of-way payments and resettlements were reasons for low budget utilization by IPDC. 9. Program budgeting arrangements. The Program will follow the Federal GoE ' s budgeting procedure and calendar. The procedures and calendar are documented in the Federal GoE Budget manual.", + "ner_text": [ + [ + 631, + 635, + "named" + ] + ], + "validated": false, + "empirical_context": "Before effecting payment, the transaction requested for payment is checked on whether it is included in the approved budget and whether sufficient uncommitted budget balance is available. The EIC and MoLSA use the IBEX budget control module, IPDC uses Excel spreadsheet, and ARRA uses \u2018 FAST \u2019 system to track expenditures against the budget. In addition, a quarterly report ( comparison of budget with actual expenditure ) is produced and performance is monitored by the management.", + "type": "system", + "explanation": "'IBEX' is identified as a budget control module, which indicates it is a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'IBEX' is a dataset because it is mentioned in the context of tracking expenditures.", + "contextual_reason_agent": "'IBEX' is identified as a budget control module, which indicates it is a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "172_multi0page", + "page": 10, + "text": "In addition, there is a lack of basic furniture, teaching and learning materials, overcrowding in many schools in safer areas, disorientation and psychological trauma experienced by a large segment of the population, especially children, a weakened institutional capacity of the MEST in managing the education system, a serious lack of information and data to plan the provision of services, and poor coordination of the various initiatives among numerous actors working in the education sector. Main Sector Issues 2. 2 Data presented in this section draw extensively from two sources: the National School Survey Report ( NSSR ) conducted by Research Triangle Institute and Paul Musker and Associates in collaboration with the Central Statistics Office ( CSO ), at the end of 2001 in the context of the preparation of the project; and the Multiple Cluster Indicators Survey ( MICS ) undertaken in 2000. The NSSR received support of a PHRD grant, and covered all existing institutions ( Grades 1-9 ), taking into account formal, non-formal, community-based and rapid education programs. ( a ) Limited access to education and regional and gender disparities - 5 -", + "ner_text": [ + [ + 590, + 619, + "named" + ], + [ + 228, + 236, + "National School Survey Report <> reference population" + ], + [ + 622, + 626, + "National School Survey Report <> acronym" + ], + [ + 642, + 669, + "National School Survey Report <> author" + ], + [ + 674, + 700, + "National School Survey Report <> author" + ], + [ + 776, + 780, + "National School Survey Report <> publication year" + ], + [ + 934, + 938, + "National School Survey Report <> publisher" + ], + [ + 1177, + 1195, + "National School Survey Report <> usage context" + ] + ], + "validated": true, + "empirical_context": "Main Sector Issues 2. 2 Data presented in this section draw extensively from two sources: the National School Survey Report ( NSSR ) conducted by Research Triangle Institute and Paul Musker and Associates in collaboration with the Central Statistics Office ( CSO ), at the end of 2001 in the context of the preparation of the project; and the Multiple Cluster Indicators Survey ( MICS ) undertaken in 2000. The NSSR received support of a PHRD grant, and covered all existing institutions ( Grades 1-9 ), taking into account formal, non-formal, community-based and rapid education programs.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a report that presents data from a survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 13, + "text": "Poverty is disproportionately concentrated in rural areas and in the northern regions of the country. The 2014 household survey found that 56. 8 percent of rural families are poor, compared to just 8. 9 percent of urban families. 3 Overall, approximately 87 percent of the poor live in rural areas. Moreover, a majority of poor individuals are concentrated in the three northern regions of the country: the Far North, North, and Adamawa regions. More than one-half ( 56 percent ) of all poor inhabitants are located in the Far North and North regions, a significant increase from 34 percent in 2001. While poverty has increased in northern Cameroon, the incidence of poverty in the center-west of the country ( in the Littoral, Center, West, and South West regions ), as well as in Douala and Yaound\u00e9, has declined. 3. A rapid increase in poverty in northern Cameroon had been observed before the heightened regional insecurity associated with Boko Haram activities in 2014. The deterioration of the security environment in northern Cameroon occurred largely over the course of the past three years, following the collection of data in support of the 2014 Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ( ECAM \u2013 Cameroon Household Survey ) household survey.", + "ner_text": [ + [ + 1156, + 1195, + "named" + ], + [ + 106, + 110, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> publication year" + ], + [ + 111, + 127, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> data type" + ], + [ + 156, + 170, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> reference population" + ], + [ + 323, + 339, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> reference population" + ], + [ + 429, + 444, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> data geography" + ], + [ + 969, + 973, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> reference year" + ], + [ + 1151, + 1155, + "Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages <> publication year" + ] + ], + "validated": true, + "empirical_context": "A rapid increase in poverty in northern Cameroon had been observed before the heightened regional insecurity associated with Boko Haram activities in 2014. The deterioration of the security environment in northern Cameroon occurred largely over the course of the past three years, following the collection of data in support of the 2014 Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ( ECAM \u2013 Cameroon Household Survey ) household survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a household survey that collected data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a household survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a household survey that collected data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 56 of 74 Project Operations Manual, and have received cash transfers, at least for one payment cycle. minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS. Number of beneficiaries receiving cash for performing labor intensive public works who are female Number of total beneficiaries that directly receive cash transfer for working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are female This indicator will be measured at a minimum on a quarterly basis Registration and payment data in the SNSOP MIS Beneficiary data is collected during registration and updated over the course of the project. Payment data will also be periodically updated in the MIS Selected Implementing Partner Number of beneficiary households receiving cash transfer for participating in the behavioral change communication training The number of beneficiary households that participate in behavioral change communication training activities to receive their cash transfer.", + "ner_text": [ + [ + 286, + 298, + "named" + ] + ], + "validated": true, + "empirical_context": "minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS. Number of beneficiaries receiving cash for performing labor intensive public works who are female Number of total beneficiaries that directly receive cash transfer for working on LIPW under sub - component 1.", + "type": "data", + "explanation": "In the context, 'Payment data' is linked to the MIS and is used to track beneficiaries, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Payment data' is a dataset because it refers to specific information collected regarding payments made to beneficiaries.", + "contextual_reason_agent": "In the context, 'Payment data' is linked to the MIS and is used to track beneficiaries, indicating it functions as a data source.", + "contextual_signal": "mentioned as part of the MIS that updates records", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 5, + "validated": 4, + "not_validated": 1 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 9, + "text": "In addition, with a 54 percent gross enrolment rate in tertiary education in 2010, Lebanon greatly exceeds the 31 and 26 percent enrolment rates registered, respectively, for the MENA region and for middle-income countries. Still, weak institutional capacity and inefficiencies limit the country ' s prospects for developing higher quality education and health services. In some sectors, public service delivery is severely under - resourced, especially in terms of skilled staff, which affects the performance of ministries and their capacity to deliver quality services. Therefore, those who can afford it rely on the more expensive services provided by the private sector. There is widespread consensus that improved service delivery is a critical step toward a more inclusive Lebanon. 7 The last national household budget survey conducted in Lebanon was in 2004. The Central Administration of Statistics is currently finalizing the next HBS ( 2011 / 2012 ) with support from the World Bank. United Nations Development Program ( 2008 ). Poverty, Growth and Income Distribution in Lebanon. Beirut, Lebanon. 9 Findings from the Lebanon Financial Capability and Literacy Survey undertaken in May 2012. This was the first survey ever measuring financial literacy and capabilities at the country level. 10 All comparisons in the paragraph are based on the World Development Indicators for 2010 10", + "ner_text": [ + [ + 1129, + 1177, + "named" + ], + [ + 83, + 90, + "Lebanon Financial Capability and Literacy Survey <> data geography" + ], + [ + 780, + 787, + "Lebanon Financial Capability and Literacy Survey <> data geography" + ], + [ + 846, + 853, + "Lebanon Financial Capability and Literacy Survey <> data geography" + ], + [ + 954, + 958, + "Lebanon Financial Capability and Literacy Survey <> publication year" + ], + [ + 983, + 993, + "Lebanon Financial Capability and Literacy Survey <> publisher" + ], + [ + 995, + 1029, + "Lebanon Financial Capability and Literacy Survey <> publisher" + ], + [ + 1083, + 1090, + "Lebanon Financial Capability and Literacy Survey <> data geography" + ], + [ + 1100, + 1107, + "Lebanon Financial Capability and Literacy Survey <> data geography" + ], + [ + 1129, + 1136, + "Lebanon Financial Capability and Literacy Survey <> data geography" + ], + [ + 1192, + 1200, + "Lebanon Financial Capability and Literacy Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Beirut, Lebanon. 9 Findings from the Lebanon Financial Capability and Literacy Survey undertaken in May 2012. This was the first survey ever measuring financial literacy and capabilities at the country level.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data specifically designed to measure financial literacy at the country level.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey measuring financial literacy and capabilities.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data specifically designed to measure financial literacy at the country level.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 62, + "text": "53 for IPF Borrowers \u201d ( July 2016 ) ( \u201c Procurement Regulations \u201d ), the Bank \u2019 s Systematic Tracking and Exchanges in Procurement ( STEP ) system will be used to clear and update Procurement Plans and conduct all procurement exchanges for the Project. It will also be used as a repository of procurement documents for activities above and below the prior review thresholds. STEP will be used for publication of the procurement plan, notices, and contract award. Through STEP, the General Procurement Notice shall be published as well. A training will be administered for the different users of STEP. The procurement plan shall integrate the prior review thresholds associated with contract risks considered \u201c Substantial \u201d. 14. Staffing. The DCU will need an experienced procurement officer, to carry out the consultant selection process and communicate with the consultants. The procurement officer will rely on the expertise of the MOE for technical aspects of the selection process and contract management. 15. Procurement monitoring. The efficiency indicator related to procurement processing, and time used for evaluation shall be monitored. In addition to the desk support provided through enquiries and prior review, a supervision mission once every 6 months and a yearly post \u2010 review of procurement actions will be carried out.", + "ner_text": [ + [ + 83, + 147, + "named" + ] + ], + "validated": false, + "empirical_context": "53 for IPF Borrowers \u201d ( July 2016 ) ( \u201c Procurement Regulations \u201d ), the Bank \u2019 s Systematic Tracking and Exchanges in Procurement ( STEP ) system will be used to clear and update Procurement Plans and conduct all procurement exchanges for the Project. It will also be used as a repository of procurement documents for activities above and below the prior review thresholds.", + "type": "system", + "explanation": "However, it is described as a system for managing procurement documents, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that manages procurement information.", + "contextual_reason_agent": "However, it is described as a system for managing procurement documents, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 17, + "text": "The government has embraced the long-term development approach to refugee management, aligning with international good practice. 6 Important gains over the last five years include: ( a ) almost all refugee children now attend national schools rather than camp-based schools; ( b ) most urban refugees have access to the government \u2019 s national health insurance scheme; ( c ) plans are in place to transition camp-based health clinics to government management to lower costs and support integration of refugees into national service delivery systems; and ( d ) the government is implementing an economic inclusion strategy for refugees and host communities. Investments made over the last five years have addressed many of the impacts of the refugee presence that had created tensions with host community members. Survey data from the mid-term review ( MTR ) of Jya Mbere Phase I showed that this has strengthened social cohesion between refugees and host communities, creating a strong foundation for future economic progress. 5. Building on the gains made, the government \u2019 s strategic aim for refugee management is now to promote self-reliance. As one of the four objectives of the GCR, self-reliance is also central to both the National Strategy for Sustainable Graduation and the draft Refugee Sustainable Graduation Strategy.", + "ner_text": [ + [ + 813, + 824, + "named" + ], + [ + 198, + 214, + "Survey data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Investments made over the last five years have addressed many of the impacts of the refugee presence that had created tensions with host community members. Survey data from the mid-term review ( MTR ) of Jya Mbere Phase I showed that this has strengthened social cohesion between refugees and host communities, creating a strong foundation for future economic progress. 5.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data collected from a mid-term review, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey data' typically refers to collected information from a structured survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data collected from a mid-term review, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 49, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 38 Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Piped household water connections ( constructed or rehabilitated ) resulting from the project interventions Definition / Description Piped household water connection is defined as a connection that provides piped water to the consumer through either a house or yard connection. It does not include, inter alia, standpipes, protected well, borehole, protected spring, piped water provided through tanker trucks, or vendors, unprotected wells, unprotected spring, rivers, ponds and other surface water bodies, or bottled water. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "ner_text": [ + [ + 744, + 760, + "named" + ] + ], + "validated": false, + "empirical_context": "It does not include, inter alia, standpipes, protected well, borehole, protected spring, piped water provided through tanker trucks, or vendors, unprotected wells, unprotected spring, rivers, ponds and other surface water bodies, or bottled water. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 48, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXIX Collection Intermediate Result Indicators Result Area 1 on improved service delivery through digitalization Increasing the inclusive adoption of people-centric digital identity Description Individuals adopting people-centric digital identity [ Number ]. Frequency Annually. Data source Annual reports on digital ID implementation by MODEE. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on digital ID activations and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Description All DPI platforms supporting digitalized service delivery are fully accessible to non-citizens, including refugees [ Yes / No ]. Frequency Annually. Data source Annual reports on DPI implementation by MODEE. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on DPI registrations and usage, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Description Service providers integrating trusted, people-centric DPI in their service design and workflow to facilitate service delivery, including private-sector service providers [ Number ]. Frequency Annually. Data source Annual reports on DPI implementation by MODEE and public - and private-sector relying parties.", + "ner_text": [ + [ + 523, + 542, + "named" + ], + [ + 434, + 439, + "administrative data <> publisher" + ], + [ + 513, + 518, + "administrative data <> publisher" + ], + [ + 875, + 880, + "administrative data <> publisher" + ], + [ + 954, + 959, + "administrative data <> publisher" + ], + [ + 1375, + 1380, + "administrative data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Data source Annual reports on digital ID implementation by MODEE. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on digital ID activations and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE.", + "type": "administrative data", + "explanation": "In this context, 'administrative data' is explicitly mentioned as a source for collecting indicator values, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured data collected for administrative purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as a source for collecting indicator values, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source for collecting indicator values", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 47, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 32 Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA Responsibility for Data Collection BRD and MINEMA. Matching grants issued to businesses in low-carbon industries ( Number ) Description Quantitative indicator counting number of matching grants made to businesses in low carbon industries. The low carbon industries will be identified in the Project Implementation Manual. Frequency Quarterly. Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA. Beneficiaries of partial credit guarantee scheme that are still operational one year after intervention ( Percentage ) Description Quantitative indicator counting number of beneficiaires of the credit guarantee scheme that are operational 1 year after the intervention. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BDF data fed to MINEMA. Responsibility for Data Collection BDF and MINEMA Beneficiaries of matching grant that are still operational one year after intervention ( Percentage ) Description Quantitative indicator counting number of beneficiaires that received matching grants and that are operational 1 year after the intervention.", + "ner_text": [ + [ + 1351, + 1359, + "named" + ], + [ + 74, + 80, + "BDF data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BDF data fed to MINEMA. Responsibility for Data Collection BDF and MINEMA Beneficiaries of matching grant that are still operational one year after intervention ( Percentage ) Description Quantitative indicator counting number of beneficiaires that received matching grants and that are operational 1 year after the intervention.", + "type": "data", + "explanation": "In the context, 'BDF data' is explicitly referenced as part of the data collection process, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BDF data' is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "In the context, 'BDF data' is explicitly referenced as part of the data collection process, indicating it functions as a data source.", + "contextual_signal": "mentioned as part of data collection and reporting", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 36, + "text": "24 Indicator Description Project Development Objective Indicators Indicator Name Description ( indicator definition etc. ) Frequency Data Source / Methodology Responsibility for Data Collection Establishment and functioning of a Safety Net Unit A national safety nets unit is created by decree with permanent staff and budget line and is functioning with core staff hired and executing relevant duties. Yearly Administrative and Budget Reports CFS - Ministry of Planning and Prospective Design, testing and validation of a targeting system of Chad safety nets This indicator refers to the development and use of an effective targeting mechanism for the beneficiaries of cash transfer pilot. Once Targeting assessment CFS Design, development, utilization and assessment of a Management Information system ( MIS ) This refers to the development of i ) a computerized system for the registry and for the management of the various benefits of the two safety nets pilots and ii ) of a Social Registry, that could be used by different programs, capable of collecting, analyzing and storing key beneficiary information and on their economic status.", + "ner_text": [ + [ + 980, + 995, + "named" + ], + [ + 543, + 547, + "Social Registry <> data geography" + ], + [ + 653, + 689, + "Social Registry <> reference population" + ], + [ + 1084, + 1111, + "Social Registry <> data description" + ] + ], + "validated": true, + "empirical_context": "Yearly Administrative and Budget Reports CFS - Ministry of Planning and Prospective Design, testing and validation of a targeting system of Chad safety nets This indicator refers to the development and use of an effective targeting mechanism for the beneficiaries of cash transfer pilot. Once Targeting assessment CFS Design, development, utilization and assessment of a Management Information system ( MIS ) This refers to the development of i ) a computerized system for the registry and for the management of the various benefits of the two safety nets pilots and ii ) of a Social Registry, that could be used by different programs, capable of collecting, analyzing and storing key beneficiary information and on their economic status.", + "type": "registry", + "explanation": "The Social Registry is indeed a dataset as it is explicitly mentioned as a system for collecting, analyzing, and storing data on beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is described as a system capable of collecting and storing key beneficiary information.", + "contextual_reason_agent": "The Social Registry is indeed a dataset as it is explicitly mentioned as a system for collecting, analyzing, and storing data on beneficiaries.", + "contextual_signal": "described as a system for collecting, analyzing and storing data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "182_multi0page", + "page": 48, + "text": "On the data production side, the plan calls for institutionalizing the regular collection of household-level information by introducing a multi-topic panel household survey in the Strategic Plan of Statistics of INSTAT. As a result of the Project ' s support and the consensus reached among stakeholders, the next 5-year plan calls for the implementation of a Living Standard Measurement Survey ( LSMS ) in 2002, to be repeated after 3 years. The system also includes annual panel surveys on a sub-sample of LSMS households to monitor trends in areas like poverty, unemployment, migration and social services on a more frequent basis than the LSMS allows for. An important component of the assistance will be to promote activities to foster a closer dialogue between data producers and the various data users from governmental and non-governmental institutions, the donor community, and the civil society at large. This consultation process will be encouraged since the early stages of the Project ' s cycle and is expected to increase the relevance of the information produced, and so augment its role for policy-making purposes. On the data use side, the strategy calls for building analytical capacity within the Government to take full advantage of the statistical information being generated by INSTAT. Towards this end, the Project will provide continuous support both in the analysis and dissemination of policy-relevant information primarily through technical assistance, training and the provision of the necessary equipment and software for the Poverty Unit to be established in MOLSA. Sub-Component 2: Social Services Policy Development: The objectives of the sub-component are to assist the MOLSA to: a ) strengthen its policy formulation capacity, b ) further develop legislative and institutional framework for social services, c ) develop capacity for policy monitoring, evaluation and program improvement, d ) design and implement a national public awareness campaign on social exclusion, including - 45 -", + "ner_text": [ + [ + 643, + 647, + "named" + ], + [ + 138, + 172, + "LSMS <> data type" + ], + [ + 212, + 218, + "LSMS <> publisher" + ], + [ + 407, + 411, + "LSMS <> publication year" + ], + [ + 468, + 488, + "LSMS <> data description" + ] + ], + "validated": true, + "empirical_context": "As a result of the Project ' s support and the consensus reached among stakeholders, the next 5-year plan calls for the implementation of a Living Standard Measurement Survey ( LSMS ) in 2002, to be repeated after 3 years. The system also includes annual panel surveys on a sub-sample of LSMS households to monitor trends in areas like poverty, unemployment, migration and social services on a more frequent basis than the LSMS allows for. An important component of the assistance will be to promote activities to foster a closer dialogue between data producers and the various data users from governmental and non-governmental institutions, the donor community, and the civil society at large.", + "type": "survey", + "explanation": "LSMS is indeed a dataset as it is described as a Living Standard Measurement Survey that collects data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed LSMS is a dataset because it is referred to as a survey that collects data on living standards.", + "contextual_reason_agent": "LSMS is indeed a dataset as it is described as a Living Standard Measurement Survey that collects data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 56 of 74 Project Operations Manual, and have received cash transfers, at least for one payment cycle. minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS. Number of beneficiaries receiving cash for performing labor intensive public works who are female Number of total beneficiaries that directly receive cash transfer for working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are female This indicator will be measured at a minimum on a quarterly basis Registration and payment data in the SNSOP MIS Beneficiary data is collected during registration and updated over the course of the project. Payment data will also be periodically updated in the MIS Selected Implementing Partner Number of beneficiary households receiving cash transfer for participating in the behavioral change communication training The number of beneficiary households that participate in behavioral change communication training activities to receive their cash transfer.", + "ner_text": [ + [ + 225, + 243, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 56 of 74 Project Operations Manual, and have received cash transfers, at least for one payment cycle. minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 12, + "text": "According to the United Nations ( UN ) e-government index and the World Bank ( WB ) GovTech Maturity index, 6 despite significant progress in digital government, there is an opportunity for improvement to voice and accountability, as well as to access to and quality of services. Internet and mobile connectivity and the use of internet social media is widespread, with close to 10 million internet users in 2023 ( an 88 percent penetration rate ). There are over 8. 5 million active cellular mobile connections, and over 6. 5 million social media users ( that is, 58 percent of the population ), with 45 percent of users being women. 7 9. Jordan has been actively working on the digitalization of public services; however, user adoption remains limited. The Sanad application implemented by MODEE includes digital ID, electronic signature, and a personal document store, in addition to functioning as a unified online portal for accessing digitalized public services. Approximately 800, 000 users ( that is, about 7 percent of the population ) have activated their Sanad accounts as of January 2024, which is far below the GOJ \u2019 s goal of 3. 5 million active digital IDs by 2025.", + "ner_text": [ + [ + 759, + 764, + "named" + ] + ], + "validated": false, + "empirical_context": "Jordan has been actively working on the digitalization of public services; however, user adoption remains limited. The Sanad application implemented by MODEE includes digital ID, electronic signature, and a personal document store, in addition to functioning as a unified online portal for accessing digitalized public services. Approximately 800, 000 users ( that is, about 7 percent of the population ) have activated their Sanad accounts as of January 2024, which is far below the GOJ \u2019 s goal of 3.", + "type": "application", + "explanation": "'Sanad' is not a dataset as it refers to an application rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is associated with user accounts and digital services.", + "contextual_reason_agent": "'Sanad' is not a dataset as it refers to an application rather than a structured collection of data.", + "contextual_signal": "mentioned only as an application, not as a data source", + "tags": [] + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 18, + "text": "Land officially earmarked for industrial use \u2014 with appropriate access to infrastructure services and reflecting sound environmental and social ( E & S ) planning \u2014 is not easily accessible outside of the main Southern Corridor along the Kampala road to the east and west, particularly for new and / or foreign investors. Government services in support of new investors ( permits, certification, and so on ) are generally difficult to access outside greater Kampala. An investment outside Kampala faces significantly longer lead times and higher transactions costs than comparable investments within the Kampala region. As a result, the cost of locating into new regions and markets of Uganda is prohibitively high and deters potential first-movers from investing. Refugees and Host Sector Specific Issues 18. There are few medium-to-large enterprises in RHDs that could stimulate the local product or labor market. The business census recorded a total of 165 enterprises in Arua district that employ more than 10 workers ( less than 1 firm per square kilometer ), compared to more than 3, 900 such firms in the Kampala region ( about 20 25 World Bank Group. 2019. Profiting from Parity: Unlocking the Potential of Women ' s Business in Africa. World Bank. 26 Shepherd. 2016. Uganda: Improving Export Performance. International Growth Center. Exporter Statistics Drawn from VAT, PIT, and Customs Data from the Uganda Revenue Authority ( URA ).", + "ner_text": [ + [ + 920, + 935, + "named" + ], + [ + 238, + 245, + "business census <> data geography" + ], + [ + 604, + 618, + "business census <> data geography" + ], + [ + 975, + 988, + "business census <> data geography" + ], + [ + 1112, + 1126, + "business census <> data geography" + ], + [ + 1141, + 1151, + "business census <> publisher" + ], + [ + 1159, + 1163, + "business census <> publication year" + ], + [ + 1245, + 1255, + "business census <> publisher" + ], + [ + 1260, + 1268, + "business census <> author" + ] + ], + "validated": true, + "empirical_context": "There are few medium-to-large enterprises in RHDs that could stimulate the local product or labor market. The business census recorded a total of 165 enterprises in Arua district that employ more than 10 workers ( less than 1 firm per square kilometer ), compared to more than 3, 900 such firms in the Kampala region ( about 20 25 World Bank Group. 2019.", + "type": "census", + "explanation": "In the context, it is used to report the number of enterprises, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'business census' implies a structured collection of data about enterprises.", + "contextual_reason_agent": "In the context, it is used to report the number of enterprises, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 67, + "text": "As a result of the high reservation wage, many Jordanians remain unemployed as they queue for 21 West Asia North Africa Institute. 2015. \u201c Forging New Strategies in Protracted Refugee Crises: Syrian Refuges and the Host State Economy. \u201d 22 Mercy Corp. 2012. \u201c Analysis of Host Community-Refugee Tensions in Mafraq, Jordan. \u201d, Amman Net see ammannet / sy / and Su, A. 2015. \u201c The Mighty Pen ( 2014 ). \u201d Columbia Journalism Review, August. 23, 23 UNHCR regularly disseminates information on evolving policies by short messaging services and other means. Most recently, UNHCR disseminated responses to frequently asked questions regarding work permit requirements and procedures and impact on refugee status. http: / / unhcr. us6. list - manage1. com / track / click? u = 21ac4d661afc676782cbf14bc & id = 8bb817deb6 & e = cd2e73ef4f 24 Employment Unemployment Survey for 2015. Available online at: http: / / www. dos. gov. jo / dos_home_e / main / linked-html / Emp & Un. htm. The very low rate of participation among Jordanian women is generally attributed to a combination of lack of affordable child care and social views about suitable jobs for women. 25 See The National Employment Strategy 2011 \u2013 2020: An Update and Future Directions ( ILO, 2015 ) based on data for 2009 \u2013 2014. 26 ILO and FAFO. \u201c Impact of Syrian Refugees on the Jordanian Labor Market. \u201d 27 Ibid.", + "ner_text": [ + [ + 833, + 863, + "named" + ], + [ + 131, + 135, + "Employment Unemployment Survey <> publication year" + ], + [ + 307, + 321, + "Employment Unemployment Survey <> data geography" + ], + [ + 367, + 371, + "Employment Unemployment Survey <> publication year" + ], + [ + 868, + 872, + "Employment Unemployment Survey <> publication year" + ], + [ + 1245, + 1249, + "Employment Unemployment Survey <> publication year" + ], + [ + 1270, + 1281, + "Employment Unemployment Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "com / track / click? u = 21ac4d661afc676782cbf14bc & id = 8bb817deb6 & e = cd2e73ef4f 24 Employment Unemployment Survey for 2015. Available online at: http: / / www.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific survey that collects data on employment and unemployment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific survey that collects data on employment and unemployment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 17, + "text": "This is seen also at the county level, with some of the most vulnerable counties having the lowest coverage of their food insecure population, in some cases near zero coverage, by any SP program. The balance of instruments ( i. e., public works versus conditional / unconditional transfers ) also varies substantially between and within states, limiting the extent to which different instruments can be targeted to relevant groups for human capital accumulation. While robust data on coverage do not exist, host communities and refugees also face a lack of access to safety nets, with most such support coming from UNHCR and other humanitarian partners, which, while crucial, is not systematic or linked into broader government or donor efforts at the national level. The monitoring of risks to effectively respond to changing contexts continues to be weak, with lack of field presence and robust third-party monitoring as the main constraints. 17. In recognition, partners are working closely together to better coordinate efforts for more systemic SP delivery in the country. There exists a number of donor-led coordination platforms that aim to ensure better harmonization and synergy in the SP efforts by partners.", + "ner_text": [ + [ + 1103, + 1135, + "named" + ] + ], + "validated": false, + "empirical_context": "In recognition, partners are working closely together to better coordinate efforts for more systemic SP delivery in the country. There exists a number of donor-led coordination platforms that aim to ensure better harmonization and synergy in the SP efforts by partners.", + "type": "platform", + "explanation": "However, the term refers to coordination platforms, which are not structured collections of data but rather initiatives for collaboration.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to platforms that could potentially collect or organize data.", + "contextual_reason_agent": "However, the term refers to coordination platforms, which are not structured collections of data but rather initiatives for collaboration.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 51, + "text": "The current core application software comprises: ( i ) Hyperion for budget preparation; ( ii ) ORACLE financials for budget execution; and ( iii ) software for interfacing with other software for Debt Management, Payroll, Bank Reconciliation, and Revenue Management. GFMIS is fully utilized for budget execution. For budget preparation, however, there are manual interventions with various ICT tools being used. MoITS will utilize the functionalities available in their Oracle database and accounting module when recording transactions related to the Project operations, by creating a separate cost center for the Project which is used for recording the day-to-day transactions and large contract purchases under both components, the FO might also use excel sheet in support of the Oracle system to prepare Bank required reports such as the WA-IFRs and the Semiannual IFRs. Furthermore, the assigned FO will work on improving the automated linkage between the MoITS \u2019 Oracle based accounting system and the IFRs to be produced for the Bank purposes on excel spread sheets. 9. Financial Section of the POM: MoITS will develop the FM section of the POM used in the project which will cover all administrative, financial, and accounting, budgetary, and human resources procedures relevant to the additional activities to be financed under the project. The POM should describe the payment procedures, including controls and oversight arrangements.", + "ner_text": [ + [ + 752, + 763, + "named" + ] + ], + "validated": false, + "empirical_context": "For budget preparation, however, there are manual interventions with various ICT tools being used. MoITS will utilize the functionalities available in their Oracle database and accounting module when recording transactions related to the Project operations, by creating a separate cost center for the Project which is used for recording the day-to-day transactions and large contract purchases under both components, the FO might also use excel sheet in support of the Oracle system to prepare Bank required reports such as the WA-IFRs and the Semiannual IFRs. Furthermore, the assigned FO will work on improving the automated linkage between the MoITS \u2019 Oracle based accounting system and the IFRs to be produced for the Bank purposes on excel spread sheets.", + "type": "tool", + "explanation": "However, in this context, it is mentioned as a tool used for preparing reports, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'excel sheet' can store data in a structured format.", + "contextual_reason_agent": "However, in this context, it is mentioned as a tool used for preparing reports, not as a data source itself.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "158_40156", + "page": 54, + "text": "PFO IGAD Focal Points in NACs UNHCR IPs UNHCR Country HIV Focal Point UNHCR Regional HIV Focal Point NGOs for CBMPs Main reporting line Information copied for information LEGEND 32. IGAD will develop a data warehouse with the project funding as an HIV database to facilitate the archiving, analysis and sharing of information. Some data will be made available on the IGAD website. The IGAD website will also include information about the GLIA, ARCAN and other initiatives of which the IGAD countries are members. 33. Regular supervision by PFO, NAC Focal Points and UNHCR will assess implementation quality and verify data submitted and will be funded, for activities implemented by this project, by the grant. For this purpose, supervision guidelines will be developed and piloted for use by those involved in supervision and data quality assurance. 34. Structured HIV learning and operational research will be important; this is a new HIV initiative in a high risk area. Therefore, the project will fund the identification of a learning agenda and support IGAD to secure resources for the execution of research studies through a resource mobilization strategy.", + "ner_text": [ + [ + 202, + 216, + "named" + ] + ], + "validated": false, + "empirical_context": "PFO IGAD Focal Points in NACs UNHCR IPs UNHCR Country HIV Focal Point UNHCR Regional HIV Focal Point NGOs for CBMPs Main reporting line Information copied for information LEGEND 32. IGAD will develop a data warehouse with the project funding as an HIV database to facilitate the archiving, analysis and sharing of information. Some data will be made available on the IGAD website.", + "type": "system", + "explanation": "However, it is described as a system for archiving and sharing information, not as a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'data warehouse' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is described as a system for archiving and sharing information, not as a dataset itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 46, + "text": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 42 of 94 refugees ) managed sanitation services in the selected municipalities that have been provided through this Project. progress reports, based on updated beneficiary \u2019 s records from respective SKIs. People benefitting from safely managed solid waste services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs.", + "ner_text": [ + [ + 635, + 643, + "named" + ], + [ + 4, + 14, + "PMU Data <> publisher" + ], + [ + 693, + 713, + "PMU Data <> data type" + ] + ], + "validated": true, + "empirical_context": "People benefitting from safely managed solid waste services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project.", + "type": "data", + "explanation": "In this context, 'PMU Data' is indeed used as a source of information for tracking the number of people benefitting from the project, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is referenced in the context of compiling and recording information related to project beneficiaries.", + "contextual_reason_agent": "In this context, 'PMU Data' is indeed used as a source of information for tracking the number of people benefitting from the project, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of information for compiling reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "16 NCHRD and MOE \u2014 which started under ERfKE II and contributed to producing key pieces of analytical work that have played a significant role in policy development \u2014 will continue under the proposed Program. 21 One example of an evaluation study can be to look at how gender issues are addressed across different policy proposals and program interventions. The annual plan for the M & E activities will be endorsed by the GPSC and shared with all concerned donors and education sector partners. 52. The MOE has strengthened its data systems and will continue to do so to meet the M & E requirements of the Program. Under ERfKE II22, the MOE completed the redesign of its OpenEMIS, which was successfully deployed in all public schools in Jordan. OpenEMIS is a comprehensive and integrated information and data collection system, which covers all education levels. The system captures disaggregated school level data on students, teachers, grades, subjects, and teaching periods for all MOE operated schools and institutions in the country. A dashboard linked to the OpenEMIS is currently being developed and will be accessible to individual units at the MOE to enable them to monitor their respective indicators. The Queen Rania Center ( QRC ) manages the EMIS and its dashboard.", + "ner_text": [ + [ + 1067, + 1075, + "named" + ] + ], + "validated": false, + "empirical_context": "The system captures disaggregated school level data on students, teachers, grades, subjects, and teaching periods for all MOE operated schools and institutions in the country. A dashboard linked to the OpenEMIS is currently being developed and will be accessible to individual units at the MOE to enable them to monitor their respective indicators. The Queen Rania Center ( QRC ) manages the EMIS and its dashboard.", + "type": "system", + "explanation": "OpenEMIS is described as a system managing data, but it is not explicitly mentioned as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset because it is associated with capturing and managing data.", + "contextual_reason_agent": "OpenEMIS is described as a system managing data, but it is not explicitly mentioned as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 10, + "text": "The literacy rate for adults aged 15 and older is 98 percent, and the share of adults aged 15 and above who had no formal education has declined from 19. 2 percent in 1950 to a projected 1. 8 percent in 2020. 8 Younger cohorts are also attaining more years of education; 1 Source: Macro Poverty Outlook for Costa Rica: April 2024; 2 Source: World Development Indicators ( WDI ) https: / / data. worldbank. org / indicator / NE. TRD. GNFS. ZS? locations = CR 3 Source: World Economic Outlook ( WEO ), October 2023, https: / / www. imf. org / external / datamapper / LUR @ WEO / CRI? zoom = CRI & highlight = CRI 4 World Bank estimates using administrative records and annual statistical reports from the Directorate General of Migrants and Foreigners. https: / / www. migracion. go. cr / Paginas / Centro % 20de % 20Documentaci % C3 % B3n / Estad % C3 % ADsticas. aspx. 5 World Bank, Climate Change Knowledge Portal - Costa Rica. 6 World Bank, GFFDR, ThinkHazard! Portal. 7 Ibid 8 Source: https: / / ourworldindata. org /", + "ner_text": [ + [ + 341, + 369, + "named" + ], + [ + 22, + 46, + "World Development Indicators <> reference population" + ], + [ + 167, + 171, + "World Development Indicators <> reference year" + ], + [ + 203, + 207, + "World Development Indicators <> publication year" + ], + [ + 307, + 317, + "World Development Indicators <> data geography" + ], + [ + 372, + 375, + "World Development Indicators <> acronym" + ], + [ + 613, + 623, + "World Development Indicators <> publisher" + ], + [ + 871, + 881, + "World Development Indicators <> publisher" + ], + [ + 917, + 927, + "World Development Indicators <> data geography" + ], + [ + 931, + 941, + "World Development Indicators <> publisher" + ] + ], + "validated": true, + "empirical_context": "8 percent in 2020. 8 Younger cohorts are also attaining more years of education; 1 Source: Macro Poverty Outlook for Costa Rica: April 2024; 2 Source: World Development Indicators ( WDI ) https: / / data. worldbank.", + "type": "dataset", + "explanation": "In the context, it is explicitly mentioned as a source, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of data", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "The increase in service utilization is translated into deaths averted using published empirical studies; mortality rates for Balochistan were taken from the 2012 and 2017 \u2013 18 PDHS. 70 Effect sizes for the impacts of an additional skilled birth on maternal and neonatal mortality rates come from Graham, Bell, and Bullough ( 2001 ) 71 and Bhutta et al. ( 2014 ), 72 respectively. The effect size used to estimate the impact of measles vaccination on mortality of children under five is based on McGovern and Canning ( 2015 ). 73 Because their effect size is for full child vaccination, an adjustment factor of 0. 89 is applied to the share of children with 69 Pakistan Bureau of Statistics. Population Census 2017. http: / / www. pbs. gov. pk / content / block \u2010 wise \u2010 provisional \u2010 summary \u2010 results \u2010 6th \u2010 population \u2010 housing \u2010 census \u2010 2017 \u2010 january \u2010 03 \u2010 2018; for data from the 1998 and earlier censuses, see http: / / www. pbs. gov. pk / sites / default / files / population_census / Administrative % 20Units. pdf. 70 Maternal mortality rates from PDHS 2012 \u2013 13 are used since no maternal mortality data is available from PDHS 2017 \u2013 18. 71 Graham, W. J., J. S.", + "ner_text": [ + [ + 176, + 180, + "named" + ], + [ + 105, + 120, + "PDHS <> data description" + ], + [ + 125, + 136, + "PDHS <> data geography" + ], + [ + 157, + 161, + "PDHS <> reference year" + ], + [ + 166, + 175, + "PDHS <> reference year" + ], + [ + 296, + 322, + "PDHS <> author" + ], + [ + 339, + 352, + "PDHS <> author" + ], + [ + 495, + 515, + "PDHS <> author" + ], + [ + 660, + 689, + "PDHS <> publisher" + ], + [ + 709, + 713, + "PDHS <> publication year" + ], + [ + 1064, + 1073, + "PDHS <> reference year" + ], + [ + 1139, + 1148, + "PDHS <> publication year" + ], + [ + 1189, + 1207, + "PDHS <> usage context" + ] + ], + "validated": true, + "empirical_context": "The increase in service utilization is translated into deaths averted using published empirical studies; mortality rates for Balochistan were taken from the 2012 and 2017 \u2013 18 PDHS. 70 Effect sizes for the impacts of an additional skilled birth on maternal and neonatal mortality rates come from Graham, Bell, and Bullough ( 2001 ) 71 and Bhutta et al. ( 2014 ), 72 respectively.", + "type": "survey", + "explanation": "In the context, PDHS is explicitly mentioned as a source for mortality rates, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PDHS refers to a specific survey that provides empirical data.", + "contextual_reason_agent": "In the context, PDHS is explicitly mentioned as a source for mortality rates, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 70, + "text": "62 IR Indicator 5. 2: Establish system of recording, reporting and information sharing for handling maladministration, procurement, and corruption complaints ( including compiled reports on complaints ) System must include mechanisms for recording, reporting and information for handling complaints. Annually EIC Assessment of established systems for handling complaints PCU Independent agent hired by the PCU n. a. - not linked to DLI Citizen Engagement IR Indicator 6. 1: Share of workers reporting satisfaction with their workplace condition in the existing industrial parks This indicator tracks level of satisfaction of factory workers ( including refugees ) within the existing industrial parks. This indicator will be tracked through a survey of factory-level compliance. Annually EIC Reporting template PCU Independent agent hired by the PCU n. a. - not linked to DLI Corporate Scorecard IR Indicator 7. 1: Number of program beneficiaries This is a corporate results indicator. This indicator tracks number of refugees and Ethiopians benefitting from the project interventions. To avoid duplication, one beneficiary will be counted only once. Data will be disaggregated gender and age group.", + "ner_text": [ + [ + 743, + 777, + "named" + ], + [ + 474, + 544, + "survey of factory-level compliance <> data description" + ], + [ + 625, + 640, + "survey of factory-level compliance <> reference population" + ], + [ + 653, + 661, + "survey of factory-level compliance <> reference population" + ] + ], + "validated": true, + "empirical_context": "1: Share of workers reporting satisfaction with their workplace condition in the existing industrial parks This indicator tracks level of satisfaction of factory workers ( including refugees ) within the existing industrial parks. This indicator will be tracked through a survey of factory-level compliance. Annually EIC Reporting template PCU Independent agent hired by the PCU n.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned that the survey will track the level of satisfaction, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on worker satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that the survey will track the level of satisfaction, indicating it is used for empirical analysis.", + "contextual_signal": "follows 'tracks level of satisfaction' and 'will be tracked through a survey'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 11, + "text": "Finally, under any scenario, Djibouti is forecasted to experience more highly unusual ( 3-sigma ) and unprecedented ( 5-sigma ) summer month of extreme temperature anomalies than other Middle East and North Africa ( MENA ) countries. 8-9 3 Djibouti \u2019 s Natural Disaster Risk Profile, ThinkHazard, 2019. 4 \u201c Mapped: How Every Part of the World Has Warmed and Could Continue to Warm \u201d, Climate Brief, September 2018. Based on IPCC models and Berkley Earth data. 5 Djibouti \u2019 s Country Profile, Climate Change Knowledge Portal, World Bank, 2017. 6 Last May, Cyclone Sagar dumped 110mm of precipitation on Djibouti \u2019 s capital ( roughly the annual average in 48 hours ), inundated most neighborhoods, affected up to 50, 000 people, disrupted access to water and sanitation services for weeks, and compromised the ability of thousands of households to provided their children with adequate nutrition according to a United Nations Humanitarian Needs Assessment.", + "ner_text": [ + [ + 492, + 523, + "named" + ] + ], + "validated": false, + "empirical_context": "Based on IPCC models and Berkley Earth data. 5 Djibouti \u2019 s Country Profile, Climate Change Knowledge Portal, World Bank, 2017. 6 Last May, Cyclone Sagar dumped 110mm of precipitation on Djibouti \u2019 s capital ( roughly the annual average in 48 hours ), inundated most neighborhoods, affected up to 50, 000 people, disrupted access to water and sanitation services for weeks, and compromised the ability of thousands of households to provided their children with adequate nutrition according to a United Nations Humanitarian Needs Assessment.", + "type": "portal", + "explanation": "However, it is mentioned as a portal and not specifically as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Knowledge Portal' which suggests a collection of information.", + "contextual_reason_agent": "However, it is mentioned as a portal and not specifically as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a portal, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 30, + "text": "The proposed project will also support continued capacity-building for Taazour General Delegation and CSA to better track implementation, monitor safety net interventions, and evaluate policy and programs based on the systematic and organized feedback from beneficiaries, impact evaluations and the analysis of MIS data. 79. Digital innovations will be leveraged to support the monitoring and evaluation of the project during implementation. In particular, the Geo-Enabling initiative for Monitoring and Supervision ( GEMS ) under the Kobo Toolbox Platform piloted under the on-going project will continue to be leveraged for this project. The GEMS will enable Taazour to collect and structure digital data that automatically feeds into a centralized M & E system. The platform will be customized to capture relevant indicators, photos, audio, videos; time and date stamps; and GPS coordinates that allow for automated geo-mapping of the project during implementation. This platform is already used in other Sahel countries and is being deployed to the other developing partners members of the Sahel Alliance. Using these tools systematically allows remote supervision, frequent environmental and social monitoring, and coordination across projects and partners working in the same area. C. Sustainability 80. The sustainability of the activities supported by the project can be assessed through three Against Exclusion. Its objectives are inter alia to: ( a ) define national social protection, solidarity and social cohesion policies; ( b ) to coordinate the implementation of the national SP policy with poor and vulnerable populations; and ( c ) ensure universal access to basic services for these populations.", + "ner_text": [ + [ + 311, + 319, + "named" + ], + [ + 257, + 270, + "MIS data <> reference population" + ], + [ + 694, + 706, + "MIS data <> data type" + ], + [ + 852, + 872, + "MIS data <> data description" + ], + [ + 878, + 893, + "MIS data <> data description" + ], + [ + 1008, + 1023, + "MIS data <> data geography" + ], + [ + 1607, + 1638, + "MIS data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The proposed project will also support continued capacity-building for Taazour General Delegation and CSA to better track implementation, monitor safety net interventions, and evaluate policy and programs based on the systematic and organized feedback from beneficiaries, impact evaluations and the analysis of MIS data. 79.", + "type": "data", + "explanation": "In this context, 'MIS data' is indeed used as a source of information for tracking and evaluating interventions, confirming it as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MIS data' is a dataset because it refers to data collected from a Management Information System.", + "contextual_reason_agent": "In this context, 'MIS data' is indeed used as a source of information for tracking and evaluating interventions, confirming it as a dataset.", + "contextual_signal": "mentioned as a data source for tracking and evaluating interventions", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 79, + "text": "This approach improves transparency with respect to the purposes of resources and the efficiency of their use. It is also helpful in improving communications between the Government and its citizens. The GFMIS will easily provide information on how the annual Budget Law and its execution support the strategic priorities of the Government. The GFMIS has been fully rolled out to budget units ( ministries, departments, and regional financial centers ) encompassed in the Budget Law except three ( the Ministry of Foreign Affairs, Water Authority, and civil defense ) that would be completed in 2016. The Government has launched GFMIS phase two aiming to connect IPUs by 2018. 28. The current GFMIS implementation utilizes a subset of the functionalities of the underlying application software. The current core application software comprises: ( a ) Hyperion for budget preparation; ( b ) Oracle Financials for budget execution; and ( c ) software for interfacing to other software for debt management, payroll, bank reconciliation, and revenue management. The GFMIS is fully utilized for", + "ner_text": [ + [ + 203, + 208, + "named" + ] + ], + "validated": false, + "empirical_context": "It is also helpful in improving communications between the Government and its citizens. The GFMIS will easily provide information on how the annual Budget Law and its execution support the strategic priorities of the Government. The GFMIS has been fully rolled out to budget units ( ministries, departments, and regional financial centers ) encompassed in the Budget Law except three ( the Ministry of Foreign Affairs, Water Authority, and civil defense ) that would be completed in 2016.", + "type": "system", + "explanation": "However, GFMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is mentioned in the context of providing information.", + "contextual_reason_agent": "However, GFMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 49, + "text": "Likewise, the public sector pays higher earnings, and an additional year of schooling increases the probability of employment in the public sector by 41 percent. An additional year of schooling also increases the probability of wage employment by 38 percent and non-wage self-employment by 19 percent compared to farming activity. Similarly, an additional year of schooling increases the probability of working in industry sectors by 31 percent and in service sectors by 28 percent compared to the agricultural sector. Furthermore, an additional year of schooling reduces the likelihood of falling below the poverty line by 14 percent. 95. Educated women are more likely to receive better returns than educated men, thereby reinforcing the fact that education is a tool that helps foster equality and promote inclusive growth. An additional year of schooling increases the chances of finding paid employment by 9 percent for women compared to 8 percent for men. An additional year of schooling for females also increases the likelihood of being employed in the public sector, provides greater opportunities for working in more productive sectors and with employment contracts, and reduces the chances of falling below the national poverty line in numbers greater than their male 74 See World Bank Report 84215-ET. 75 Estimate based on Ethiopia Living Standard Measurement Survey ( LSMS ) 2019", + "ner_text": [ + [ + 1335, + 1378, + "named" + ], + [ + 1286, + 1296, + "Ethiopia Living Standard Measurement Survey <> publisher" + ], + [ + 1335, + 1343, + "Ethiopia Living Standard Measurement Survey <> data geography" + ], + [ + 1381, + 1385, + "Ethiopia Living Standard Measurement Survey <> publisher" + ], + [ + 1388, + 1392, + "Ethiopia Living Standard Measurement Survey <> publication year" + ], + [ + 1408, + 1426, + "Ethiopia Living Standard Measurement Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "An additional year of schooling for females also increases the likelihood of being employed in the public sector, provides greater opportunities for working in more productive sectors and with employment contracts, and reduces the chances of falling below the national poverty line in numbers greater than their male 74 See World Bank Report 84215-ET. 75 Estimate based on Ethiopia Living Standard Measurement Survey ( LSMS ) 2019", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced in the context as a source for estimates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced in the context as a source for estimates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "118_PAD1199-PAD-P144637-IDA-R2015-0247-1-Box393201B-OUO-9", + "page": 74, + "text": "The objectives of the M & E activities are to provide PNDP III staff and stakeholders with regular information on project implementation and outputs; identify bottlenecks and impediments in the project implementation; ensure that all the activities under PNDP III are implemented in compliance with the PIM; determine to what extent the NCU achieves its goals and objectives, and how it affects the intended beneficiaries \u2019 social conditions and capacities; and maintain acceptable performance standards for environmental and social impacts. The arrangements for M & E are critical given the multitude of capacity building and subproject activities that will take place under the project. 89. The project management information system ( MIS ) set up during PNDP II will be adjusted to the needs of the proposed project. Indeed, the new MIS will include the following adjustments: ( i ) complete on-line connection with RCUs and NCU to strengthen decentralized supervision; ( ii ) extend MIS to allow comparison of planned versus actual performance ( i. e., physical and financial ) in a format that can also be used in reports to be presented to government and Bank; ( iii ) integrate financial management system, and ( iv ) launching of the MIS on the internet for public access with the aim of promoting transparency. 90. The monitoring and evaluation system of PNDP III is designed to consolidate and improve the system used during the first and second phase of the program. It will be organized at four levels: communal, departmental, regional, and national. It will use the PRO-ADP software ( Progiciel d \u2019 Appui au D\u00e9veloppement Participatif ) developed during phase 2 to facilitate the monitoring of the implementation of CDP at the communal level. 91. The communal level will be the operational level of the system. Communal development agents will be trained to monitor the implementation of CDPs and to collect data and transmit them to the departmental level. These data will include feedback from communes on PNDP implementation as well as environmental monitoring indicators to determine the effectiveness of environmental mitigation measures implemented under subprojects and the extent to which the", + "ner_text": [ + [ + 2052, + 2087, + "named" + ] + ], + "validated": false, + "empirical_context": "Communal development agents will be trained to monitor the implementation of CDPs and to collect data and transmit them to the departmental level. These data will include feedback from communes on PNDP implementation as well as environmental monitoring indicators to determine the effectiveness of environmental mitigation measures implemented under subprojects and the extent to which the", + "type": "indicator", + "explanation": "However, 'environmental monitoring indicators' are not a dataset but rather metrics or measures used to assess environmental conditions.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'indicators' can imply a structured collection of data points.", + "contextual_reason_agent": "However, 'environmental monitoring indicators' are not a dataset but rather metrics or measures used to assess environmental conditions.", + "contextual_signal": "mentioned only as metrics, not as a data source", + "tags": [] + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 21, + "text": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E. 27. National accounts are engaged in a modernization process requiring support but suffer from a lack of sufficient and comprehensive trade data. While many Sub-Saharan countries are still following the 1993 national accounts framework, Cameroon transitioned successfully to the 2008 system and has been producing trimestral accounts since 2015. However, the classic annual national accounts suffer from a lack of reliable agriculture statistics. The last agriculture and livestock census was undertaken in 1984 and annual surveys stopped in the early 1990s. The Ministry of Livestock, Fishery, and Animal Industry and the Ministry of Agriculture Rural Development currently rely on indirect sources to produce the necessary basic sector statistics. A new agricultural and livestock census was originally planned for 2017, but the cost is high compared to similar exercises in the region ( CFAF 23. 6 billion ). However, the AfDB and EU are exploring ways to contribute to the financing of this census in synergy with the population census.", + "ner_text": [ + [ + 319, + 327, + "named" + ], + [ + 631, + 639, + "INS data <> data geography" + ], + [ + 734, + 738, + "INS data <> publication year" + ], + [ + 901, + 905, + "INS data <> reference year" + ] + ], + "validated": true, + "empirical_context": "On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E. 27.", + "type": "data", + "explanation": "In the context, 'INS data' is referred to as a source of information that can inform program preparation and monitoring & evaluation, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'INS data' is a dataset because it is mentioned in the context of data production and expertise.", + "contextual_reason_agent": "In the context, 'INS data' is referred to as a source of information that can inform program preparation and monitoring & evaluation, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of information for program preparation and M & E", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "185_multi-page", + "page": 41, + "text": "42 Increase in percent of households with a chronically ill adult ( 15-49 years ) who have received external help in caring for a patient or replacing lost income in the past year Increase in percentage of households caring for an orphan that has received assistance from outside the family Increase in percent of adults with access to quality STI / TB / O [ case management Percent decrease in reported STI / TB / OI prevalence Percentage of people expressing nondiscriminatory attitudes towards people with HIV / AIDS 3 ) Strengthened capacity of Increase in the number of Project data institutions and communities to institutions providing effective respond to the epidemic in a coordination at nat ' l, multisectoral and sustained provincial, and district levels manner for the planning and implementation of HIV / AIDS interventions Proper mechanisms in place to Survey data transfer funds for prevention, care, and support at district and community levels Percent increase in number of Survey data organizations capable of designing, implementing, and evaluating HIV / AIDS / STI activities", + "ner_text": [ + [ + 868, + 879, + "named" + ] + ], + "validated": true, + "empirical_context": "42 Increase in percent of households with a chronically ill adult ( 15-49 years ) who have received external help in caring for a patient or replacing lost income in the past year Increase in percentage of households caring for an orphan that has received assistance from outside the family Increase in percent of adults with access to quality STI / TB / O [ case management Percent decrease in reported STI / TB / OI prevalence Percentage of people expressing nondiscriminatory attitudes towards people with HIV / AIDS 3 ) Strengthened capacity of Increase in the number of Project data institutions and communities to institutions providing effective respond to the epidemic in a coordination at nat ' l, multisectoral and sustained provincial, and district levels manner for the planning and implementation of HIV / AIDS interventions Proper mechanisms in place to Survey data transfer funds for prevention, care, and support at district and community levels Percent increase in number of Survey data organizations capable of designing, implementing, and evaluating HIV / AIDS / STI activities", + "type": "survey", + "explanation": "In this context, 'Survey data' is explicitly mentioned as a source of information used to measure various health-related metrics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Survey data' suggests a structured collection of information gathered from surveys.", + "contextual_reason_agent": "In this context, 'Survey data' is explicitly mentioned as a source of information used to measure various health-related metrics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 29, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 20 of 68 52. Inclusion. Despite significant achievements on inclusive education, students with disabilities still face exclusion in Moldova. The Joint Evaluation of the Implementation of the Programme for Development of Inclusive Education 2011 \u2013 2020 highlighted significant efforts that have been made in recent years to increase disability inclusion in education. Nevertheless, the report revealed that educational institutions, at all levels, are only partially prepared to facilitate the access of children with disabilities ( especially motor skill disorders and hearing and / or visual impairments ) through access infrastructure. In this regard, the accessibility of all types of educational institutions is still an issue for Moldova. These accessibility constraints are reflected in wider disparities in education and subsequently work opportunities for people with disabilities. Principles of universal access will guide the project preparation including the design of the new high schools in terms of physical access, safety and emergency egress, and access to learning opportunities to ensure inclusion and safety of students with disabilities. 53. Personal data protection. The EMIS and national and international assessments adhere to the national personal data protection legislation. The national practices are informed and strengthened by the successful participation of Moldova in PISA, governed by the OECD Personal Data Protection Rules and practices.", + "ner_text": [ + [ + 1470, + 1474, + "named" + ] + ], + "validated": false, + "empirical_context": "The EMIS and national and international assessments adhere to the national personal data protection legislation. The national practices are informed and strengthened by the successful participation of Moldova in PISA, governed by the OECD Personal Data Protection Rules and practices.", + "type": "assessment", + "explanation": "However, PISA is mentioned as an assessment rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed PISA is a dataset because it is associated with assessments that collect data on student performance.", + "contextual_reason_agent": "However, PISA is mentioned as an assessment rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 28, + "text": "The KNEC \u2019 s school specific analysis for the learning assessments and examinations conducted in 2020 and 2021, and for the school reentry learning assessments conducted in January 2021 after the prolonged school closure, will be used to set the baseline and targets in the SIP for improving learning outcomes. \u2022 Learners school attendance: target schools will conduct regular parent / community meetings and relevant mobilization activities to ensure regular school attendance and specifically monitor retention of girls in grades 6 to 8 and allow reentry for teenage mothers in primary in line with the National reentry guidelines. Target schools are expected to closely track student \u2019 s attendance by gender and grade and submit attendance data on NEMIS. Subcounty-based education teams from the MoE, will be expected to visit target schools once a month to monitor school attendance, identify learners at risk of dropping out and agree on remedial actions with the school management. \u2022 School management and accountability: target schools will be supported to comply with MoE \u2019 s requirements for accountability30 in management of the student capitation grants ( as well as the proposed school grant ) and facilitate teachers to participate in the monthly SBTS cluster meetings. School heads, their deputies, and the senior teacher, will be expected to complete the school instructional leadership module which the TSC plans to develop and deliver through a third party. Support to the 50 30 Include inter alia, availability of approved costed annual work plan by the school committee / boards of management; maintenance of updated cash books; evidence of appropriate store ledgers; adherence to procurement procedures; and school level public disclosure of relevant information IPF", + "ner_text": [ + [ + 752, + 757, + "named" + ], + [ + 4, + 8, + "NEMIS <> publisher" + ], + [ + 97, + 101, + "NEMIS <> reference year" + ], + [ + 106, + 110, + "NEMIS <> publication year" + ], + [ + 124, + 159, + "NEMIS <> data description" + ], + [ + 181, + 185, + "NEMIS <> publication year" + ], + [ + 733, + 748, + "NEMIS <> data type" + ] + ], + "validated": true, + "empirical_context": "\u2022 Learners school attendance: target schools will conduct regular parent / community meetings and relevant mobilization activities to ensure regular school attendance and specifically monitor retention of girls in grades 6 to 8 and allow reentry for teenage mothers in primary in line with the National reentry guidelines. Target schools are expected to closely track student \u2019 s attendance by gender and grade and submit attendance data on NEMIS. Subcounty-based education teams from the MoE, will be expected to visit target schools once a month to monitor school attendance, identify learners at risk of dropping out and agree on remedial actions with the school management.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is referenced as a system for tracking and submitting attendance data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of submitting attendance data.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is referenced as a system for tracking and submitting attendance data.", + "contextual_signal": "mentioned as a data source for attendance data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "172_multi0page", + "page": 55, + "text": "technical assistance to benefit the Personnel Directorate, which will include the appointment of a personnel management specialist. The objective is to support the establishment of an effective human resources management system to ensure that: ( i ) all staff are duly accounted for; that ( ii ) recruitment and deployment of staff ( in particular teachers ) are carried out in a transparent and rational fashion to maximize cost-effectiveness; and that ( iii ) administrative procedures are being streamlined. Capacity building activities in this area will include the following tasks: e Cleaning the MEST ' s existing personnel and preparing a database including both civil servants ( under the Personnel Management Office ) and personnel under contracts ( e. g. teachers in government-assisted schools ). Information on personnel will have to include geographical location ( i. e.: deployment ), qualifications, date of recruitment, ranking grade / level, etc. Procedures to ensure that this database is updated regularly will be determined and integrated within the EMIS. o Overhauling of rules and procedures used for the appointment / recruitment and deployment of existing and new personnel, in particular teachers, to expedite and ease the overall process ( i. e. providing better access to and rapid processing of information ) and ensuring greater compliance with existing rules in this area.", + "ner_text": [ + [ + 1070, + 1074, + "named" + ] + ], + "validated": false, + "empirical_context": "e. : deployment ), qualifications, date of recruitment, ranking grade / level, etc. Procedures to ensure that this database is updated regularly will be determined and integrated within the EMIS. o Overhauling of rules and procedures used for the appointment / recruitment and deployment of existing and new personnel, in particular teachers, to expedite and ease the overall process ( i.", + "type": "system", + "explanation": "However, EMIS is described as a system for managing information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of managing data related to personnel.", + "contextual_reason_agent": "However, EMIS is described as a system for managing information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 106, + "text": "98 the budget credibility of the country remained to be good supported with continuing robust budget execution and internal control systems. Budget transparency and comprehensiveness has also improved since the 2010 assessment. Good performance was noted on arrears management, access by public to fiscal information and revenue administration. The tax audit function is gradually increasing focus on risk assessment, but capacity constraints remain. Budget execution systems appear to continue to work well. Robust internal control systems remain. Procurement systems have improved since the 2010 assessment, although publication of procurement information has not progressed as much. Furthermore, effectiveness of scrutiny has strengthened to an extent given that the macroeconomic and fiscal framework is being reviewed by the relevant legislation unit and strengthened procedure for review of draft budget. Legislative scrutiny of audit reports improved performance on depth of hearing and monitoring implementation of recommendations. Although improvements are noted, strengthening the internal audit function has proceeded at a slower pace than expected. The assessment revealed that high staff turnover and capacity constraints remain in procurement and internal audit capacity. The provision of electronic links between the IBEX systems in Bureaus of Finance and Economic Development and those in sector bureaus, where IBEX was being established on a stand-alone basis remains to be the constraint affecting the ratings on accounting and reporting.", + "ner_text": [ + [ + 1332, + 1336, + "named" + ] + ], + "validated": false, + "empirical_context": "The assessment revealed that high staff turnover and capacity constraints remain in procurement and internal audit capacity. The provision of electronic links between the IBEX systems in Bureaus of Finance and Economic Development and those in sector bureaus, where IBEX was being established on a stand-alone basis remains to be the constraint affecting the ratings on accounting and reporting.", + "type": "system", + "explanation": "However, IBEX is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IBEX is a dataset because it is mentioned in the context of systems related to finance and accounting.", + "contextual_reason_agent": "However, IBEX is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 85, + "text": "Successful Application of the PMT for Pro-poor Targeting Results 236. in CT program resources going to those in most extreme need. These are: There are very clear steps and processes that lead to successful application of the PMT, resulting a ) Clarification of objectives: Clear orientation sessions for community leaders and beneficiaries on the objectives of the Program as part of a comprehensive communication strategy. b ) Co-responsibilities: Clarification of the co-responsibilities of community leaders for good implementation of the program, i. e., ensuring that eligible households are enrolled. c ) Qualified staff with computer skills: A comprehensive data registration and an application of PMT require staff with good computer skills. d ) Management Information System ( MIS ): A comprehensive and well-designed MIS system helps administrators to apply the PMT weights to generate the PMT score formula to determine eligible beneficiaries in addition to supporting accountability throughout the full CT program cycle. e ) Operations Manual: A comprehensive OM with instruments and guidelines guides all operational and organizational activities related to the Program. B. Gender Considerations - Inclusion of Female Household Members 237. The Yemen Country Assistance Strategy points out that Yemen \u2019 s social challenges are exacerbated by a fast-paced demographic growth. Nearly 50 percent of the population is below 15.", + "ner_text": [ + [ + 754, + 783, + "named" + ] + ], + "validated": false, + "empirical_context": "c ) Qualified staff with computer skills: A comprehensive data registration and an application of PMT require staff with good computer skills. d ) Management Information System ( MIS ): A comprehensive and well-designed MIS system helps administrators to apply the PMT weights to generate the PMT score formula to determine eligible beneficiaries in addition to supporting accountability throughout the full CT program cycle. e ) Operations Manual: A comprehensive OM with instruments and guidelines guides all operational and organizational activities related to the Program.", + "type": "system", + "explanation": "However, it is described as a system that supports operations rather than a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is described as a system that supports operations rather than a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 82, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 77 ANNEX 2: GEO-SPATIAL ANALYSIS 1. A major obstacle to effective targeting of development projects in many developing countries is the lack of existing datasets. One option to overcome this obstacle is remote sensing. For this project, preliminary work has been done to determine the host population around selected camps using remote sensing imaging analysis. To generate population estimates, the analysis uses world population census data and statistical modeling based on the relationship between populations and physical socioeconomic characteristics such as land uses, dwelling units and image pixel characteristics. 2. The figure shows the population layer within 25 km of selected camps, and the table shows population estimates at 50 km, 25 km, 15 km, 10 km and 5 km from the camps. Since village boundary information for Chad is not available in world population data, satellite imagery and estimates of average village size from the most recent census will be used to approximate the number of host villages around each camp. Population Layer within 25 km of Selected Camps in the East, South, and Lake Chad Regions Source: World Bank Geospatial Operations Support Team ( GOST ).", + "ner_text": [ + [ + 499, + 527, + "named" + ], + [ + 4, + 14, + "world population census data <> publisher" + ], + [ + 15, + 19, + "world population census data <> data geography" + ], + [ + 370, + 407, + "world population census data <> reference population" + ], + [ + 733, + 780, + "world population census data <> data description" + ], + [ + 917, + 921, + "world population census data <> data geography" + ], + [ + 987, + 1020, + "world population census data <> data description" + ], + [ + 1195, + 1212, + "world population census data <> data geography" + ], + [ + 1221, + 1231, + "world population census data <> publisher" + ] + ], + "validated": true, + "empirical_context": "For this project, preliminary work has been done to determine the host population around selected camps using remote sensing imaging analysis. To generate population estimates, the analysis uses world population census data and statistical modeling based on the relationship between populations and physical socioeconomic characteristics such as land uses, dwelling units and image pixel characteristics. 2.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as being used to generate population estimates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collection used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as being used to generate population estimates.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2. 5 years of implementation. The mid-term review will involve project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements, and outcomes. It will confirm the plans and processes for expansion of the project. 129. A Monitoring and Evaluation manual was produced under Merankabandi. The manual will be updated for the project, even though most of the monitoring and evaluation processes will be kept the same. The following table describes the main M & E instruments of the project. Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "ner_text": [ + [ + 1463, + 1478, + "named" + ], + [ + 1522, + 1532, + "Social Registry <> reference population" + ], + [ + 1723, + 1733, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "type": "registry", + "explanation": "The Social Registry is indeed a dataset as it compiles and organizes socio-economic data used for determining eligibility for social programs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it compiles socio-economic information from households.", + "contextual_reason_agent": "The Social Registry is indeed a dataset as it compiles and organizes socio-economic data used for determining eligibility for social programs.", + "contextual_signal": "described as a registry that compiles socio-economic information", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 29, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 27 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection three months after civic engagement training. Percentage of beneficiaries taking a more active role in their communities - disabled Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Description: Total number of individuals trained in civic engagement topics who report a higher level of community activity compared to the level reported before receiving training and support.", + "ner_text": [ + [ + 602, + 633, + "named" + ], + [ + 4, + 14, + "Post-Training Completion Survey <> publisher" + ], + [ + 45, + 73, + "Post-Training Completion Survey <> reference population" + ], + [ + 77, + 87, + "Post-Training Completion Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey used for data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "117_Somali-Urban-Investment-Planning-Project", + "page": 15, + "text": "Feasibility and preliminary design studies for the primary roads will be undertaken to assess whether the selection of the three roads by the Benadir Regional Administration ( BRA ) is the most efficient use of roads funds in relation to the roads already being upgraded with Turkish assistance, survey these roads, take soil core samples along the roadway to determine the existing sub-base and base materials of the roads, determine the standards for roads construction, locate the options for road material quarries, locate options for the asphalt and concrete batching plants, assess the labor pool available in Mogadishu vis-\u00e0-vis roads construction, and provide cost estimates. For the secondary / community roads, the activities will focus on a dialogue with communities to determine the priority works to be undertaken in each of the 17 Districts in Mogadishu, to determine optimum implementation modalities, and provide cost estimates.", + "ner_text": [ + [ + 321, + 338, + "named" + ] + ], + "validated": false, + "empirical_context": "Feasibility and preliminary design studies for the primary roads will be undertaken to assess whether the selection of the three roads by the Benadir Regional Administration ( BRA ) is the most efficient use of roads funds in relation to the roads already being upgraded with Turkish assistance, survey these roads, take soil core samples along the roadway to determine the existing sub-base and base materials of the roads, determine the standards for roads construction, locate the options for road material quarries, locate options for the asphalt and concrete batching plants, assess the labor pool available in Mogadishu vis-\u00e0-vis roads construction, and provide cost estimates. For the secondary / community roads, the activities will focus on a dialogue with communities to determine the priority works to be undertaken in each of the 17 Districts in Mogadishu, to determine optimum implementation modalities, and provide cost estimates.", + "type": "sample", + "explanation": "'Soil core samples' are not a dataset but rather physical samples taken for testing and analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'soil core samples' are data because they are collected for analysis.", + "contextual_reason_agent": "'Soil core samples' are not a dataset but rather physical samples taken for testing and analysis.", + "contextual_signal": "mentioned as part of the assessment process, not as a data source", + "tags": [] + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 49, + "text": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 44 of 54 on a regular basis in the last quarter Percentage of vaccination sites with functional cold chain The project will track the continuous functionality of the cold supply chain to ensure that vaccines are - at all times - maintained at optimal condition until being administered to beneficiaries 3 months MOHE and TPMA reports TPM MOHE / TPMA Percentage of reported serious AEFI cases for which investigations were initiated within 48 hours This indicator will measure the percentage of reported serious Adverse Events Following Immunization ( AEFI ) post COVID-19 vaccinations that have been reported to the Iraqi MOHE surveillance system, GRM and other channels that have been addressed and investigated within 48 hours of reporting to the total number of reported AEFIs. The aim is to measure the adequate and timely response and investigation to the reported AEFIs reported post COVID-19 vaccinations. 3 months Iraq MOHE surveillance system, GRM data, MOHE incident reporting and media sources.", + "ner_text": [ + [ + 714, + 717, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 44 of 54 on a regular basis in the last quarter Percentage of vaccination sites with functional cold chain The project will track the continuous functionality of the cold supply chain to ensure that vaccines are - at all times - maintained at optimal condition until being administered to beneficiaries 3 months MOHE and TPMA reports TPM MOHE / TPMA Percentage of reported serious AEFI cases for which investigations were initiated within 48 hours This indicator will measure the percentage of reported serious Adverse Events Following Immunization ( AEFI ) post COVID-19 vaccinations that have been reported to the Iraqi MOHE surveillance system, GRM and other channels that have been addressed and investigated within 48 hours of reporting to the total number of reported AEFIs. The aim is to measure the adequate and timely response and investigation to the reported AEFIs reported post COVID-19 vaccinations.", + "type": "system", + "explanation": "However, 'GRM' is not explicitly described as a data source or structured collection of data in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GRM' is a dataset because it is mentioned in the context of tracking and reporting data related to vaccinations.", + "contextual_reason_agent": "However, 'GRM' is not explicitly described as a data source or structured collection of data in the context provided.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 68, + "text": "30 \u2010 0. 06 3. 93 2. 60 Incomplete lower secondary 22. 00 23. 00 0. 05 0. 01 21. 33 20. 00 Incomplete upper secondary 34. 00 32. 40 \u2010 0. 05 \u2010 0. 01 30. 73 29. 40 Completed upper secondary but not post \u2010 secondary 12. 00 13. 50 0. 13 0. 03 0. 01 18. 50 0. 02 22. 50 Post \u2010 secondary 24. 00 25. 50 0. 06 0. 01 25. 50 25. 50 Source: Income and Expenditure Survey, 2010, Department of Statistics and own calculations.", + "ner_text": [ + [ + 329, + 358, + "named" + ], + [ + 360, + 364, + "Income and Expenditure Survey <> publication year" + ], + [ + 366, + 390, + "Income and Expenditure Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "50 25. 50 Source: Income and Expenditure Survey, 2010, Department of Statistics and own calculations.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of information used for calculations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of information used for calculations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "127_PAD10180PAD0P14972400PUBLIC00Box391431B", + "page": 54, + "text": "44 Procedures for preparing and approving RAP 54. Project Screening. Once the subprojects are identified by unions or municipalities, the PMU will obtain all permits / approvals related to the Project. Thereafter, they will cooperate with unions or municipalities to carry out social screening to determine whether or not the subprojects will result in any resettlement impact. The PMU will then decide on the need for the preparation of a Resettlement Action Plan ( RAP ) or an abbreviated RAP. 55. Socioeconomic and Inventory Survey. Following the identification of the subprojects that may involve involuntary resettlement, the PMU in cooperation with unions and municipalities will carry out a socio-economic study and census survey, in which baseline data within the subproject \u2019 s target areas is collected. This information shall include the PAPs and related household members or dependents, total land holdings, and affected assets. This information will be put in writing and shall be used in determining the appropriate compensation and assistance for each affected individual / household. 56. RAP preparation, review and approval. Once the census survey is completed, the PMU will work with relevant unions and municipalities to prepare the RAP.", + "ner_text": [ + [ + 723, + 736, + "named" + ], + [ + 382, + 385, + "census survey <> author" + ], + [ + 631, + 634, + "census survey <> publisher" + ], + [ + 747, + 760, + "census survey <> data type" + ], + [ + 849, + 853, + "census survey <> reference population" + ], + [ + 899, + 918, + "census survey <> data description" + ], + [ + 1183, + 1186, + "census survey <> author" + ] + ], + "validated": true, + "empirical_context": "Socioeconomic and Inventory Survey. Following the identification of the subprojects that may involve involuntary resettlement, the PMU in cooperation with unions and municipalities will carry out a socio-economic study and census survey, in which baseline data within the subproject \u2019 s target areas is collected. This information shall include the PAPs and related household members or dependents, total land holdings, and affected assets.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves the collection of baseline data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'census survey' implies a structured collection of data collected during the survey.", + "contextual_reason_agent": "This is indeed a dataset as it involves the collection of baseline data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 16, + "text": "Int J Equity Health 19, 23 ( 2020 ). 16 Nyawira, L., Tsofa, B., Musiega, A. et al. Management of human resources for health: implications for health systems efficiency in Kenya. BMC Health Serv Res 22, 1046 ( 2022 ). 17 McCollum R, Limato R, Otiso L, et al. Health system governance following devolution: comparing experiences of decentralisation in Kenya and IndonesiaBMJ Global Health 2018; 3: e000939 18 Kairu, A., Orangi, S., Mbuthia, B. et al. Examining health facility financing in Kenya in the context of devolution. BMC Health Serv Res 21, 1086 ( 2021 ). 19 Ministry of Health Kenya Harmonized Health Facility Assessment 2018-19. The diagnostic tests were: HIV, malaria, and syphilis rapid test; urine test for pregnancy; blood glucose; urine dipstick for glucose and protein; and hemoglobin levels", + "ner_text": [ + [ + 665, + 702, + "named" + ] + ], + "validated": false, + "empirical_context": "19 Ministry of Health Kenya Harmonized Health Facility Assessment 2018-19. The diagnostic tests were: HIV, malaria, and syphilis rapid test; urine test for pregnancy; blood glucose; urine dipstick for glucose and protein; and hemoglobin levels", + "type": "test", + "explanation": "However, these are specific tests rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it lists specific diagnostic tests that could imply data collection.", + "contextual_reason_agent": "However, these are specific tests rather than a structured collection of data or a dataset.", + "contextual_signal": "mentioned only as diagnostic tests, not as a data source", + "tags": [] + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 20, + "text": "The World Bank Enhancing Connectivity and Resilience in the Far North of Cameroon for Inclusiveness Project ( P178207 ) Page 21 of 82 functioning road asset management system. In addition, unenforced axle load controls pose a serious threat to road assets. This project will provide the necessary technical assistance to the Road Maintenance Fund by establishing a management system to improve the strategic planning for and forecasting of maintenance works. 23. Climate data and risks are not systematically included in the planning of interventions on the transport network, in project design, construction methods, or in the management of assets and operations. Decision makers and implementation entities lack the necessary data and analytics on the exposure and vulnerability of the Cameroonian road network to the existing and future effects of climate change. Cameroon \u2019 s under-designed and undermaintained road infrastructure is particularly vulnerable to natural hazards and climate change impacts. 24. Cameroon loses approximately 9. 8 percent of its GDP annually due to its unsafe roads. The Global Road Safety Facility estimates the total annual cost of fatal and serious road crashes in Cameroon to be at least US $ 3. 2 billion, which represented 9. 8 percent of Cameroon \u2019 s GDP in 2016. 39 The road fatality rate in Cameroon40 was estimated at 30. 1 deaths per 100, 000 population in 2016.", + "ner_text": [ + [ + 146, + 174, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Enhancing Connectivity and Resilience in the Far North of Cameroon for Inclusiveness Project ( P178207 ) Page 21 of 82 functioning road asset management system. In addition, unenforced axle load controls pose a serious threat to road assets.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a management system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a management system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 44, + "text": "programs for CBMPs in line with the IGAD HIV strategy b ) Number of persons from IGAD Member states trained in M & E including the use of data M & E system ( including structured learning agenda ) a ) Number of website hits in the last 12 months 0 b ) Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames 0 c ) Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months d ) Number of operational research studies funded 40", + "ner_text": [ + [ + 456, + 493, + "named" + ], + [ + 377, + 422, + "National Sentinel Surveillance system <> reference population" + ] + ], + "validated": true, + "empirical_context": "programs for CBMPs in line with the IGAD HIV strategy b ) Number of persons from IGAD Member states trained in M & E including the use of data M & E system ( including structured learning agenda ) a ) Number of website hits in the last 12 months 0 b ) Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames 0 c ) Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months d ) Number of operational research studies funded 40", + "type": "system", + "explanation": "In this context, it is used as a data source for monitoring and evaluation, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which often relates to data collection.", + "contextual_reason_agent": "In this context, it is used as a data source for monitoring and evaluation, indicating it functions as a structured collection of data.", + "contextual_signal": "mentioned as a data source for monitoring and evaluation", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 47, + "text": "Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA. Households benefiting from graduation programming ( Number ) Description Quantitative indicator counting number of households that benefit from graduation programming. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA. Businesses that benefit from partial credit guarantee scheme ( Number ) Description Quantitative indicator counting number of businesses that benefit from the BDF partial credit guarantee scheme. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BDF data fed to MINEMA. Responsibility for Data Collection BDF and MINEMA Volume of additional credit facilitated by the partial credit guarantee scheme ( Amount ( USD ) ) Description Quantitative indicator counting private capital enabled through PFIs under the BDF partial credit guarantee scheme. This is calculated based on an average loan size of US $ 700 x 3, 000 beneficiairies, for an approxiate target of US $ 2 million. Frequency Quarterly", + "ner_text": [ + [ + 180, + 188, + "named" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA.", + "type": "data", + "explanation": "In the context, 'BRD data' is explicitly referenced as being fed to MINEMA, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BRD data' is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "In the context, 'BRD data' is explicitly referenced as being fed to MINEMA, indicating it is used as a data source.", + "contextual_signal": "follows 'data fed to MINEMA'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 84, + "text": "233. In a series of micro-simulations on existing SWF beneficiary and survey data from 2008, the PMT targeting accuracy in increasing the coverage of the poor was compared to the previous Score Card targeting method used by SWF. The SWF 2008 survey included income level information as well as PMT / Score Card indicator data, allowing comparison of actual income reported with the PMT and Score Card scores for each applicant. The simulations showed that using the Score Card targeting method would extend coverage to only 8. 4 percent of the population and reach only 10. 8 percent of the lowest HBS decile ( Le., the poorest 10 percent ). By applying the PMT weights, the coverage for Group A remained approximately the same ( 9 percent ), but reach was extended to cover 26. 9 percent of the poorest 10 percent. By including Group A & B data in the simulation, 35 percent of the Yemeni population would be covered and the program would reach 77. 5 percent of the poorest 10 percent. In considering budget allocation to the extreme poor, the Score Card method was found to reach approximately 19 percent of the poorest, while the PMT Group A method reached approximately 50 percent of the poorest.", + "ner_text": [ + [ + 233, + 248, + "named" + ] + ], + "validated": true, + "empirical_context": "In a series of micro-simulations on existing SWF beneficiary and survey data from 2008, the PMT targeting accuracy in increasing the coverage of the poor was compared to the previous Score Card targeting method used by SWF. The SWF 2008 survey included income level information as well as PMT / Score Card indicator data, allowing comparison of actual income reported with the PMT and Score Card scores for each applicant. The simulations showed that using the Score Card targeting method would extend coverage to only 8.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that provides data used for empirical analysis in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey conducted in 2008 that collected data on income levels and PMT/Score Card indicators.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that provides data used for empirical analysis in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 26, + "text": "Most of the spending on goods will be for the acquisition and deployment of ( a ) a new IT system for tax administration, ( b ) an interoperable digital platform, ( c ) a Public Key Infrastructure ( PKI ), and ( d ) an e-ID system. Consulting services will be related to feasibility studies, as well as training and the establishment of legal and regulatory frameworks that will underpin the e-government and various IT systems. C. Lessons Learned and Reflected in the Project Design 52. The project design reflects important global lessons on institutional development and governance as detailed in the 2017 World Development Report ( WDR ), as well as lessons on the digital transformation process as identified in the 2016 WDR. The 2017 WDR on Governance and Law identifies commitment, coordination, and cooperation as the three core functions of institutions that are needed to ensure that rules and resources yield the desired outcomes. These three key ingredients are well reflected in the project design. The Government has demonstrated a strong commitment through its own funding of a first-class data center and ICT infrastructure, including its stated objective to provide e - services more broadly to the population. Coordination of all these efforts and initiatives is being conducted at the center of government by the General Secretariat of Government that reports directly to the President. Regarding the establishment of the e-ID, a great deal of coordination is taking place among key stakeholders ( Ministries of Interior, Social Affairs, Decentralization, and the Police ) to build consensus and avoid duplication. The National Security Fund has deployed biometric cards for 79, 000 individuals and has taken biographic and some biometric data for 281, 317 beneficiaries. The State Secretary of Social Solidarity is building a social registry tied to biometric information and has registered 33, 000 beneficiaries", + "ner_text": [ + [ + 1846, + 1861, + "named" + ] + ], + "validated": false, + "empirical_context": "The National Security Fund has deployed biometric cards for 79, 000 individuals and has taken biographic and some biometric data for 281, 317 beneficiaries. The State Secretary of Social Solidarity is building a social registry tied to biometric information and has registered 33, 000 beneficiaries", + "type": "registry", + "explanation": "However, 'social registry' is mentioned as a project being built and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'social registry' is a dataset because it involves the collection of information about beneficiaries.", + "contextual_reason_agent": "However, 'social registry' is mentioned as a project being built and not explicitly as a data source in the context.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 82, + "text": "The World Bank Strengthening Public Sector Efficiency and Statistical Capacity Project ( P151155 ) Page 79 of 93 \uf0b7 Increased share of competitive procurement methods \uf0b7 Increase value for money for public contract \uf0b7 Operationalization of MOOC Subcomponent 3. 2: Enhancing the capacity of organizations in the procurement system to carry out their functions \uf0b7 Clarified procurement rules, guidelines, and procedures \uf0b7 Setting an e-tracking system for the procurement chain 0. 5m Subcomponent 3. 3: Streamlining the procurement regulatory framework \uf0b7 Revised procurement code and related instruments 0. 7m Subcomponent 3. 4: Improving the management and monitoring of procurement performance: \uf0b7 Piloting individual performance contract approach in the procurement system \uf0b7 RRI to support procurement process performance in the pilot 3. 3m Improved decision - making process based on reliable statistical data Component 4: Enhancing the use of statistics for policy making \uf0b7 Timely production of reliable statistical data \uf0b7 Statistics widely disseminated Subcomponent 4. 1: Improvement of poverty-related data \uf0b7 Production of a series of Poverty Notes ( based on ECAM 4 and high - frequency surveys ) \uf0b7 Production of ECAM 5 \uf0b7 Analysis of the population census \uf0b7 Production of the LFS 5. 4m Subcomponent 4. 2: Strengthening the national accounts production \uf0b7 Quarterly production of improved national accounts ( including revised methodology for cross-border trade statistics ) \uf0b7 Creation of an economic simulation tool for MINEPAT simulation 2", + "ner_text": [ + [ + 1276, + 1279, + "named" + ], + [ + 4, + 14, + "LFS <> publisher" + ], + [ + 1238, + 1255, + "LFS <> data type" + ] + ], + "validated": true, + "empirical_context": "3m Improved decision - making process based on reliable statistical data Component 4: Enhancing the use of statistics for policy making \uf0b7 Timely production of reliable statistical data \uf0b7 Statistics widely disseminated Subcomponent 4. 1: Improvement of poverty-related data \uf0b7 Production of a series of Poverty Notes ( based on ECAM 4 and high - frequency surveys ) \uf0b7 Production of ECAM 5 \uf0b7 Analysis of the population census \uf0b7 Production of the LFS 5. 4m Subcomponent 4.", + "type": "dataset", + "explanation": "In the context, 'LFS' is associated with the production of poverty-related data, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'LFS' is a dataset because it is mentioned in the context of producing statistical data.", + "contextual_reason_agent": "In the context, 'LFS' is associated with the production of poverty-related data, indicating it functions as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "ner_text": [ + [ + 523, + 532, + "named" + ], + [ + 155, + 211, + "SNSOP MIS <> data description" + ], + [ + 220, + 225, + "SNSOP MIS <> reference population" + ], + [ + 533, + 549, + "SNSOP MIS <> data type" + ], + [ + 646, + 658, + "SNSOP MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "type": "management information system", + "explanation": "It is indeed a dataset as it is described as a management information system that stores records of beneficiary and payment data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that collects and manages data.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a management information system that stores records of beneficiary and payment data.", + "contextual_signal": "described as a management information system that stores records", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 41, + "text": "36 Target Values Data Collection and Reporting Project Outcome Indicators Baseline YR1 YR2 YR3 YR4 YR5 YR6 Frequency and Reports Data Collection Instruments Responsibility for Data Collection policy and planning. 2. 6 Extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. 10 indicators 12 indicators 20 Indicators 32 Indicators Yrs 1, 3 and 6 External assessment of EMIS Directorate of Educational Planning ( DEP ) 3. Teaching and Learning Resource Development 3. 1 Teacher policies revised to support application of national teacher standards. Nil Policies identified 50 % target policies revised 100 % of target policies revised Yrs 1, 3 and 6 MoE Report DTQS / PSPS 3. 2 Number / percentage of newly appointed teachers completing post - recruitment initial training in ETC. Nil 6 % 30 % 50 % 60 % 70 % 80 % Annual MoE Reports DTQS / ETC 3. 3 Number / percentage of new teachers appointed using a competency - based model.", + "ner_text": [ + [ + 240, + 244, + "named" + ] + ], + "validated": false, + "empirical_context": "2. 6 Extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. 10 indicators 12 indicators 20 Indicators 32 Indicators Yrs 1, 3 and 6 External assessment of EMIS Directorate of Educational Planning ( DEP ) 3.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with producing data for monitoring indicators.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 78, + "text": "Draft bidding documents under Sub-components 1. 1 and Component 2 for Busia and Mbale have been submitted to the Bank and bidding documents for Gulu will be ready by the end of October 2018. Request for Expression of Interest ( REOI ) for construction supervision for Busia and Mbale will be published in July 2018. The risk identification for the MWE and the NWSC and risk mitigation plan are based on the Procurement Risk Assessment and Management System. Based on the assessment, the Project procurement risk rating is High. The key risks and risk mitigation action plan are indicated in Table 2. 1. The residual risks after the implementation of the mitigation measures would be reduced to Substantial. 24. Use of national procurement procedures. Due to recent changes in Uganda \u2019 s procurement legal framework, Uganda National Procurement Procedures are not in line with the World Bank Procurement Regulations Section V - Para 5. 4 National Procurement Procedures.", + "ner_text": [ + [ + 407, + 456, + "named" + ] + ], + "validated": false, + "empirical_context": "Request for Expression of Interest ( REOI ) for construction supervision for Busia and Mbale will be published in July 2018. The risk identification for the MWE and the NWSC and risk mitigation plan are based on the Procurement Risk Assessment and Management System. Based on the assessment, the Project procurement risk rating is High.", + "type": "system", + "explanation": "However, it is described as a system for risk assessment and management, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Assessment and Management System', which suggests a structured approach to data.", + "contextual_reason_agent": "However, it is described as a system for risk assessment and management, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 790, + 794, + "named" + ] + ], + "validated": false, + "empirical_context": "However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data reporting and integration.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 105, + "text": "District accountants will coordinate the follow up of accountabilities from the communities with the subcounty accountants. The Public Financial Management Act 2015 and related Regulations; Treasury Accounting Instructions, 2003; Local Government Act; Local Government ( Financial and Accounting ) Regulations, 2007; and Local Government Financial Management Manual. SUN Accounting System but likely to migrate to the Integrated Financial Management Information System ( IFMIS ) June 30 Djibouti: Agence Djiboutienne de D\u00e9veloppement Social ( Djiboutian Social Development Agency, ADDS ) ADDS accounting staff under the Department of Accounting includes a finance officer who reports to the \u201c Agent Comptable, \u201d but the PIU will need to recruit an accountant who will be responsible for all financial management aspects of the project. Project Financial Operation Manual, which needs to be updated to cover this project. TOMPRO Accounting system and a management information system that needs to improve its functionality with respect to its reporting mechanism. December 31 Internal Control and Internal Audit Arrangements 57. Internal controls. The internal control procedures will be documented in the financial management manuals that are shown in the table 4. 1 for each of the implementing entities and their PIMs that will take into consideration gaps in their existing financial management manuals", + "ner_text": [ + [ + 367, + 388, + "named" + ] + ], + "validated": false, + "empirical_context": "The Public Financial Management Act 2015 and related Regulations; Treasury Accounting Instructions, 2003; Local Government Act; Local Government ( Financial and Accounting ) Regulations, 2007; and Local Government Financial Management Manual. SUN Accounting System but likely to migrate to the Integrated Financial Management Information System ( IFMIS ) June 30 Djibouti: Agence Djiboutienne de D\u00e9veloppement Social ( Djiboutian Social Development Agency, ADDS ) ADDS accounting staff under the Department of Accounting includes a finance officer who reports to the \u201c Agent Comptable, \u201d but the PIU will need to recruit an accountant who will be responsible for all financial management aspects of the project. Project Financial Operation Manual, which needs to be updated to cover this project.", + "type": "system", + "explanation": "However, the context indicates that it is a system mentioned for accounting purposes, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'System' in its name, which can imply data handling.", + "contextual_reason_agent": "However, the context indicates that it is a system mentioned for accounting purposes, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 42, + "text": "The assessment also covers a mapping of potential training facilities in project locations to verify their operational capacity to deliver, depending on available human and material resources. In this context, vocational training and economic activities to be proposed to ex-combatants will be designed based both on each individual \u2019 s background and existing opportunities in the region of return, as will be identified in the market analysis report. In addition to this assessment, preliminary data was obtained from a rapid survey realized by an NGO which investigated the prevailing trends pertaining to the profile of the future beneficiaries, including their concerns and interests, as well as their social and educational background. These two survey and research works significantly contribute to ensuring proper match between the supply of skills and labor market demand in the reinsertion project design. 33. Psychosocial suffering and mental and behavioral disorders are risk factors for socio - economic reinsertion and post-war recovery. Along with the screening process for chronic illnesses and physical disability, during the cantonment process, medical personnel will screen all ex-combatants for psychological trauma and socially challenging behaviors. The project will provide psycho-social counselling for beneficiaries who require such assistance during the initial six months. Follow-up psycho-social counselling will be provided to those who would still be in need in their communities during the following twelve-month period of reinsertion. Mali disposes of limited but existing capacity to treat psychological disorders in some psychiatric clinics. 29 The project will consider evaluating the feasibility of treatment of violence related trauma spectrum disorders through a referral system, which could include a capacity building, training and technical assistance. This community based support would be contingent on existing capacity for the provision of such support which will be determined through an assessment. The 29 For example the psychiatric clinic of Point G Hospital.", + "ner_text": [ + [ + 522, + 534, + "named" + ], + [ + 272, + 285, + "rapid survey <> reference population" + ], + [ + 550, + 553, + "rapid survey <> publisher" + ], + [ + 628, + 648, + "rapid survey <> reference population" + ], + [ + 707, + 740, + "rapid survey <> data description" + ], + [ + 1567, + 1571, + "rapid survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "In this context, vocational training and economic activities to be proposed to ex-combatants will be designed based both on each individual \u2019 s background and existing opportunities in the region of return, as will be identified in the market analysis report. In addition to this assessment, preliminary data was obtained from a rapid survey realized by an NGO which investigated the prevailing trends pertaining to the profile of the future beneficiaries, including their concerns and interests, as well as their social and educational background. These two survey and research works significantly contribute to ensuring proper match between the supply of skills and labor market demand in the reinsertion project design.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that provides preliminary data for the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'rapid survey' implies a structured collection of data gathered for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that provides preliminary data for the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 67, + "text": "One of the main challenges faced during the implementation of the first phase of RACE is the unavailability of timely and reliable data, structured data collection systems, and systems-capacity to analyze data that could inform policy decisions or programmatic interventions. There are currently multiple data collection mechanisms working in parallel, in addition to long delays in the ability for MEHE or CERD to collect and use school-level data for decision-making. As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders. MEHE will also deploy an EMIS in order to support the timely collection of the data, so that decision-making at both the school and the central MEHE and CERD levels is based on accurate and updated data.", + "ner_text": [ + [ + 431, + 448, + "named" + ] + ], + "validated": false, + "empirical_context": "One of the main challenges faced during the implementation of the first phase of RACE is the unavailability of timely and reliable data, structured data collection systems, and systems-capacity to analyze data that could inform policy decisions or programmatic interventions. There are currently multiple data collection mechanisms working in parallel, in addition to long delays in the ability for MEHE or CERD to collect and use school-level data for decision-making. As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders.", + "type": "data", + "explanation": "'School-level data' is mentioned in a general sense and not as a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'school-level data' refers to a structured collection of data related to schools.", + "contextual_reason_agent": "'School-level data' is mentioned in a general sense and not as a specific dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 46, + "text": "Economic Opportunity & Self-reliance Businesses and cooperatives that receive capacity building support and that are operational 1 year after intervention ( Percentage ) Description Quantitative indicator counting percentage of businesses and cooperatives that receive capacity - building support under sub-component 2 ( a ) that are still operational one year after the capacity building has been completed. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA. Financial institutions that receive capacity building and are providing financial services to refugees ( Number ) Description Quantitative indicator counting number of instituitons that receive capcity building and that are providing financial services to refugees. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA. Responsibility for Data Collection BRD, BDF and MINEMA. Micro-finance institutions and Savings and Credit Cooperatives that become project participating financial institutions ( Number ) Description Quantitative indicator counting number of MFIs and SACCOs that become project participating financial instutions. Frequency Quarterly.", + "ner_text": [ + [ + 1082, + 1090, + "named" + ], + [ + 182, + 204, + "BRD data <> data type" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA. Responsibility for Data Collection BRD, BDF and MINEMA.", + "type": "data", + "explanation": "In the context, 'BRD data' is explicitly referenced as being fed to MINEMA, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BRD data' is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "In the context, 'BRD data' is explicitly referenced as being fed to MINEMA, indicating it is used as a data source.", + "contextual_signal": "follows 'data fed to MINEMA'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "190_multi-page", + "page": 10, + "text": "Prior to the certification, and including the pre-implementation period during which the PPF will be used, disbursements will be based on SOEs subject to the thresholds described in Annex 6, as well as on supporting documentation for advances made to the SA for expenditures above the threshold and for direct payments. Each disbursement application will be signed by two authorized signatories whose names and corresponding signature specimens will be subrnitted to the Administrator through MOH. Counterpart Funding. The project will open a separate bank account for the counterpart funding from the PA for the Value Added Tax ( VAT ) compensation, as well as for the counterpart contributions financed by the PA, which represent 85 % of the incremental operating costs. Prior to Board presentation, the PIU will open an account for counterpart funding in a conmmercial bank, based on a formal agreement reached between the Ministry of Finance ( MOF ) and MOH regarding the procedure for VAT reimbursement. Monitoring and Evaluation ( M & E ). The PIU will be responsible for monitoring progress against agreed-upon performance indicators ( specified in Annex 1 ). For this purpose, it will develop and maintain a project information system which will generate annual progress reports for the PA and relevant donors, including the Administrator. Under Component 3, an in-depth Client Access, Utilization, and Satisfaction Survey ( CAUS ) will be conducted at the start and end of the project imnplementation period in order to measure the impact of the project on the quality and efficiency of the PHC services in the selected clinics. The MOH ' s technical units responsible for implementing each project component will provide the PIU with quarterly progress reports summarizing the current status of project implementation, including financial - 7 -", + "ner_text": [ + [ + 1216, + 1242, + "named" + ] + ], + "validated": false, + "empirical_context": "The PIU will be responsible for monitoring progress against agreed-upon performance indicators ( specified in Annex 1 ). For this purpose, it will develop and maintain a project information system which will generate annual progress reports for the PA and relevant donors, including the Administrator. Under Component 3, an in-depth Client Access, Utilization, and Satisfaction Survey ( CAUS ) will be conducted at the start and end of the project imnplementation period in order to measure the impact of the project on the quality and efficiency of the PHC services in the selected clinics.", + "type": "system", + "explanation": "However, it is described as a project information system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, it is described as a project information system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 43, + "text": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 39 of 104 objectives ( annex 1 ), which will be used to track progress in implementation activities. Smartphone-based field data capture tools will be promoted as part of the monitoring efforts. 88. The CEP IT will carry out in-depth midterm and completion assessments. Before the midterm review in Year 3 and at the end of the project, analysis of changes in forest cover and land use patterns will be carried out based on GIS mapping, to monitor the land area where sustainable land management practices have been adopted. The project will also contribute to regional monitoring under the RESILAND CA +. Well-being surveys will be carried out before the midterm review in Year 3 and at the project end. These surveys will also determine changes in monetary or non-monetary benefits from landscape restoration forestry, pasture, and agricultural lands, disaggregated by gender. A final impact evaluation of the landscape restoration activities will be carried out in the final year of implementation. 89. Project impact on land degradation in the targeted landscapes will be monitored and evaluated using the UNCCD LDN Impact Monitoring Methodology.", + "ner_text": [ + [ + 207, + 248, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 39 of 104 objectives ( annex 1 ), which will be used to track progress in implementation activities. Smartphone-based field data capture tools will be promoted as part of the monitoring efforts. 88.", + "type": "tool", + "explanation": "However, it is not a dataset but rather a tool used for capturing data, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions 'data capture tools' which implies data collection.", + "contextual_reason_agent": "However, it is not a dataset but rather a tool used for capturing data, not a structured collection of data itself.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "116_PAD1510-PAD-P152821-IDA-R2016-0078-1-Box394886B-OUO-9", + "page": 52, + "text": "The first families to move as part of the local integration process were vulnerable former 27 Zambia Central Statistical Office, 2010, Census of Population and Housing 28 World Bank, 2015, Mapping Subnational Poverty in Zambia, p. 27-29; World Bank, 2015. 6th edition, Zambia Economic Brief indicates that the national poverty rate in Zambia is 62 percent, p. 1. 29 Zambia Central Statistics Office, 2011, Living Conditions Monitoring Survey 2006 and 2010, p. 249 30 Oxford Policy Management, 2014, Baseline Study and Monitoring and Evaluation Framework for Phase II of the PPCR: 26 percent reported moderate hunger in the Barotse sub-basin where Kaoma is, whereas the figure was 7 percent in the Kafue sub-basin where Solwezi is. In both regions less than 1 percent stated they had severe hunger, p. 70 31 788 Angolans out of 12715 residing in Meheba and Mayukwayukwa. There are an additional 5890 self-settled Angolans and 56 Angolans registered in Lusaka. ( UNHCR, 2015 ) 32 Examples of vulnerability criteria include: separated child, exposure to multiple displacements, physical disability, older person unable to care for self, and single female household representative. UNHCR Angolan and Rwandan Refugee Profile as of November 6, 2015, ( UNHCR, 2015 ), p. 1, 2 33 World Bank, 2013, PAD - Zambia Strengthening Climate Resilience ( PPCR Phase II ), p. 1", + "ner_text": [ + [ + 135, + 167, + "named" + ], + [ + 94, + 127, + "Census of Population and Housing <> author" + ], + [ + 129, + 133, + "Census of Population and Housing <> reference year" + ], + [ + 220, + 226, + "Census of Population and Housing <> data geography" + ], + [ + 623, + 640, + "Census of Population and Housing <> data geography" + ] + ], + "validated": true, + "empirical_context": "The first families to move as part of the local integration process were vulnerable former 27 Zambia Central Statistical Office, 2010, Census of Population and Housing 28 World Bank, 2015, Mapping Subnational Poverty in Zambia, p. 27-29; World Bank, 2015.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a census, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a census, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 16, + "text": "Fayda, by providing a digitally verifiable ID, is envisioned to provide enough assurance to waive these controls, hence reducing costs, time, and risks associated with account opening and credit applications. This is particularly important as social assistance programs like the PSNP ( which can be gateways for financial inclusion ) are currently being digitalized with the transition to digital payments and a new management information system ( MIS ) to replace the current manual processes used to target and enroll beneficiaries. Fayda can also improve the integrity and transparency of this rather complex and currently paper-based safety nets delivery chain. There is also interest from the GoE to use Fayda to support microfinance for farmers and for providing ID to students ages 16 and older. This is being done to certify diplomas, community health insurance, public and private pensions, use of mobile money and microloans, and SIM card registration. 23. Fayda could also play an important role in realizing peace dividends in Ethiopia. First, at the social level, Fayda will enable all citizens and residents to exercise their rights related to having proof of their legal identity, and it will be the first universally accessible ID system that only focuses on individual identity, without collecting information on ethnicity nor religion.", + "ner_text": [ + [ + 416, + 445, + "named" + ] + ], + "validated": false, + "empirical_context": "Fayda, by providing a digitally verifiable ID, is envisioned to provide enough assurance to waive these controls, hence reducing costs, time, and risks associated with account opening and credit applications. This is particularly important as social assistance programs like the PSNP ( which can be gateways for financial inclusion ) are currently being digitalized with the transition to digital payments and a new management information system ( MIS ) to replace the current manual processes used to target and enroll beneficiaries. Fayda can also improve the integrity and transparency of this rather complex and currently paper-based safety nets delivery chain.", + "type": "system", + "explanation": "However, it is mentioned as a system that replaces manual processes, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'management information system' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system that replaces manual processes, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "094_PAD-final-02262018", + "page": 27, + "text": "The CDR has been the implementing agency for all recent projects financed by the World Bank and has the required skills. 50. The monitoring of results during implementation will include disaggregation of select indicators by region, type of works, gender, and nationality. The Results Framework includes key project indicators to monitor project implementation success. It is designed to guarantee effectiveness in the measurement of key project outcomes and outputs based on simple and measurable indicators. Nevertheless, the monitoring of the project during implementation will be through the implementing agency \u2019 s reports and the World Bank Implementation Status and Results Reports, further information and disaggregation of some indicators by nationality and gender, and the extent and type of works, among others. All workers will be identified either through a form of identification or through their work permit numbers. Their work will be monitored using standard monitoring tools ( daily time sheets that are filled, aggregated, and logged into a system every week by an onsite project supervisor, which will then be reviewed by the contractor \u2019 s main offsite office ). Public transport surveys will also provide disaggregated information on", + "ner_text": [ + [ + 1184, + 1208, + "named" + ], + [ + 81, + 91, + "Public transport surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "Their work will be monitored using standard monitoring tools ( daily time sheets that are filled, aggregated, and logged into a system every week by an onsite project supervisor, which will then be reviewed by the contractor \u2019 s main offsite office ). Public transport surveys will also provide disaggregated information on", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned that these surveys will provide disaggregated information, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'public transport surveys' implies a structured collection of data related to public transport.", + "contextual_reason_agent": "In the context, it is explicitly mentioned that these surveys will provide disaggregated information, indicating they are used as a data source.", + "contextual_signal": "follows 'will also provide disaggregated information on'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 41, + "text": "A separate manual of procedures will be endorsed to define the PBC process and the detailed roles and responsibilities of all actors at the central, regional, district, and school levels. B. Results Monitoring and Evaluation Arrangements 79. The project will use the Results Framework to monitor and assess progress in the implementation of activities and in achievement of the PDO. The Results Framework includes PDO-level and intermediate results indicators, baselines and target values, frequency, data source methodology, and responsibilities for data collection. Where feasible, data collected will be disaggregated by gender and refugee status to monitor interventions \u2019 specific impact on these populations. Data to monitor the project and inform the indicators will be drawn from three main sources: official government data, surveys / evaluations, and progress reports produced by the PCU. In areas difficult to reach due to insecurity or conflict, the proposed project will use \u2018 Enhanced Monitoring and Evaluation \u2019 to monitor implementation progress for ongoing investments by geo-enabled methods and supplemented by community discussions. 80. The MEP and MES have low M & E capacity and producing timely and reliable data remains an issue. The ministries do not have an effective EMIS with data collection tools and processes, and school census are carried out through paper-based questionnaires.", + "ner_text": [ + [ + 1293, + 1297, + "named" + ] + ], + "validated": false, + "empirical_context": "The MEP and MES have low M & E capacity and producing timely and reliable data remains an issue. The ministries do not have an effective EMIS with data collection tools and processes, and school census are carried out through paper-based questionnaires.", + "type": "system", + "explanation": "However, EMIS is described as a system and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system and not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 31, + "text": "universities and polytechnic institutes \u2022 Provision of equipment for remote learning \u2022 Post-secondary training on digital skills in climate resilient agriculture \u2022 Stipends to female students and refugee / host communities to increase their enrollment \u2022 Digital agriculture skills training courses designed / implemented \u2022 Increased enrollment of females in digital agriculture programs \u2022 Increased participation of refugee / host communities in digital agriculture programs \u2022 Increase in skills development opportunities in digital agriculture: o Youth with certified skills to improve agricultural practices, o Youth employed Workforce possessing key skills to successfully transition into the labor market and increase productivity Enhanced resilience in the agriculture sector, including to climate - related events \u2022 Low institutional capacity to deliver education services for human capital development \u2022 Lack of informed policymaking \u2022 Lack of data on students, budget, and other sector indicators \u2022 Train government actors on key issues for education service delivery and coordination \u2022 Capacity building for data collection ( i. e., on basic access statistics, supervision, learning outcomes, and audits ) \u2022 Establish a cross-ministerial PIU \u2022 Realtime sector monitoring system developed and launched \u2022 Training courses for Ministry staff delivered in fiduciary management, budgeting and planning, strategy development \u2022 Offices equipped \u2022 PIU fully staffed and trained \u2022 Strengthened capacity for management of the education system Better informed budgeting and sector planning. Increased local capacity to deliver education services.", + "ner_text": [ + [ + 1253, + 1286, + "named" + ] + ], + "validated": false, + "empirical_context": "e. , on basic access statistics, supervision, learning outcomes, and audits ) \u2022 Establish a cross-ministerial PIU \u2022 Realtime sector monitoring system developed and launched \u2022 Training courses for Ministry staff delivered in fiduciary management, budgeting and planning, strategy development \u2022 Offices equipped \u2022 PIU fully staffed and trained \u2022 Strengthened capacity for management of the education system Better informed budgeting and sector planning. Increased local capacity to deliver education services.", + "type": "system", + "explanation": "However, the context describes it as a system without indicating it functions as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'monitoring system' which suggests data collection.", + "contextual_reason_agent": "However, the context describes it as a system without indicating it functions as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 17, + "text": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3. Project Design Targeting In the absence of data on household income, the Project uses geographical targeting at the level of the governorates where poverty level is estimated at 18. 3 % on average, 32. 5 % in Hebron and 21. 3 % in Bethlehem ( 2010 ) 9. Both governorates also have the highest unemployment rates in the West Bank with averages of 22. 8 % and 22. 4 %, respectively10. The vulnerability of these groups is exacerbated by mobility restrictions and poor market access. Output-based subsidy payment 8 These closures are part of the World Bank project and including them in the OBA targets will further incentivize JSC-H & B to achieve closures according to the planned schedule. In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "ner_text": [ + [ + 403, + 406, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves data tracking and reporting.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 41, + "text": "32 Indicator 2. 4: Teacher feedback on training and certification system monitored, analyzed, and included in the annual monitoring and progress reports developed by ETC No Yes / No No Yes Annually MOE Teacher surveys Reformed student assessment and certification system Indicator 3. 1: Grade 3 diagnostic test on early grade reading and math implemented 7. 2 No Yes / No No Yes Annually MOE Assessments records for a sample of schools Indicator 3. 2: Legal framework for the Tawjihi exam has been adopted so that its secondary graduation and certification function is separated from its function as a screening mechanism for university entrance 7. 4 No Yes / No No Yes Annually MOE Indicator 3. 3: Student and Teacher Feedback on first phase Tawjihi reform inform the Tawjihi reform rollout plan No Yes / No No Yes Annually MOE Student and teacher feedback surveys Strengthened management of education system Indicator 4. 1: Geographical Information System ( GIS ) and Open EMIS produce accurate and timely disaggregated data needed for program monitoring and evaluation 8. 1 No Yes / No No Yes Annually MOE GIS reports should reflect data updated on a quarterly basis and verified by MOE using its verification protocols to be considered accurate Indicator 4. 2: Percentage of bids for goods and works that needed to be re \u2010 bid No Percentage N / A < 20 % Annually MOE", + "ner_text": [ + [ + 970, + 979, + "named" + ] + ], + "validated": false, + "empirical_context": "3: Student and Teacher Feedback on first phase Tawjihi reform inform the Tawjihi reform rollout plan No Yes / No No Yes Annually MOE Student and teacher feedback surveys Strengthened management of education system Indicator 4. 1: Geographical Information System ( GIS ) and Open EMIS produce accurate and timely disaggregated data needed for program monitoring and evaluation 8. 1 No Yes / No No Yes Annually MOE GIS reports should reflect data updated on a quarterly basis and verified by MOE using its verification protocols to be considered accurate Indicator 4.", + "type": "system", + "explanation": "'Open EMIS' is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Open EMIS' is a dataset because it is associated with data production and management.", + "contextual_reason_agent": "'Open EMIS' is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 41, + "text": "With respect to the NPTP implementation and institutional structure, the following explains the overall implementation arrangements of the program: \uf0b7 The Council of Ministers makes policy decisions related to the NPTP, allocates annual budget, and defines cut-off scores which determine benefits. \uf0b7 The Social-IMC reviews progress of NPTP and makes recommendations to the Council of Ministers. \uf0b7 The NPTP Project Unit in the MOSA is responsible for the following: ( i ) managing the NPTP database in MOSA; ( ii ) receiving household applications; ( iii ) interfacing with applicants; ( iv ) entering data; ( v ) conducting household visits; ( vi ) checking for data errors; ( vii ) transmitting data to the central database of the NPTP CMU; ( viii ) verifying claims from hospitals, schools, and PHCs and authorizing payments; ( ix ) managing the outreach campaign; ( x ) managing the e - card food voucher beneficiaries list, delivery of the e-cards to beneficiaries, and follow up; and ( xi ) monitoring of the program ( specifically inputs and outputs ). \uf0b7 The NPTP CMU in the PCM is responsible for the following: ( i ) managing the central database; ( ii ) validating data and cross-checking with national databases; ( iii ) processing household data, generating scores and ranks according to the proxy-means testing ( PMT ) formula, and providing the list of beneficiaries; ( iv ) maintaining the PMT formula; ( v ) analyzing national data and reporting findings to the Social Inter-Ministerial Committee ( Social-IMC ); ( vi ) monitoring of program results including targeting performance; and ( vii ) auditing data processing.", + "ner_text": [ + [ + 885, + 925, + "named" + ], + [ + 954, + 967, + "e - card food voucher beneficiaries list <> reference population" + ] + ], + "validated": true, + "empirical_context": "\uf0b7 The Social-IMC reviews progress of NPTP and makes recommendations to the Council of Ministers. \uf0b7 The NPTP Project Unit in the MOSA is responsible for the following: ( i ) managing the NPTP database in MOSA; ( ii ) receiving household applications; ( iii ) interfacing with applicants; ( iv ) entering data; ( v ) conducting household visits; ( vi ) checking for data errors; ( vii ) transmitting data to the central database of the NPTP CMU; ( viii ) verifying claims from hospitals, schools, and PHCs and authorizing payments; ( ix ) managing the outreach campaign; ( x ) managing the e - card food voucher beneficiaries list, delivery of the e-cards to beneficiaries, and follow up; and ( xi ) monitoring of the program ( specifically inputs and outputs ). \uf0b7 The NPTP CMU in the PCM is responsible for the following: ( i ) managing the central database; ( ii ) validating data and cross-checking with national databases; ( iii ) processing household data, generating scores and ranks according to the proxy-means testing ( PMT ) formula, and providing the list of beneficiaries; ( iv ) maintaining the PMT formula; ( v ) analyzing national data and reporting findings to the Social Inter-Ministerial Committee ( Social-IMC ); ( vi ) monitoring of program results including targeting performance; and ( vii ) auditing data processing.", + "type": "list", + "explanation": "This is a dataset as it is explicitly mentioned in the context as part of the management of the e-card food voucher program.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a list of beneficiaries, which implies a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned in the context as part of the management of the e-card food voucher program.", + "contextual_signal": "mentioned as a list of beneficiaries in the context of managing the program", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 65, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 60 of 130 2017. People provided with access to electricity with grid, mini-grid, off-grid under the project, of which refugees The indicator will track the number of refugees beneficiary of access to grid, mini-grid, and off-grid solutions under the project. Quarterly Project implementati on progress reports Service providers customers database, approved loan applications and installation reports. UECCC, MEMD People with access to clean cooking solutions under the project The indicator will track the number of people benefitting from access to clean cooking solutions under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016 - 2017. Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Approved loan and grants applications and installation reports. UECCC, MEMD Commercial and productive uses beneficiaries of grid, mini-grid, off-grid access The indicator will track beneficiaries of grid, mini - grid, off-grid electricity access for commercial and productive uses purposes. Quarterly Project implementati on progress report. Service providers customers database, MEMD database, approved loan applications and installation reports. Productive uses technologies include: internal wiring, efficient appliances, SRU, SPU, milling units etc.", + "ner_text": [ + [ + 862, + 892, + "named" + ], + [ + 87, + 91, + "UBOS National Household Survey <> publication year" + ], + [ + 695, + 732, + "UBOS National Household Survey <> data description" + ], + [ + 765, + 776, + "UBOS National Household Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Approved loan and grants applications and installation reports.", + "type": "survey", + "explanation": "The context confirms it is used as a data source for average household size, indicating it is indeed a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data on average household size.", + "contextual_reason_agent": "The context confirms it is used as a data source for average household size, indicating it is indeed a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 58, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 54 of 64 Balbala. People who have received essential health, nutrition, and population ( HNP ) services Semi - annual HMIS Semi-annual internal project review by DGP MOH People who have received essential health, nutrition, and population ( HNP ) services - Female ( RMS requirement ) Number of children immunized Number of women and children who have received basic nutrition services Number of deliveries attended by skilled health personnel Of which refugees Of which host community residents A \" host community resident \" is any non-refugee resident of the commune of Balbala. Children under 5 years old whose growth monitoring is recorded and reported in a central database Share of children who are being monitored regularly for nutrition status, as long as at least one of wasting, stunting, or underweight is measured.", + "ner_text": [ + [ + 731, + 747, + "named" + ], + [ + 78, + 85, + "central database <> data geography" + ], + [ + 641, + 648, + "central database <> data geography" + ], + [ + 650, + 676, + "central database <> reference population" + ] + ], + "validated": true, + "empirical_context": "People who have received essential health, nutrition, and population ( HNP ) services Semi - annual HMIS Semi-annual internal project review by DGP MOH People who have received essential health, nutrition, and population ( HNP ) services - Female ( RMS requirement ) Number of children immunized Number of women and children who have received basic nutrition services Number of deliveries attended by skilled health personnel Of which refugees Of which host community residents A \" host community resident \" is any non-refugee resident of the commune of Balbala. Children under 5 years old whose growth monitoring is recorded and reported in a central database Share of children who are being monitored regularly for nutrition status, as long as at least one of wasting, stunting, or underweight is measured.", + "type": "database", + "explanation": "In this context, it is indeed a dataset as it refers to a structured collection of recorded growth monitoring data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'central database' suggests a structured collection of data.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it refers to a structured collection of recorded growth monitoring data.", + "contextual_signal": "mentioned as a source for recorded and reported data", + "tags": [] + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 29, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 26 of 68 some filters mainly related to the presence of children below 12 years and pregnant women. The project will rely on the refugee household data collected by UNHCR. The targeting methodology will be tested to ensure its applicability to refugees and adapted as needed. Moreover, these activities will be complemented by a strong communications campaign designed in partnership with humanitarian agencies to ensure that the project is seen as fair to both refugees and host communities. C. Sustainability 66. The project contributes to sustainability in two ways. First, in line with the CEN, the project focuses on building the institutional capacity, enhancing stewardship and governance of the MoH. This will be undertaken through customized training approach to MoH personnel to ensure an acceptable level of project management and fiduciary oversight at the MoH before transitioning fully to a government-led implementation modality. Investment under the project is expected to strengthen the health system in the country, ensuring institutional sustainability to manage service delivery.", + "ner_text": [ + [ + 219, + 241, + "named" + ], + [ + 15, + 26, + "refugee household data <> data geography" + ], + [ + 255, + 260, + "refugee household data <> publisher" + ], + [ + 334, + 342, + "refugee household data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 26 of 68 some filters mainly related to the presence of children below 12 years and pregnant women. The project will rely on the refugee household data collected by UNHCR. The targeting methodology will be tested to ensure its applicability to refugees and adapted as needed.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned that the project will rely on the refugee household data collected by UNHCR.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected about refugee households.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that the project will rely on the refugee household data collected by UNHCR.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 39, + "text": "To help increase access to fodder, especially during the dry season, the project will support the development of commercial fodder production. Cash-for-work programs under this project will emphasize the rehabilitation or construction of new water management systems that are critical to community and livelihoods recovery. D. Role of Partners 80. This Project has been designed following an inclusive multi-stakeholder and multi-partner process. The project team worked closely with FAO and ICRC to inform and design the project components, to ensure complementarity, geographical and programmatic harmonization. Whereas the ICRC will deliver immediate food, water and cash response to the drought affected population, FAO will provide livelihood 29 FAO \u2019 s impact assessment report shall present a detailed evaluation, which includes key data such as a Food Consumption Index, the dietary diversity score and the increase in Tropical Livestock Unit ( TLU ). Data collection will use the RIMA-based questionnaire ( RIMA \u2013 Resilience Index Measurement and Analysis )", + "ner_text": [ + [ + 855, + 877, + "named" + ] + ], + "validated": false, + "empirical_context": "The project team worked closely with FAO and ICRC to inform and design the project components, to ensure complementarity, geographical and programmatic harmonization. Whereas the ICRC will deliver immediate food, water and cash response to the drought affected population, FAO will provide livelihood 29 FAO \u2019 s impact assessment report shall present a detailed evaluation, which includes key data such as a Food Consumption Index, the dietary diversity score and the increase in Tropical Livestock Unit ( TLU ). Data collection will use the RIMA-based questionnaire ( RIMA \u2013 Resilience Index Measurement and Analysis )", + "type": "index", + "explanation": "However, the Food Consumption Index is mentioned as a key data point rather than a structured collection of data or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'index' which often refers to a collection of data points.", + "contextual_reason_agent": "However, the Food Consumption Index is mentioned as a key data point rather than a structured collection of data or dataset.", + "contextual_signal": "mentioned only as a data point, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 85, + "text": "In situations of displacement, women and girls are among the most vulnerable, facing a number of challenges including extreme poverty, lack of access to basic infrastructure, services and livelihoods. 4. Where access to food is inadequate, women and girls \u2014 who most often shoulder the burden of finding and collecting fuel, water and food \u2014 may venture to unprotected areas where they are at heightened risk of sexual abuse, including forced and / or coerced prostitution. 54 Nearly 60 percent of key informants indicate that water collection points and firewood are at risk areas for women. 55 This risk is present for women both in refugee camps and in host communities, though may be heightened in refugee camps that are located farther from local sources. 50 2016-2017 Demographic and Health Survey in Burundi 51 Victoria Rames, Cl\u00e9mence Bununagi, and Caritas Niyonzima ( 2017 ) USAID / Burundi Gender Analysis Report. Prepared by Banyan Global. 52 UNHCR ( 2018 ). Congolese Situation: Responding to the needs of displaced Congolese and Refugees. Annex \u2013 Burundi. Supplemental Appeal. http: / / reporting. unhcr. org / sites / default / files / 2018 % 20congolese % 20Situation % 20SB % 20 - % 20Burundi. pdf 53 IASC ( 2015 ). Guidelines for integrating gender-based violence interventions in humanitarian action: reducing risk, promoting resilience and aiding recovery. 54 IASC ( 2015 ). Guidelines for integrating gender-based violence interventions in humanitarian action: reducing risk, promoting resilience and aiding recovery. 55 UNOCHA ( 2019 ). Burundi Humanitarian Needs Overview.", + "ner_text": [ + [ + 774, + 803, + "named" + ], + [ + 31, + 46, + "Demographic and Health Survey <> reference population" + ], + [ + 764, + 773, + "Demographic and Health Survey <> reference year" + ], + [ + 807, + 814, + "Demographic and Health Survey <> data geography" + ], + [ + 818, + 832, + "Demographic and Health Survey <> author" + ], + [ + 834, + 851, + "Demographic and Health Survey <> author" + ], + [ + 857, + 874, + "Demographic and Health Survey <> author" + ], + [ + 877, + 881, + "Demographic and Health Survey <> publication year" + ], + [ + 936, + 949, + "Demographic and Health Survey <> publisher" + ], + [ + 1060, + 1067, + "Demographic and Health Survey <> data geography" + ], + [ + 1558, + 1565, + "Demographic and Health Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "55 This risk is present for women both in refugee camps and in host communities, though may be heightened in refugee camps that are located farther from local sources. 50 2016-2017 Demographic and Health Survey in Burundi 51 Victoria Rames, Cl\u00e9mence Bununagi, and Caritas Niyonzima ( 2017 ) USAID / Burundi Gender Analysis Report. Prepared by Banyan Global.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly identified as a 'Demographic and Health Survey' used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly identified as a 'Demographic and Health Survey' used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "182_multi0page", + "page": 6, + "text": "Improved capacity of General Administration of Social Services ( GASS ) staff, local governments, NGOs, community-based organizations ( CBOs ), and other organizations to deliver, monitor and manage services ( monitoring system, regular data reports, publications, standards and guidelines etc. ). 6. Increased public awareness of social inclusion issues of poor and vulnerable population groups, and increased numbers of people seeking referrals to social services. 7. Government analysis and development of social policy at all stages ( design, monitoring and evaluation ) is improved, measured through qualitative assessments of policy performance by key stakeholders ( e. g., Parliament, municipalities, NGO ' s, extemal development agencies ). 8. Introduction of a more reliable, timely and relevant household survey instrument for poverty monitoring and policy evaluation based on the 2001 population census and forthcoming LSMS and panel surveys. Concrete targets for each output to be achieved by Borrower will be agreed in detail at the beginning of each year through annual work prograrn. B. Strategic Context 1. Sector-related Country Assistance Strategy ( CAS ) goal supported by the project: ( see Annex 1 ) Document number: IDA / R2000-20 Date of latest CAS discussion: 03 / 21 / 00 Poverty alleviation and human development were identified as two strategic priorities of the 1998 CAS for Albania.", + "ner_text": [ + [ + 891, + 913, + "named" + ] + ], + "validated": true, + "empirical_context": "8. Introduction of a more reliable, timely and relevant household survey instrument for poverty monitoring and policy evaluation based on the 2001 population census and forthcoming LSMS and panel surveys. Concrete targets for each output to be achieved by Borrower will be agreed in detail at the beginning of each year through annual work prograrn.", + "type": "census", + "explanation": "This is indeed a dataset as it is a structured collection of data used for poverty monitoring and policy evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific census that collects data on households.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used for poverty monitoring and policy evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "185_multi-page", + "page": 41, + "text": "42 Increase in percent of households with a chronically ill adult ( 15-49 years ) who have received external help in caring for a patient or replacing lost income in the past year Increase in percentage of households caring for an orphan that has received assistance from outside the family Increase in percent of adults with access to quality STI / TB / O [ case management Percent decrease in reported STI / TB / OI prevalence Percentage of people expressing nondiscriminatory attitudes towards people with HIV / AIDS 3 ) Strengthened capacity of Increase in the number of Project data institutions and communities to institutions providing effective respond to the epidemic in a coordination at nat ' l, multisectoral and sustained provincial, and district levels manner for the planning and implementation of HIV / AIDS interventions Proper mechanisms in place to Survey data transfer funds for prevention, care, and support at district and community levels Percent increase in number of Survey data organizations capable of designing, implementing, and evaluating HIV / AIDS / STI activities", + "ner_text": [ + [ + 992, + 1003, + "named" + ] + ], + "validated": true, + "empirical_context": "42 Increase in percent of households with a chronically ill adult ( 15-49 years ) who have received external help in caring for a patient or replacing lost income in the past year Increase in percentage of households caring for an orphan that has received assistance from outside the family Increase in percent of adults with access to quality STI / TB / O [ case management Percent decrease in reported STI / TB / OI prevalence Percentage of people expressing nondiscriminatory attitudes towards people with HIV / AIDS 3 ) Strengthened capacity of Increase in the number of Project data institutions and communities to institutions providing effective respond to the epidemic in a coordination at nat ' l, multisectoral and sustained provincial, and district levels manner for the planning and implementation of HIV / AIDS interventions Proper mechanisms in place to Survey data transfer funds for prevention, care, and support at district and community levels Percent increase in number of Survey data organizations capable of designing, implementing, and evaluating HIV / AIDS / STI activities", + "type": "survey", + "explanation": "'Survey data' is indeed a dataset as it is explicitly mentioned in the context as a source of information for measuring various health-related metrics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Survey data' is a dataset because it refers to data collected from surveys.", + "contextual_reason_agent": "'Survey data' is indeed a dataset as it is explicitly mentioned in the context as a source of information for measuring various health-related metrics.", + "contextual_signal": "'Survey data' mentioned as a source for measuring health metrics.", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 19, + "text": "The Theory of Change ( Table 2 ) is structured around the Program \u2019 s three RAs: \u2022 RA1 on improved service delivery through digitalization \u2022 RA2 on enhanced government effectiveness through digitalization \u2022 RA3 on transparency and accountability through digitalization. 25. The Program builds synergies across its results framework. The strengthening of trusted and people-centric DPI under RA1 will bolster the digitalization of the education and health sectors and competency-based management in the civil service in RA2 and RA3. Specifically, secondary education diplomas will be digitally verifiable using DPI, which will not only increase trust in their authenticity but also allow them to be shared easily in a people-centric way ( that is, with user consent and data minimization ). For core health systems, such as those that manage EMRs, their integration with trusted DPI will improve the protection of sensitive health data while facilitating safe data sharing capabilities. The Program \u2019 s results framework is further underpinned by three main cross-cutting dimensions across the three RAs: 1 ) People-centricity, that is, a focus on output - and outcome-level results at the interface between government and the people ( across e-services and in the education and health sectors ). 2 ) Data generation and use for performance monitoring and evaluation ( M & E ) to inform policymaking and implementation ( through the release of interactive statistical data and the use of health quality data ). 3 ) Direct and indirect benefits to Syrian refugees, since the Program supports enhanced refugee access to e-services and digital ID, the digitalized secondary education examination, and e-health services. Disaggregated statistical and administrative data will help provide evidence on socioeconomic indicators and inform policy dialogue.", + "ner_text": [ + [ + 1487, + 1506, + "named" + ], + [ + 1546, + 1561, + "health quality data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The Program \u2019 s results framework is further underpinned by three main cross-cutting dimensions across the three RAs: 1 ) People-centricity, that is, a focus on output - and outcome-level results at the interface between government and the people ( across e-services and in the education and health sectors ). 2 ) Data generation and use for performance monitoring and evaluation ( M & E ) to inform policymaking and implementation ( through the release of interactive statistical data and the use of health quality data ). 3 ) Direct and indirect benefits to Syrian refugees, since the Program supports enhanced refugee access to e-services and digital ID, the digitalized secondary education examination, and e-health services.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific data used for performance monitoring and evaluation in health.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'health quality data' suggests a collection of data related to health metrics.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific data used for performance monitoring and evaluation in health.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 63, + "text": "EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in at least five ( 5 ) regions ( c ) Budget for EMIS activities, including evaluation, for subsequent year for ministries in charge of education sector approved DLR 6. 5: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in all ten ( 10 ) regions ( c ) Evaluation of national EMIS completed ( d ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved", + "ner_text": [ + [ + 455, + 459, + "named" + ] + ], + "validated": false, + "empirical_context": "EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in at least five ( 5 ) regions ( c ) Budget for EMIS activities, including evaluation, for subsequent year for ministries in charge of education sector approved DLR 6. 5: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in all ten ( 10 ) regions ( c ) Evaluation of national EMIS completed ( d ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved", + "type": "program", + "explanation": "However, EMIS is mentioned as a program and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to education statistics.", + "contextual_reason_agent": "However, EMIS is mentioned as a program and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 12, + "text": "As a result, the country demonstrated one of the strongest improvements in performance among PISA-participating countries and economies over 2009 \u2013 2018 in all three assessed subjects: reading, math, and science. Moldova \u2019 s performance is above what is expected considering its GDP per capita. 8. Pandemic-related school closures had negative impacts on learning of students, threatening their future well-being and productivity in the workplace. The COVID-related school closures are estimated to have affected learning by an equivalent of 8 PISA points, eroding about 20 percent of learning gains made over the last decade. The learning loss in Moldova \u2019 s future earnings due to school closures is estimated at about 0. 8 percent of GDP annually. This translates into an aggregated lifetime loss in earnings 7 Major objectives of the Moldova Education Strategy and Action Plan 2030 include aligning education to the labor market needs, ensuring access to quality education for all, strengthening social cohesion, promoting effective implementation of digital technologies, creating opportunities for lifelong learning, and promoting innovation and change in education including through the relevant scientific research. 8 Currently, the gross enrollment rate is 108 percent in primary education and 110 percent in secondary education ( World Bank data ). There are no recent reliable data on the net enrollment rates at the primary and secondary levels. Earlier rates dating back to 2018 show low net enrollment rates of around 86 percent, which could be the result of an overestimation of the size of the resident school-age population due to outmigration, thus meriting further study. 9 The Education Management Information System ( EMIS ) was reengineered to provide data at the school, teacher, and student levels. Many of these advancements were supported by the World Bank-financed Moldova Education Reform Project. 10 Changing from the traditional, subjective allocation scheme.", + "ner_text": [ + [ + 1697, + 1736, + "named" + ] + ], + "validated": false, + "empirical_context": "Earlier rates dating back to 2018 show low net enrollment rates of around 86 percent, which could be the result of an overestimation of the size of the resident school-age population due to outmigration, thus meriting further study. 9 The Education Management Information System ( EMIS ) was reengineered to provide data at the school, teacher, and student levels. Many of these advancements were supported by the World Bank-financed Moldova Education Reform Project.", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 52, + "text": "UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. b ) Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and surrounding population. Initially, only those sites supported by this project will be added. Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. c ) Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. d ) Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed at the start of year 1.", + "ner_text": [ + [ + 658, + 681, + "named" + ], + [ + 690, + 707, + "Health facility surveys <> reference population" + ], + [ + 724, + 746, + "Health facility surveys <> data type" + ], + [ + 1116, + 1134, + "Health facility surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. c ) Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. d ) Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed at the start of year 1.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves a systematic survey to collect data on HIV-related services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured assessment of health facilities.", + "contextual_reason_agent": "This is indeed a dataset as it involves a systematic survey to collect data on HIV-related services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 59, + "text": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 55 of 104 livelihood infrastructure; and MHPSS service-related facilities. Climate-resilient infrastructure would have explicit consideration and internalization of the risks and opportunities that alternative climate change scenarios are likely to imply for the design and O & M of the infrastructure. Agency of Ethiopia Beneficiaries with rebuilt and improved access to climate-resilient community infrastructure - female Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data on female beneficiaries as per the relevant Kebele \u2019 s demographics MoF FPCU Beneficiaries with rebuilt and improved access to climate-resilient community infrastructure - displaced ' Displaced ' are persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of or in order to avoid the effects of armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters.", + "ner_text": [ + [ + 636, + 653, + "named" + ], + [ + 85, + 93, + "Woreda-level data <> data geography" + ], + [ + 424, + 432, + "Woreda-level data <> data geography" + ], + [ + 612, + 620, + "Woreda-level data <> data geography" + ], + [ + 657, + 677, + "Woreda-level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Climate-resilient infrastructure would have explicit consideration and internalization of the risks and opportunities that alternative climate change scenarios are likely to imply for the design and O & M of the infrastructure. Agency of Ethiopia Beneficiaries with rebuilt and improved access to climate-resilient community infrastructure - female Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data on female beneficiaries as per the relevant Kebele \u2019 s demographics MoF FPCU Beneficiaries with rebuilt and improved access to climate-resilient community infrastructure - displaced ' Displaced ' are persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of or in order to avoid the effects of armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to aggregated data on female beneficiaries, which is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to aggregated data at a specific administrative level.", + "contextual_reason_agent": "This is indeed a dataset as it refers to aggregated data on female beneficiaries, which is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "167_27761", + "page": 20, + "text": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards. In addition, the auditor would carry out field spot check audits to ascertain compliance with contractual requirements. Compliance with conditional cash transfers would be monitored by an independent external consultant ( paragraph C. 3 ). 4. Social Opportunities, constraints, impacts, and risks arising. out of the socio-cultural and political context. The impact of closure and incursions in the West Bank and Gaza has been extensively documented. On the rise are poverty, unemployment, school drop out rates; on the decline or deteriorating are household incomes, living conditions, school attendance rates, at-large nutritional status and in particular of children 0-5 years of age. 13 The so called \u201c newly poor \u201d, those who are just above the poverty line before the outbreak of the second Intifada, in the third quarter of 2000, have been assisted by various interventions. To date, it has been much more difficult to find ways to assist the very poor, or first decile, whose coping mechanisms are essentially exhausted. The proposed project i s designed to support children ( 0-18 years old ) in the first decile. Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "ner_text": [ + [ + 2023, + 2038, + "named" + ], + [ + 741, + 750, + "Nutrition Study <> data geography" + ], + [ + 1003, + 1028, + "Nutrition Study <> reference population" + ], + [ + 1049, + 1059, + "Nutrition Study <> reference population" + ], + [ + 1876, + 1908, + "Nutrition Study <> data type" + ], + [ + 2040, + 2064, + "Nutrition Study <> author" + ], + [ + 2066, + 2070, + "Nutrition Study <> publication year" + ], + [ + 2099, + 2117, + "Nutrition Study <> author" + ], + [ + 2119, + 2123, + "Nutrition Study <> publication year" + ] + ], + "validated": true, + "empirical_context": "Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "type": "study", + "explanation": "The context indicates that 'Nutrition Study' is referenced alongside other surveys and studies, implying it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Nutrition Study' suggests a structured collection of data related to nutrition.", + "contextual_reason_agent": "The context indicates that 'Nutrition Study' is referenced alongside other surveys and studies, implying it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 70, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 67 of 89 ` ` Table 1. 2. Results of the Baseline Survey Conducted under the RWSSP ( % ) Gender 6 \u2013 17 years 18 \u2013 24 years 25 \u2013 34 years 35 \u2013 44 years 45 \u2013 54 years 55 \u2013 64 years 65 years and Older Total Male 46 10 8 3 12 12 31 13 Female 54 90 92 97 88 88 69 87 Total 100 100 100 100 100 100 100 100 26. Poor water quality also increases the risk of diarrhea and other waterborne diseases. Official statistics do not capture all the diarrhea cases in rural areas: only 58 percent of survey respondents reported visiting a health facility when they or their family members develop diarrhea, more than one - third of respondents preferred to self-medicate, and only 6 percent used traditional medicine. This is confirmed by the fact that 34 percent of respondents answered that they preferred home care ( treatment ), about 10 percent do not attach much importance to diarrhea, and 5 percent find it expensive to go to a health facility. Among other factors are the dissatisfaction with the health facilities or the remoteness of health facilities. The fact that many households defer to home-based treatment increases the time and emotional burden on mothers as the main caregivers.", + "ner_text": [ + [ + 130, + 145, + "named" + ], + [ + 4, + 14, + "Baseline Survey <> publisher" + ], + [ + 15, + 25, + "Baseline Survey <> data geography" + ], + [ + 572, + 590, + "Baseline Survey <> reference population" + ], + [ + 1053, + 1095, + "Baseline Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "2. Results of the Baseline Survey Conducted under the RWSSP ( % ) Gender 6 \u2013 17 years 18 \u2013 24 years 25 \u2013 34 years 35 \u2013 44 years 45 \u2013 54 years 55 \u2013 64 years 65 years and Older Total Male 46 10 8 3 12 12 31 13 Female 54 90 92 97 88 88 69 87 Total 100 100 100 100 100 100 100 100 26. Poor water quality also increases the risk of diarrhea and other waterborne diseases.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to in the context as the source of results presented in the survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Baseline Survey' suggests a structured collection of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to in the context as the source of results presented in the survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 41, + "text": "The employers \u2019 and trainees \u2019 satisfaction with the training is another important parameter that will be used to measure the performance of the SDF. 100. The M & E system will incorporate gender-related aspects and include a formal impact evaluation. The Results Framework will include ( a ) one specific indicator to track inclusion of gender contents in CBT packages and teacher training modules; ( b ) the disaggregation of key indicators by gender, for both participation and completion of project activities; and ( c ) a tracer study that will systematically include gender-disaggregated data, to better inform policies and practices. Finally, under Component 3 financing, the project will conduct an impact evaluation of innovative gender-related interventions. The details of the impact evaluation will be specified in the PIM. C. Sustainability 101. The sustainability of the project will be enhanced by several factors: ( a ) the high level of government commitment to strengthening the secondary education and skills development sectors as outlined in multiple strategic documents; ( b ) the use of country systems and processes for Component 1 and part of Component 2; ( c ) focus on building implementation capacity of key implementation agencies to sustain the interventions in the future as an integral part of the project; ( d ) the small contribution of IDA to the overall sector budget; and ( e ) improvements to", + "ner_text": [ + [ + 527, + 539, + "named" + ] + ], + "validated": false, + "empirical_context": "The M & E system will incorporate gender-related aspects and include a formal impact evaluation. The Results Framework will include ( a ) one specific indicator to track inclusion of gender contents in CBT packages and teacher training modules; ( b ) the disaggregation of key indicators by gender, for both participation and completion of project activities; and ( c ) a tracer study that will systematically include gender-disaggregated data, to better inform policies and practices. Finally, under Component 3 financing, the project will conduct an impact evaluation of innovative gender-related interventions.", + "type": "study", + "explanation": "However, 'tracer study' is mentioned as a type of study rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'tracer study' is a dataset because it involves data collection and analysis.", + "contextual_reason_agent": "However, 'tracer study' is mentioned as a type of study rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 51, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLII Procedure Indicator values will be collected from MODEE \u2019 s administrative data on digital ID activations and cross - checked by the IVA through spot surveys. People-centric digital identity is verified as follows: ( 1 ) it uniquely identifies users through strong identity assurance against existing identity evidence; ( 2 ) it allows the secure digital verification of users \u2019 identity attributes and credentials by relying parties; ( 3 ) it is optimized for use for both remote / online and in-person transactions, according to the use case; ( 4 ) it is equally usable, irrespective of whether the user initiates a transaction from within the DPI environment or outside of it; ( 5 ) it is equally accessible to Jordanian citizens and eligible non-citizens, including refugees; ( 6 ) it allows the integration of digital identity verification into service workflows, including existing services and legacy systems, through standards-based protocols and interfaces that are well documented and accessible to relying parties; ( 7 ) it is equally accessible to relying parties across the public and private sectors; ( 8 ) it is usable by diverse populations including women, elders, persons with disabilities, and rural populations, as assured through implementation of human-centered design approaches.", + "ner_text": [ + [ + 245, + 257, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLII Procedure Indicator values will be collected from MODEE \u2019 s administrative data on digital ID activations and cross - checked by the IVA through spot surveys. People-centric digital identity is verified as follows: ( 1 ) it uniquely identifies users through strong identity assurance against existing identity evidence; ( 2 ) it allows the secure digital verification of users \u2019 identity attributes and credentials by relying parties; ( 3 ) it is optimized for use for both remote / online and in-person transactions, according to the use case; ( 4 ) it is equally usable, irrespective of whether the user initiates a transaction from within the DPI environment or outside of it; ( 5 ) it is equally accessible to Jordanian citizens and eligible non-citizens, including refugees; ( 6 ) it allows the integration of digital identity verification into service workflows, including existing services and legacy systems, through standards-based protocols and interfaces that are well documented and accessible to relying parties; ( 7 ) it is equally accessible to relying parties across the public and private sectors; ( 8 ) it is usable by diverse populations including women, elders, persons with disabilities, and rural populations, as assured through implementation of human-centered design approaches.", + "type": "survey", + "explanation": "In this context, 'spot surveys' is indeed used as a method to collect data for verification purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'spot surveys' is a dataset because it implies a collection of data gathered through surveys.", + "contextual_reason_agent": "In this context, 'spot surveys' is indeed used as a method to collect data for verification purposes.", + "contextual_signal": "follows 'collected from' indicating it is a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 92, + "text": "Stakeholder feedback gathered through these meetings informed the final versions of the safeguards documents. Land availability is one of the key aspects looked at during the environmental and social screening and impact assessment. All subprojects shall therefore be screened to confirm availability of land before proceeding with its preparation. Another concern is building capacity within local agencies on safeguards management. These concerns will be addressed in the IWMDP through the various training and capacity - building initiatives proposed under Component 4 ( institutional strengthening ) and budgeted for under the ESMF. Monitoring and Evaluation 75. The IAs will be responsible for monitoring and reporting. The MWE and the NWSC will lead data collection and compilation analysis. A Project Liaison Officer will be appointed within the MWE to lead the overall coordination effort in the collection and consolidation of progress reports submitted by each of the MWE \u2019 s departments that are directly involved in Project implementation ( RWSSD, UWSSD, and DWRM ) and the NWSC. Monitoring and reporting will focus on key performance data from specific Project activities that contribute to the Project \u2019 s intermediate results and PDO-level outcomes. 76. Data collection and reporting. Progress reports will be submitted on a quarterly basis by the MWE \u2019 s departments and the NWSC to the WESLD. The WESLD will coordinate with the Planning Department of the MWE to ensure results inform sector-wide reporting, decision-making, and supervision. The WESLD will submit progress reports every semester to the World Bank. To the extent possible, client satisfaction surveys will also be conducted each quarter to monitor the level of client satisfaction with the quality of service. A midterm review will be carried out to evaluate implementation progress and identify potential issues in need of attention and resolution. An end-of-project evaluation will also be conducted two months before Project closing to assess whether the intermediate results and PDO-level indicators were achieved, the sustainability of the results, and lessons learned.", + "ner_text": [ + [ + 1655, + 1682, + "named" + ], + [ + 1131, + 1151, + "client satisfaction surveys <> data type" + ], + [ + 1619, + 1629, + "client satisfaction surveys <> publisher" + ], + [ + 2172, + 2190, + "client satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The WESLD will submit progress reports every semester to the World Bank. To the extent possible, client satisfaction surveys will also be conducted each quarter to monitor the level of client satisfaction with the quality of service. A midterm review will be carried out to evaluate implementation progress and identify potential issues in need of attention and resolution.", + "type": "survey", + "explanation": "In this context, client satisfaction surveys are explicitly mentioned as a method to collect data on client satisfaction, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because client satisfaction surveys are structured collections of data used to gather feedback.", + "contextual_reason_agent": "In this context, client satisfaction surveys are explicitly mentioned as a method to collect data on client satisfaction, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "161_28046", + "page": 19, + "text": "Table: Classification of Regions by Poverty Levels Lower Central Higher Forest Total Urban Rural Guinea Guinea Guinea Guinea Conakry Percent urban Dooulation 30 100 0 23 9 16 22 100 Population density 29 n. a. n. a 31 31 14 34 2429 Access to safe water 52 49 45 43 45 47 48 80 Percent poor 40 n. a. n. a 24 36 40 25 11 Percent with less than U S 3 0 0 40 24 52 42 51 62 33 7 Depth o f poverty ( gap between 13 7 18 14 17 23 9 1 Share o f National Poverty ) ( % ) 100 9 88 22 28 32 15 3 Per capita expenditure ( \u2018 000 469. 5 n. a. n. a 401. 3 377. 6 316. 0 484. 4 n. a. Share of food in total spending 50 43 61 5 1 61 55 57 36 Source: UNPOP 1998, RGPH, General Population and Housing Census 1996, Comprehensive Consumption Budget Survey 199411995, Household Consumption Survey. Taken from the \u2018 Guinea Country Status Report \u201d, In the same vein, health indicators also indicate inequity problems as the table below shows: Table: Household Health Behaviors: Comparison by Place of Residence and by Region Lower Central Higher Forest Indicators Country Urban Rural Guinea Guinea Guinea Guinea Conakry Child health % of newborns < 6 months old on exclusive Median duration of breast feeding 22 4 - 224 242 1 9 9 22 1 22 1 breastfeeding 95. 2 - - 987 9 3 6 9 6 0 9 2 6 95 9 14", + "ner_text": [ + [ + 747, + 775, + "named" + ], + [ + 634, + 639, + "Household Consumption Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "a. Share of food in total spending 50 43 61 5 1 61 55 57 36 Source: UNPOP 1998, RGPH, General Population and Housing Census 1996, Comprehensive Consumption Budget Survey 199411995, Household Consumption Survey. Taken from the \u2018 Guinea Country Status Report \u201d, In the same vein, health indicators also indicate inequity problems as the table below shows: Table: Household Health Behaviors: Comparison by Place of Residence and by Region Lower Central Higher Forest Indicators Country Urban Rural Guinea Guinea Guinea Guinea Conakry Child health % of newborns < 6 months old on exclusive Median duration of breast feeding 22 4 - 224 242 1 9 9 22 1 22 1 breastfeeding 95.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned as a source of data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data in the context.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned as a source of data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 27, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 25 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection in the project Completion Survey conducted at least three months after training completion. needed. Description: Total participant household income reported at the time of applying for project enrolment divided by total income reported before project completion. Name: Percentage of registered participants completing training and receiving certificates Percentag e 0. 00 85. 00 Twice, once upon registration and once upon training certification. Reports provided by training providers. M & E Specialist, training providers. Description: Total number of participants receiving certificates divided by the total number of registered participants. Name: Beneficiaries of job-focused interventions \u2714 Number 0. 00 520. 00 Once Total number of household members of applicants accepted and registered for training support. M & E Specialist Beneficiaries of job - focused interventions - Female \u2714 Number 0. 00 260. 00 Once Total number of household members in female participants accepted and registered for training support.", + "ner_text": [ + [ + 253, + 278, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 25 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection in the project Completion Survey conducted at least three months after training completion. needed.", + "type": "survey", + "explanation": "However, it is mentioned as part of a project and not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often implies data collection.", + "contextual_reason_agent": "However, it is mentioned as part of a project and not as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 27, + "text": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 193, + 199, + "named" + ], + [ + 227, + 243, + "ECAM 5 <> data geography" + ], + [ + 701, + 711, + "ECAM 5 <> publisher" + ], + [ + 764, + 774, + "ECAM 5 <> publisher" + ] + ], + "validated": true, + "empirical_context": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "type": "survey", + "explanation": "It is indeed a dataset as it is explicitly referenced for data collection and analysis related to poverty.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of data collection and analysis.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly referenced for data collection and analysis related to poverty.", + "contextual_signal": "mentioned as a data collection and analysis focus", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 38, + "text": "In order to upload the funds to each card, WFP provides the bank with a household Payment List or a table for each household to which a transfer needs to be made, including: ( a ) the NPTP registration number; ( b ) the number of the Prepaid Card; ( c ) the date of birth of the household head; ( d ) gender of the household head; and ( e ) the amount of the WFP voucher benefit to be transferred. The household payment list also indicates the WFP-benefit voucher account, currency, the transfer date and the WFP voucher benefit availability end date for that month. 16. The bank uploads the credit to the cards as requested by WFP once the Household Payment List and Letter of Authorization are received, reviewed and approved. WFP partners distribute the cards to the Social Development Centers, where NPTP social workers are responsible for distributing the cards to beneficiaries. Along with each card, the NPTP social workers will distribute an NPTP registration certificate, with all the names and dates of births of each household member.", + "ner_text": [ + [ + 72, + 94, + "named" + ], + [ + 43, + 46, + "household Payment List <> publisher" + ], + [ + 184, + 208, + "household Payment List <> data description" + ], + [ + 220, + 246, + "household Payment List <> data description" + ], + [ + 258, + 293, + "household Payment List <> data description" + ], + [ + 301, + 329, + "household Payment List <> data description" + ], + [ + 359, + 362, + "household Payment List <> publisher" + ], + [ + 509, + 512, + "household Payment List <> publisher" + ], + [ + 628, + 631, + "household Payment List <> publisher" + ], + [ + 729, + 732, + "household Payment List <> publisher" + ] + ], + "validated": true, + "empirical_context": "In order to upload the funds to each card, WFP provides the bank with a household Payment List or a table for each household to which a transfer needs to be made, including: ( a ) the NPTP registration number; ( b ) the number of the Prepaid Card; ( c ) the date of birth of the household head; ( d ) gender of the household head; and ( e ) the amount of the WFP voucher benefit to be transferred. The household payment list also indicates the WFP-benefit voucher account, currency, the transfer date and the WFP voucher benefit availability end date for that month.", + "type": "list", + "explanation": "This is indeed a dataset as it is a structured collection of data used for processing payments to households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it contains structured information about households and their payment details.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used for processing payments to households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2. 5 years of implementation. The mid-term review will involve project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements, and outcomes. It will confirm the plans and processes for expansion of the project. 129. A Monitoring and Evaluation manual was produced under Merankabandi. The manual will be updated for the project, even though most of the monitoring and evaluation processes will be kept the same. The following table describes the main M & E instruments of the project. Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "ner_text": [ + [ + 70, + 73, + "named" + ] + ], + "validated": false, + "empirical_context": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a management information system that supports project reporting, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured system for managing information.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a management information system that supports project reporting, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "167_27761", + "page": 30, + "text": "Instead, proxy indicators will be used to show that there i s no further deterioration of baseline conditions, or there i s a net positive change. The working conditions in the West Bank and Gaza make it difficult to use the experimental quantitative methods in which outcomes are compared with those of a randomly assigned control group that i s otherwise eligible for the program and similar to the participants. In child nutrition, the monitoring will only measure the progress of the child from project year 0 when the project starts to year 4 when the project ends, or earlier when the child enrollment in the nutrition program ends at 5 years old. Comparing a sample of project beneficiaries to a comparison group with similar characteristics using baseline and follow-up surveys will assess project impact. This method will establish the net project impact. The baseline values will be updated during a pilot targeting phase where information will be collected in ten governorates, five in the West Bank and the other five in Gaza. The results from survey data, monitoring indicators, and qualitative assessments will be entered into the project \u2019 s Management Information System ( MIS ), and the following measurable indicators will be generated to determine project impact and output: ( i ) Health / Nutrition Grants. The following outcome indicators will be used: 0 0 0 0 Net change in the percentage children brought regularly to health centers for preventive care ( target: 2-10 % ). Net change in children 0-5 years old with complete immunization scheme ( target: 2-10 % ). Net change in children 0-5 years old complying with regular growth and health monitoring ( target: 2-15 % ). Net change in nutritional status ( as measured through anthropometrical indicators ) of children 0 - 3 years old ( target: 2-1596 ). 26 Where TBD i s used in the indicators below, the indicator i s being determined by the pilot program ( January-May, 2004 ). 27", + "ner_text": [ + [ + 1056, + 1067, + "named" + ], + [ + 177, + 186, + "survey data <> data geography" + ], + [ + 191, + 195, + "survey data <> data geography" + ], + [ + 676, + 697, + "survey data <> reference population" + ], + [ + 755, + 785, + "survey data <> data type" + ], + [ + 1001, + 1010, + "survey data <> data geography" + ], + [ + 1033, + 1037, + "survey data <> data geography" + ], + [ + 1510, + 1532, + "survey data <> reference population" + ], + [ + 1601, + 1623, + "survey data <> reference population" + ], + [ + 1947, + 1951, + "survey data <> publication year" + ] + ], + "validated": true, + "empirical_context": "The baseline values will be updated during a pilot targeting phase where information will be collected in ten governorates, five in the West Bank and the other five in Gaza. The results from survey data, monitoring indicators, and qualitative assessments will be entered into the project \u2019 s Management Information System ( MIS ), and the following measurable indicators will be generated to determine project impact and output: ( i ) Health / Nutrition Grants. The following outcome indicators will be used: 0 0 0 0 Net change in the percentage children brought regularly to health centers for preventive care ( target: 2-10 % ).", + "type": "survey", + "explanation": "In this context, 'survey data' is explicitly mentioned as part of the information collected for analysis, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'survey data' is a dataset because it refers to collected information from surveys.", + "contextual_reason_agent": "In this context, 'survey data' is explicitly mentioned as part of the information collected for analysis, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 38, + "text": "Responsibility for Data Collection PIU IRI 3. 1. 2 Gender parity in enrollment in STEAM courses in technical stream of Secondary schools \u2013 Gender gap indicator ( Percentage ) Description This indicator measures gender parity in mean enrollment in Science, Technology, Engineering, Arts and Mathematic courses in the technical stream of secondary education. Frequency Annual Data source MEP Department for Technical Secondary Education Methodology for Data Collection The PIU compiles data from administrative records from MEP Department for Technical Secondary Education. Responsibility for Data Collection PIU IRI 3. 1. 3 Number of students registered in online English learning app Description Students registered in the online app will be measured in real time. Frequency Annual Data source The English learning app. Methodology for Data Collection The app will provide the number of users in real time on its main page. Responsibility for Data Collection PIU IRI 3. 1. 4 Number of students completing Secondary certification under MEP ' s \" Brete \" program Description This indicator measures the number of students awarded the certificate after satisfactorily completing online courses. Frequency Annual Data source MEP Department for Technical Secondary Education Methodology for Data Collection Administrative records from MEP Department for Technical Secondary Education. Responsibility for Data MEP Department for Technical Secondary Education", + "ner_text": [ + [ + 494, + 516, + "named" + ] + ], + "validated": false, + "empirical_context": "2 Gender parity in enrollment in STEAM courses in technical stream of Secondary schools \u2013 Gender gap indicator ( Percentage ) Description This indicator measures gender parity in mean enrollment in Science, Technology, Engineering, Arts and Mathematic courses in the technical stream of secondary education. Frequency Annual Data source MEP Department for Technical Secondary Education Methodology for Data Collection The PIU compiles data from administrative records from MEP Department for Technical Secondary Education. Responsibility for Data Collection PIU IRI 3.", + "type": "document", + "explanation": "'Administrative records' are mentioned as a source of information rather than a dataset themselves.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'administrative records' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Administrative records' are mentioned as a source of information rather than a dataset themselves.", + "contextual_signal": "mentioned only as a source of information, not as a data source", + "tags": [] + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 12, + "text": "Foundational ID systems11 are broadly recognized as key enablers for inclusive digitalization and development. For people, the ability to establish and verify their identity is often a prerequisite for access to services and economic opportunities, such as social protection, healthcare, education, financial services, and employment. Proof of legal identity is also the basis for exercising rights, such as property ownership, and nationality. For governments and businesses, ID systems can serve as a platform for more effective and efficient service delivery by enabling the unique identification and verification of persons. Importantly, ID systems can promote greater inclusion by de-risking and reducing the costs of 8 UNHCR ' s Ethiopia Update on the Total Number of Refugees and Asylum Seekers as of August 31, 2023. 9 In Tigray, new internal displacement data has been reported, including 1, 021, 798 IDPs ( 250, 468 households ) in 643 sites across six zones ( excluding 20 woredas / districts hard to reach due to security or environmental factors ). 10 IOM. 2023. Ethiopia National Displacement Report 16 - Site Assessment Round 33 and Village Assessment Survey Round 16: Nov 2022 - Jun 2023. https: / / reliefweb. int / report / ethiopia / ethiopia-national-displacement-report-16-site-assessment-round-33-and-village-assessment-survey-round - 16-november-2022-june-2023. 11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "ner_text": [ + [ + 1632, + 1651, + "named" + ] + ], + "validated": false, + "empirical_context": "11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "type": "system", + "explanation": "'National ID systems' are described as systems for providing credentials, not as structured collections of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'national ID systems' is a dataset because it relates to identity verification and data collection.", + "contextual_reason_agent": "'National ID systems' are described as systems for providing credentials, not as structured collections of data used for empirical analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "203_multi-page", + "page": 6, + "text": "HUDC, as administrator of Part A, will be responsible for planning, design, procurement. supervision of implementation and commissioning of completed works for transfer to concerned municipalities, public utilities or sectoral ministries responsible for operation and maintenance of such works. Part B The general infrastructure needs of about 300 poor municipalities and villages were identified during appraisal. In the absence of reliable poverty indicators, data from the National Aid Fund ( NAF ) on the number of families receiving NAF aid were used as a proxy poverty indicator. Unemployment data, which is generally a good indicator of poverty, was not reliable and could not be used. However, poverty surveys are included in the project to help improve identification mechanisms to be used in future phases of the CIP. The priority needs in these eligible areas were based on a list of needs identified by the municipalities and village councils available at CVDB. This list was not all-inclusive and did not directly take into account the views of the beneficiary population. During appraisal 12 communities and their basic infrastructure needs were reviewed and a program of eligible investments were identified for implementation in the first year of the CIP. However, since among the needs identified by the local councils there were many investments not considered of priority, it was agreed that CVDB will undertake in the first year of the CIP detailed surveys and", + "ner_text": [ + [ + 702, + 717, + "named" + ], + [ + 1411, + 1415, + "poverty surveys <> publisher" + ], + [ + 1496, + 1514, + "poverty surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Unemployment data, which is generally a good indicator of poverty, was not reliable and could not be used. However, poverty surveys are included in the project to help improve identification mechanisms to be used in future phases of the CIP. The priority needs in these eligible areas were based on a list of needs identified by the municipalities and village councils available at CVDB.", + "type": "survey", + "explanation": "In the context, 'poverty surveys' are explicitly mentioned as part of the project to improve identification mechanisms, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'poverty surveys' implies a structured collection of data related to poverty.", + "contextual_reason_agent": "In the context, 'poverty surveys' are explicitly mentioned as part of the project to improve identification mechanisms, indicating they are used as a data source.", + "contextual_signal": "included in the project to help improve identification mechanisms", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 14, + "text": "While women account for 52. 7 percent of the workforce, but nearly all of those employed work in agriculture ( 96 percent of women versus 88 percent of men ). In urban areas, 52 percent of women are engaged in non-salaried work, compared to 27 percent of men. In rural areas, only 3 percent of working women are employed in salaried work ( Figure 1 ). The lack of skills and quality jobs for women has repercussions for women \u2019 s fertility decisions, and, ultimately, for the development of Burundi ( see Box 1 ). Burundi \u2019 s Utilization-Adjusted HCI \u2014 a measure that captures how efficiently human capital is used in productive activities \u2014 is 0. 28 for men and 0. 32 for women. Figure 1. Share of Employment across Urban and Rural Areas Source: Enqu\u00eate sur les Conditions de Vie des M\u00e9nages au Burundi ( ECVMB, Integrated Household Living Conditions Survey ) 2013 \u2013 2014. 5. With the right institutions and policies in place, gainfully employed youth, including young women, can increase the country \u2019 s capacity to save and make crucial investments in physical capital, job training, and technological advancement, which could ultimately yield a rich demographic dividend for Burundi. Recognizing this potential, the Government has undertaken a reform agenda focused on improving access to quality education and youth employability and expanding economic opportunities for vulnerable groups such as the poor, women, and refugee and host communities. 2 Likewise, expanding 2 World Bank Group Country Partnership Framework ( CPF ) for the Period FY19 \u2013 FY23. https: / / openknowledge. worldbank. org / bitstream / handle / 10986 / 32114 / Burundi-Country-Partnership-Framework-for-the - Period-FY19-FY23. pdf? sequence = 1 & isAllowed = y; The reform agendas are outlined in Burundi \u2019 s National Development Plan ( NDP ) ( 2018 \u2013 2027 ) and its National Youth Policy ( 2016 \u2013 2026 ).", + "ner_text": [ + [ + 813, + 858, + "named" + ], + [ + 491, + 498, + "Integrated Household Living Conditions Survey <> data geography" + ], + [ + 514, + 521, + "Integrated Household Living Conditions Survey <> data geography" + ], + [ + 526, + 550, + "Integrated Household Living Conditions Survey <> data description" + ], + [ + 673, + 678, + "Integrated Household Living Conditions Survey <> reference population" + ], + [ + 796, + 803, + "Integrated Household Living Conditions Survey <> data geography" + ], + [ + 861, + 872, + "Integrated Household Living Conditions Survey <> publication year" + ], + [ + 1179, + 1186, + "Integrated Household Living Conditions Survey <> data geography" + ], + [ + 1477, + 1487, + "Integrated Household Living Conditions Survey <> publisher" + ], + [ + 1776, + 1783, + "Integrated Household Living Conditions Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Figure 1. Share of Employment across Urban and Rural Areas Source: Enqu\u00eate sur les Conditions de Vie des M\u00e9nages au Burundi ( ECVMB, Integrated Household Living Conditions Survey ) 2013 \u2013 2014. 5.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as a source of data for the analysis presented in the figure.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that collects data on living conditions.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a source of data for the analysis presented in the figure.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 11, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 6 of 117 I. STRATEGIC CONTEXT A. Country Context 1. Before the Coronavirus Disease 2019 ( COVID-19 ) pandemic, Kenya had achieved strong development gains, becoming a lower-middle-income country, reducing poverty, and building its human capital. Kenya had a population of 47. 5 million ( 2019 ) and a per capita gross domestic product ( GDP ) of US $ 1, 879 ( World Development Indicators ( WDI ), 2020 ). Its relatively diversified economy posted strong, sustained growth, lifting real GDP by an annual average of 5. 0 percent from 2010-19. Poverty rates have declined; the share of the population living below the national poverty line fell from 46. 8 percent in 2005 / 06 to 36. 1 percent in 2015 / 16. The country \u2019 s ranking in the World Bank \u2019 s Human Capital Index ( HCI, 2020 ) places Kenya third in Sub-Saharan Africa, after Seychelles and Mauritius, reflecting significant investments in health care and basic education. 2. The COVID-19 pandemic has significantly affected the economy and increased poverty, although a significant albeit uneven recovery is underway.", + "ner_text": [ + [ + 437, + 465, + "named" + ], + [ + 4, + 14, + "World Development Indicators <> publisher" + ], + [ + 160, + 164, + "World Development Indicators <> publication year" + ], + [ + 188, + 193, + "World Development Indicators <> data geography" + ], + [ + 468, + 471, + "World Development Indicators <> acronym" + ], + [ + 475, + 479, + "World Development Indicators <> publication year" + ], + [ + 610, + 617, + "World Development Indicators <> reference year" + ], + [ + 772, + 781, + "World Development Indicators <> reference year" + ], + [ + 814, + 824, + "World Development Indicators <> publisher" + ], + [ + 856, + 860, + "World Development Indicators <> publication year" + ] + ], + "validated": true, + "empirical_context": "Kenya had a population of 47. 5 million ( 2019 ) and a per capita gross domestic product ( GDP ) of US $ 1, 879 ( World Development Indicators ( WDI ), 2020 ). Its relatively diversified economy posted strong, sustained growth, lifting real GDP by an annual average of 5.", + "type": "index", + "explanation": "In this context, it is used as a source of data for GDP, confirming it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'World Development Indicators' is a recognized source of economic data.", + "contextual_reason_agent": "In this context, it is used as a source of data for GDP, confirming it functions as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 92, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level. People provided with new or improved electricity service - Female Semi - annually. National statistics / Proj ect data. Estimated based on the share of female in the national population as published by the national statistics institute or other credible sources. NIGELEC / ANPER / ANERS OL. Refugee and host beneficiaries provided with electricity services through grid, mini grids and electrified public institutions This relates to the number of refugees and host populations living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. NIGELEC / ANPER / ANERS OL.", + "ner_text": [ + [ + 98, + 103, + "named" + ], + [ + 4, + 14, + "ANPER <> publisher" + ], + [ + 713, + 743, + "ANPER <> reference population" + ], + [ + 1128, + 1138, + "ANPER <> publisher" + ], + [ + 1225, + 1243, + "ANPER <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system.", + "type": "database", + "explanation": "In the context, 'ANPER' is referenced as part of project databases, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'ANPER' is a dataset because it is mentioned alongside 'project databases'.", + "contextual_reason_agent": "In the context, 'ANPER' is referenced as part of project databases, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 77, + "text": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 65 of 76 Project Total ( US $ million ) Development objectives and geographic scope UDAP-GovNet Synergies implementation commencing children and the interoperability of these systems between humanitarian managed and national systems, Uganda Intergovernmental Fiscal Transfer ( UgiFT ) - Additional Financing ( P172868 ) Status: Pending effectiveness Total 250 RSW / WHR 50 Improve the adequacy and equity of fiscal transfers and fiscal management of resources by Local Governments for health, education, community irrigation and water services across all RHDs The UgIFT operation includes integrated digitization of service delivery in education, health, water, and micro-irrigation. This includes developing strategies for digitization of sector services, results management / dashboard linked to Online Transfer Information Management System ( OTIMS ) and Online Performance Management System ( OPAMS ), electronic medical records system and so on. UDAP-GovNet will play a key role by providing the enabling infrastructure to support the digitization of health, education, and water services to strengthen management, efficiency, reporting and planning systems outlined under UgIFT at the district level, including in RHDs. UDAP-GovNet will collaborate to ensure interoperability between the activities in UDAP-GovNet and UgIFT.", + "ner_text": [ + [ + 875, + 920, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 65 of 76 Project Total ( US $ million ) Development objectives and geographic scope UDAP-GovNet Synergies implementation commencing children and the interoperability of these systems between humanitarian managed and national systems, Uganda Intergovernmental Fiscal Transfer ( UgiFT ) - Additional Financing ( P172868 ) Status: Pending effectiveness Total 250 RSW / WHR 50 Improve the adequacy and equity of fiscal transfers and fiscal management of resources by Local Governments for health, education, community irrigation and water services across all RHDs The UgIFT operation includes integrated digitization of service delivery in education, health, water, and micro-irrigation. This includes developing strategies for digitization of sector services, results management / dashboard linked to Online Transfer Information Management System ( OTIMS ) and Online Performance Management System ( OPAMS ), electronic medical records system and so on. UDAP-GovNet will play a key role by providing the enabling infrastructure to support the digitization of health, education, and water services to strengthen management, efficiency, reporting and planning systems outlined under UgIFT at the district level, including in RHDs.", + "type": "system", + "explanation": "However, it is described as a system rather than a structured collection of data, indicating it does not function as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Management System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a system rather than a structured collection of data, indicating it does not function as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 33, + "text": "In line with the Government \u2019 s vision, it has been agreed that the project will target host community schools. As such, benefits will accrue to both refugee children and host communities simultaneously, as most refugee children are enrolled in regular schools ( see annex 6 for details ). Expected direct beneficiaries include about 300 schools, enrolling about 150, 000 pupils, of which approximately 20, 000 are refugees. Education inputs ( for example, classrooms, teachers, teaching and learning materials, and furniture ) in the refugee-affected regions supported under the project are very limited. Providing additional support under the project to these communities will reduce the pressure on the education system that would be expected in receiving an increase in refugee students. Disaggregated data by refugee / host community status will be collected and reported under the project. Efforts will be made to gradually integrate existing refugee-related data into MINEDUB \u2019 s EMIS. 54. Results Area 7: Improved learning environment and quality of education for children in host community schools affected by refugees. The objectives of this results area are to ( a ) promote refugee welfare and inclusion in host communities \u2019 socioeconomic structure, ( b ) help ensure access and quality of services and basic infrastructure to refugees and host communities, and ( c ) supplement Government finances where these have been strained by expenditures related to their hosting responsibilities. These objectives are consistent with the IDA18 RSW resource allocation framework implementation guidelines ( details can be found in annex 2 ).", + "ner_text": [ + [ + 987, + 991, + "named" + ] + ], + "validated": false, + "empirical_context": "Disaggregated data by refugee / host community status will be collected and reported under the project. Efforts will be made to gradually integrate existing refugee-related data into MINEDUB \u2019 s EMIS. 54.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 104, + "text": "By improving the quality and relevance of skills training programs in formal and informal institutions, this project will support the youth to meet the needs of the of the Burundian economy in sectors with high labor potential. In turn, higher levels of education are expected to raise earnings, increase the probability of wage employment and increase the likelihood of finding employment in more productive sectors. Cost-Benefit Analysis 3. Other non-monetized benefits are expected from the project, such as private sector development, tax revenue increases, and lowered social spending by the Government. Thus, the values for IRR and NPV generated are lower bounds as they are based only on quantifiable labor market returns, and do not include the valuation of positive externalities and spillover effects from the increase in education and skills of Burundi \u2019 s youth. 4. The analysis includes Components 1 and 2 of the project, which account for 88 percent of the project \u2019 s budget. The following assumptions are made for all project components. Agents earn a certain fixed annual income throughout their professional life, which lasts 35 working years. There is no inflation or wage growth. Yearly income is based on ECVMB data from 2013, by level of education of the targeted group. The unemployment rate is 8 percent for all graduates.", + "ner_text": [ + [ + 1226, + 1231, + "named" + ], + [ + 134, + 139, + "ECVMB <> reference population" + ], + [ + 172, + 181, + "ECVMB <> data geography" + ], + [ + 856, + 863, + "ECVMB <> data geography" + ], + [ + 1054, + 1060, + "ECVMB <> reference population" + ], + [ + 1242, + 1246, + "ECVMB <> publication year" + ], + [ + 1297, + 1327, + "ECVMB <> data description" + ], + [ + 1336, + 1345, + "ECVMB <> reference population" + ] + ], + "validated": true, + "empirical_context": "There is no inflation or wage growth. Yearly income is based on ECVMB data from 2013, by level of education of the targeted group. The unemployment rate is 8 percent for all graduates.", + "type": "data", + "explanation": "ECVMB is indeed a dataset as it provides data used for empirical analysis regarding income and education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed ECVMB is a dataset because it is referenced in relation to yearly income data based on education levels.", + "contextual_reason_agent": "ECVMB is indeed a dataset as it provides data used for empirical analysis regarding income and education.", + "contextual_signal": "mentioned as a source of data for income analysis", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "The population census is the most important data collection activity in developing countries for at least two reasons. First, it provides spatial distribution of the population that cannot be obtained with demographic projections. Second, the cartography of the census provides the sampling framework of all other statistical operations ( household surveys, agricultural censuses, enterprise censuses, and so on ). In addition, population census data are used to construct poverty maps, a powerful tool for targeting social programs. Since the census is planned for 2017, the project will contribute to the analysis phase of the population census. 42. The subcomponent will support poverty analysis. As stated, INS conducted its fourth living conditions survey in 2014 ( ECAM 4 ) and a complementary survey financed by the Government early 2017; the first results are expected by end of 2017. The objective of the complementary survey is to update the Cameroon poverty map, probably at the commune level. The survey collects some data at the household level as well as at the community level. Information is also collected on school and health facilities. 43. Finally, the LFS methodology needs to be upgraded and updated, and data collection frequency improved to be relevant for policy making. 44.", + "ner_text": [ + [ + 952, + 972, + "named" + ] + ], + "validated": false, + "empirical_context": "As stated, INS conducted its fourth living conditions survey in 2014 ( ECAM 4 ) and a complementary survey financed by the Government early 2017; the first results are expected by end of 2017. The objective of the complementary survey is to update the Cameroon poverty map, probably at the commune level. The survey collects some data at the household level as well as at the community level.", + "type": "concept", + "explanation": "However, the 'Cameroon poverty map' is mentioned as an objective of the survey rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a map that could imply data representation.", + "contextual_reason_agent": "However, the 'Cameroon poverty map' is mentioned as an objective of the survey rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 93, + "text": "The proposed project will continue strengthening EMIS capacity within MINEDUB and MINESEC, while expanding the system to MINESUP and MINEFOP, in turn creating a fully integrated EMIS for the education sector as a whole. Under this result area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP - MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this", + "ner_text": [ + [ + 49, + 53, + "named" + ] + ], + "validated": false, + "empirical_context": "The proposed project will continue strengthening EMIS capacity within MINEDUB and MINESEC, while expanding the system to MINESUP and MINEFOP, in turn creating a fully integrated EMIS for the education sector as a whole. Under this result area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP - MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management.", + "type": "system", + "explanation": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 12, + "text": "Digitalization has accelerated in recent years, with 92 percent of schools connected to the internet - Costa Rica has embarked on a sustained push for digitalization of education and has made digital competencies an integral part of the curriculum. With a focus on connectivity, 877 schools were equipped with satellite based internet in the past year and 1, 346 schools upgraded to fiber optic connection. As of March 2024, 92 percent of schools have connectivity. With the objective of reaching universal connectivity, an additional 388 will be equipped in 2024. 14 However, these efforts are only a part of a larger digitalization effort to accelerate learning and develop digital competencies. On the hardware side, secure internal school networks are required, together with more computing devices ( mainly portable computers and tablets ). Digital learning platforms need to be developed and deployed, with high quality digital content integrated with the curriculum and supported with digital assessments. To make investments in the digital arena and have a sustained effect on learning, Costa Rica has built on decades of experience in this arena with digital competencies now a part of the formal curriculum and hence a core of learning.", + "ner_text": [ + [ + 846, + 872, + "named" + ] + ], + "validated": false, + "empirical_context": "On the hardware side, secure internal school networks are required, together with more computing devices ( mainly portable computers and tablets ). Digital learning platforms need to be developed and deployed, with high quality digital content integrated with the curriculum and supported with digital assessments. To make investments in the digital arena and have a sustained effect on learning, Costa Rica has built on decades of experience in this arena with digital competencies now a part of the formal curriculum and hence a core of learning.", + "type": "program", + "explanation": "However, it is not a dataset as it refers to a program or initiative rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'digital learning platforms' could imply a collection of educational data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a program or initiative rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "182_multi0page", + "page": 8, + "text": "Poverty and Social Impact of Transition: The recent repeated shocks - - a consequence of political strife and social unrest - - have created major economic and social problems in the country. Because of these shocks, many Albanians have realized a loss of income and earning opportunities and have incurred risks to their personal well-being and safety. Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "ner_text": [ + [ + 1095, + 1106, + "named" + ], + [ + 222, + 231, + "FY2002 LSMS <> reference population" + ], + [ + 1268, + 1286, + "FY2002 LSMS <> usage context" + ] + ], + "validated": true, + "empirical_context": "Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "type": "survey", + "explanation": "In the context, it is mentioned as part of an analysis to update poverty information, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'LSMS' typically refers to the Living Standards Measurement Study, which is a known survey collecting data on living conditions.", + "contextual_reason_agent": "In the context, it is mentioned as part of an analysis to update poverty information, indicating it is used as a data source.", + "contextual_signal": "follows 'through an analysis of' indicating it is a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "187_multi-page", + "page": 10, + "text": "Results of that survey the private sector ) sector manufacturing wages below will provide several baseline measures of public / private pay 0. 5 provides inadequate attraction for relationships. Thereafter, the Government will report on skilled staff, or above 1. 5 indicates civil service and other public employee salaries, whose capture by existing personnel. changes will be tracked relative to changes in measures such as GDP and CPI, to assess whether public salaries are adjusting as desired, given their starting point. ( Periodicity: Once for baseline. Annual for tracking changes within the public sector relative to changes in GDP and CPI. ) Civil service pay Horizontal decompression Central pay and employment registry currently being ( horizontal ( discretionary allowances over and developed by the government will, once up and running, compression ) above base pay ) in excess of 1: 1. 2 produce reports on the composition of the budget-financed provides opportunities for excessive wage bill by component of salary, as defined in the Civil managerial discretion, facilitating Service Law, including variance in that composition across organized corruption and public agencies. ( Periodicity: Semi-annual, once the pay and rent-seeking. employment registry is functioning ). A survey of public officials is being completed in early 2000. Data from that survey, as well as from the public / private salary survey to be undertaken later in 2000, will allow identification of a baseline assessment of horizontal compression. The survey of public officials will be repeated in 2002, permitting assessment near the time of the project ' s midterm review of progress on reducing any excessive horizontal decompression. ( Periodicity: Once for baseline. At least one follow-up survey ) 7 -", + "ner_text": [ + [ + 1397, + 1427, + "named" + ], + [ + 211, + 221, + "public / private salary survey <> publisher" + ], + [ + 1348, + 1352, + "public / private salary survey <> reference year" + ], + [ + 1589, + 1593, + "public / private salary survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "A survey of public officials is being completed in early 2000. Data from that survey, as well as from the public / private salary survey to be undertaken later in 2000, will allow identification of a baseline assessment of horizontal compression. The survey of public officials will be repeated in 2002, permitting assessment near the time of the project ' s midterm review of progress on reducing any excessive horizontal decompression.", + "type": "survey", + "explanation": "This is a dataset as it is described as a survey that will provide data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on salaries.", + "contextual_reason_agent": "This is a dataset as it is described as a survey that will provide data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 43, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 32 Indicator Name People provided with access to improved water sources Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name People provided with access to improved water sources - Female ( RMS requirement ) Definition / Description Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "ner_text": [ + [ + 253, + 282, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 32 Indicator Name People provided with access to improved water sources Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "report", + "explanation": "However, it is mentioned as a report and not as a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured data.", + "contextual_reason_agent": "However, it is mentioned as a report and not as a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 33, + "text": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "ner_text": [ + [ + 729, + 734, + "named" + ], + [ + 240, + 248, + "DHIS2 <> reference population" + ], + [ + 1023, + 1031, + "DHIS2 <> reference population" + ] + ], + "validated": true, + "empirical_context": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "type": "system", + "explanation": "In this context, DHIS2 is mentioned as a system that disaggregates services by patients' refugee status, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because DHIS2 is often associated with health data management.", + "contextual_reason_agent": "In this context, DHIS2 is mentioned as a system that disaggregates services by patients' refugee status, indicating its role as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 20, + "text": "Safety surveillance \u2022 Vaccine safety surveillance approach is aligned with WHO recommendations to detect serious AEFIs to provide timely data that can be shared with relevant stakeholders for rapid action. \u2022 AEFI plan is currently being finalized with preparations for training and implementation activities underway. Demand generation and communication \u2022 A demand generation and community engagement plan for optimizing the uptake of the COVID-19 vaccine has been developed in collaboration with the World Bank, UNICEF, and WHO and is included as an annex in the NDVP. \u2022 The communication and demand generation plan incorporates social and behavioral data from a national Facebook survey, which gathered data on vaccine hesitancy in the population, and is aimed at \u2022 Adoption of the communication by the high-level government bodies is instrumental in ensuring its successful implementation.", + "ner_text": [ + [ + 673, + 688, + "named" + ], + [ + 630, + 656, + "Facebook survey <> data type" + ], + [ + 664, + 672, + "Facebook survey <> data geography" + ], + [ + 705, + 730, + "Facebook survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Demand generation and communication \u2022 A demand generation and community engagement plan for optimizing the uptake of the COVID-19 vaccine has been developed in collaboration with the World Bank, UNICEF, and WHO and is included as an annex in the NDVP. \u2022 The communication and demand generation plan incorporates social and behavioral data from a national Facebook survey, which gathered data on vaccine hesitancy in the population, and is aimed at \u2022 Adoption of the communication by the high-level government bodies is instrumental in ensuring its successful implementation.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that gathered data used for empirical analysis on vaccine hesitancy.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on vaccine hesitancy.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that gathered data used for empirical analysis on vaccine hesitancy.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "032_IBArchive-e8d67f4f-bc76-49af-9b6c-6099c748075b", + "page": 54, + "text": "The World Bank Enhancing Community Resilience and Local Governance Project Phase II ( P177093 ) Page 49 of 73 MoFP and LGB as the proposed implementing agencies have had experience implementing World Bank - financed projects. However, due to protracted conflict and economic crisis, the knowledge gained at both national and sub-national verification levels have been eroded. In addition, the project will hire experienced fiduciary consultants to support the PMU to be established at the MoFP. The PMU will also be staffed with expertise in project management, FM, procurement, M & E, and safeguards, among others, to support project implementation. The IOM will be expected to maintain adequate capacity in South Sudan to ensure fiduciary due diligence and monitoring and verification of community-level subprojects. Given the World Bank staff \u2019 s mobility constraints outside Juba, monitoring systems will be robust, including a geo-enabled monitoring system by IOM, TPMA, and iterative conflict assessments, as described earlier. The FM capacity assessment of the MoFP is currently under way and will inform the FM risk rating and mitigation measures during implementation. A detailed procurement capacity assessment was conducted as part of the preparation of the project to inform the procurement risks and recommend mitigation measures. 121.", + "ner_text": [ + [ + 932, + 961, + "named" + ] + ], + "validated": false, + "empirical_context": "The IOM will be expected to maintain adequate capacity in South Sudan to ensure fiduciary due diligence and monitoring and verification of community-level subprojects. Given the World Bank staff \u2019 s mobility constraints outside Juba, monitoring systems will be robust, including a geo-enabled monitoring system by IOM, TPMA, and iterative conflict assessments, as described earlier. The FM capacity assessment of the MoFP is currently under way and will inform the FM risk rating and mitigation measures during implementation.", + "type": "system", + "explanation": "However, it is described as a system rather than a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'monitoring system' which suggests data collection.", + "contextual_reason_agent": "However, it is described as a system rather than a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 1066, + 1070, + "named" + ] + ], + "validated": false, + "empirical_context": "A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "type": "system", + "explanation": "However, in this context, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because EMIS (Education Management Information System) is often associated with data management.", + "contextual_reason_agent": "However, in this context, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 48, + "text": "The World Bank Strengthening Public Sector Efficiency and Statistical Capacity Project ( P151155 ) Page 45 of 93 Intermediate Results Indicators FY Indicator Name Baseline YR1 YR2 YR3 YR4 YR5 YR6 End Target Number of program budget and associated result indicators redesigned ( based on the new budget classification ) revised 0. 00 0. 00 6. 00 6. 00 6. 00 6. 00 6. 00 6. 00 Validation of Public Investment Selection Manual that include Poverty in selection criteria N Y Y Y Y Y Y Y Comprehensive budgeted Investment Projects database available online and updated quarterly within 4 weeks after the end of each quarter N N N Y Y Y Y Y Publication of annual budget and spending through on-line BOOST database by January 30 N N Y Y Y Y Y Y Procurement staff trained with demonstrated competency in procurement 0. 00 0. 00 70. 00 70. 00 70. 00 70. 00 70. 00 70. 00 ECAM 5 survey revised methodology is aligned with new international standards on Poverty surveys N N Y Y Y Y Y Y Interval between two Population Census reduced 17. 00 12. 00 12. 00 12. 00 12. 00 12. 00 12. 00 12. 00", + "ner_text": [ + [ + 996, + 1013, + "named" + ], + [ + 4, + 14, + "Population Census <> publisher" + ] + ], + "validated": true, + "empirical_context": "00 70. 00 ECAM 5 survey revised methodology is aligned with new international standards on Poverty surveys N N Y Y Y Y Y Y Interval between two Population Census reduced 17. 00 12.", + "type": "census", + "explanation": "In this context, it is used as a source of information for the revised methodology, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Population Census' typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "In this context, it is used as a source of information for the revised methodology, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 40, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 35 of 117 59. All DLIs and their annual results and achievements will be verified by an Independent Verification Agency ( IVA ). MoE, in consultation with the World Bank and in line with required procurement regulations, will explore whether the existing IVA for SEQIP could also verify results for this program. The DLIs will be verified through pre-identified and disclosed data from the National Program Coordinating Unit ( PCU ). The annual IVA reports will serve as the basis for assessing progress toward the achievement of the DLI targets, and for disbursement authorization by the World Bank for the PforR part of PEELP. Funds released against the DLIs will be deposited in two DAs, one in MoE and TSC based on the proportions agreed for each of the DLIs. III. PROGRAM IMPLEMENTATION A. Institutional and Implementation Arrangements The Program will be implemented by MoE and the TSC as the main implementing agencies, with support from participating agencies KICD and KNEC. MoE may engage other agencies in the implementation of the activities under the IPF Component listed in Technical Annex 3 Table 3. 3 shows the specific mandates of these institutions.", + "ner_text": [ + [ + 424, + 457, + "named" + ] + ], + "validated": true, + "empirical_context": "MoE, in consultation with the World Bank and in line with required procurement regulations, will explore whether the existing IVA for SEQIP could also verify results for this program. The DLIs will be verified through pre-identified and disclosed data from the National Program Coordinating Unit ( PCU ). The annual IVA reports will serve as the basis for assessing progress toward the achievement of the DLI targets, and for disbursement authorization by the World Bank for the PforR part of PEELP.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data used for verifying results and assessing progress.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'data' that is pre-identified and disclosed for verification purposes.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data used for verifying results and assessing progress.", + "contextual_signal": "follows 'verified through' indicating it is used as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 1120, + 1125, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable.", + "type": "system", + "explanation": "HRMIS is referred to as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is mentioned alongside data reports.", + "contextual_reason_agent": "HRMIS is referred to as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "171_multi0page", + "page": 37, + "text": "I '. v - - Key Perhomiance | Data Collection Strategy ' ierrtrchy of Objectiv J Jndicats ~ J _-_ _ Critical Assumptlons Output from each Output Indicators: Project reports: ( from Outputs to Objective ) Component: 1. Improved mobility: physical 4 of connectivity solutions Supervision missions, periodic Second phase of Transmilenio works and support to policy constructed. reports, user surveys, is completed in the next 3-year reforms aimed at increasing - increased # of km. covered Transmilenio, IDU and STT period. the use of public transport and by the transport system. field surveys, ICR. non-motorized transport - # of trunk stations built. Residents use the public modes. - # of km of interconnected Posting of indicators in the transport and non-motorized bike paths in operation. project ' s web site. facilities as anticipated. - # of intersections controlled in unified network system. Safety framework for bicycle - # of articulated and feeder use is established and in buses in service. operation. - # of Passengers transported in a working day. - # of new users in the transport system. - decreased # of traffic-related accidents, injuries and deaths. - increased use of bike paths. - increased service quality. 4 of road-safety programs presented. - reduction of the oversupply in public transport capacity. 2.", + "ner_text": [ + [ + 383, + 395, + "named" + ], + [ + 650, + 659, + "user surveys <> reference population" + ], + [ + 1344, + 1362, + "user surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Improved mobility: physical 4 of connectivity solutions Supervision missions, periodic Second phase of Transmilenio works and support to policy constructed. reports, user surveys, is completed in the next 3-year reforms aimed at increasing - increased # of km. covered Transmilenio, IDU and STT period.", + "type": "survey", + "explanation": "In the context, 'user surveys' is explicitly mentioned as a source of information used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'user surveys' is a dataset because it implies a structured collection of responses from users.", + "contextual_reason_agent": "In the context, 'user surveys' is explicitly mentioned as a source of information used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 57, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 53 of 86 181. Procurement capacity assessment. The Borrower opted to have the procurement activities under the responsibility of an existing agency with a track record ( the Merankabandi project fiduciary team ). However, an assessment of the Merankabandi project fiduciary team using the Procurement Risk Assessment and Management System conducted in June 2021 ( archived in the system on June 11, 2021 ) concluded on an urgent need of hiring a qualified senior procurement specialist with a strong experience in World Bank procedures to support the procurement function. In addition, the project is the first project under the World Bank Procurement Regulations to be managed by the Merankabandi fiduciary team. The other main risks are those inherent to the country. 182. Procurement Risk Rating. The project procurement risk prior to the mitigation measures is \u201c high \u201d. The risk will be reduced to a residual rating of \u201c substantial \u201d upon consideration of successful implementation of the mitigation measures. 183. Preparation of Project Procurement Strategy for Development ( PPSD ). In accordance with the requirement of the Bank procurement regulations, the Merankabandi fiduciary team together with the core project preparation team from the Borrower have prepared a PPSD. The PPSD provides the basis and justifications for procurement approaches and decisions including market analysis and selection methods.", + "ner_text": [ + [ + 343, + 392, + "named" + ] + ], + "validated": false, + "empirical_context": "The Borrower opted to have the procurement activities under the responsibility of an existing agency with a track record ( the Merankabandi project fiduciary team ). However, an assessment of the Merankabandi project fiduciary team using the Procurement Risk Assessment and Management System conducted in June 2021 ( archived in the system on June 11, 2021 ) concluded on an urgent need of hiring a qualified senior procurement specialist with a strong experience in World Bank procedures to support the procurement function. In addition, the project is the first project under the World Bank Procurement Regulations to be managed by the Merankabandi fiduciary team.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system used for assessment rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Assessment' and 'Management System', which can imply structured data.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system used for assessment rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 47, + "text": "Based on the outcome of the health facilities surveys in border areas, which will be conducted under Component 2 ( Health component ), the MOH in each member country and IGAD, will define the necessary improvements for provision of adopted service package by health facilities and elaborate an implementation plan ( and determine what can, if any, be supported by the project funds ). 7. In response to the mobility of the population, in addition to the availability of services, the subcomponent will support establishment of referral mechanism to support service continuity for CBMPs across the borders. Populations to be targeted are: ( i ) cross border and mobile populations ( long distance drivers, nomads, uniformed personnel, cross-border traders, commercial sex workers, etc. ); ( ii ) local populations living in the strategic CBMP concentrated areas ( unemployed young men and women, khat and alcohol sellers; local bus drivers, shopkeepers, etc. ) and ( iii ) nomad camping sites located 5 to 10 kilometers around the concentrated areas. 8. Thus the target areas of support are key hot-spot areas or strategic cross border and mobile populations concentrated areas located at or near the border. The mapping assessment conducted in the 7 IGAD countries identified a number of these key spot areas for each country. The proposed project would cover selected eligible hot spot areas; it is envisaged, however, that other interested donors would support and follow the same model for additional sites. This would enable other partners to support the overall IGAD partnership program. Over the long term, it is expected that the national programs could eventually take responsibility for targeting and financing activities for some of the targeted groups ( nomads, IDPs, and national vulnerable groups located along the border areas ). 9. Support and services to CBMPs will be provided through those organizations ( including MOH and NGOs ) identified as having substantial programs in areas which meet the selection 43", + "ner_text": [ + [ + 28, + 53, + "named" + ], + [ + 139, + 142, + "health facilities surveys <> publisher" + ], + [ + 644, + 679, + "health facilities surveys <> reference population" + ], + [ + 682, + 703, + "health facilities surveys <> reference population" + ], + [ + 705, + 711, + "health facilities surveys <> reference population" + ], + [ + 713, + 732, + "health facilities surveys <> reference population" + ], + [ + 734, + 754, + "health facilities surveys <> reference population" + ], + [ + 756, + 778, + "health facilities surveys <> reference population" + ], + [ + 863, + 893, + "health facilities surveys <> reference population" + ], + [ + 895, + 919, + "health facilities surveys <> reference population" + ], + [ + 921, + 938, + "health facilities surveys <> reference population" + ], + [ + 1934, + 1937, + "health facilities surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "Based on the outcome of the health facilities surveys in border areas, which will be conducted under Component 2 ( Health component ), the MOH in each member country and IGAD, will define the necessary improvements for provision of adopted service package by health facilities and elaborate an implementation plan ( and determine what can, if any, be supported by the project funds ). 7.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as part of the health component and is used to inform necessary improvements, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'health facilities surveys' implies a structured collection of data gathered from health facilities.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as part of the health component and is used to inform necessary improvements, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 88, + "text": "We are making an optimistic assumption that, by 2024, 100 percent of extremely poor households will be covered with NPTP benefits. Since we are using 2004 HBS data to simulate NPTP impact, we are assuming that the extreme poverty line is US $ 2. 4 per capita per day, and that 7. 2 percent of the population ( or 273, 761 individuals ) were extremely poor in that year. Assuming no changes in the distributions or total number of extreme poor in Lebanon between 2004 and 201260 NPTP currently ( in 2012 ) covers 84, 322 / 273, 761 = 30. 8 percent of extremely poor individuals ( assuming that all the current NPTP beneficiaries are extremely poor, i. e., there are no errors of inclusion ). Thus, 100 percent coverage rate assumes better outreach achieved by 2024. 18. Thus, the simulation of the expected impact of NPTP on aggregate extreme poverty and inequality in Lebanon consists of applying the average value of the NPTP benefit ( US $ 541. 15 per household in 2004 prices, assuming the value of the benefit increased at the same rate as overall CPI, or by 40 percent from 2004 to 2011 ) to the income of all individuals below the extreme poverty line of US $ 2. 4 per capita per day. We use the 2004 Household Budget Survey to run this simulation. The results are summarized in Table 3 below: 60 The estimated number of extremely poor in 2012 was determined based on the 2012 population in Lebanon ( World Development Indicator ) and the extreme poverty rate of 7. 2 percent ( 2004 Household Budget Survey or HBS ). Due to lack of updated data on poverty, it is assumed that the extreme poverty rate will remain the same as it was in 2004, when it was assessed through a HBS. 89", + "ner_text": [ + [ + 1202, + 1230, + "named" + ] + ], + "validated": true, + "empirical_context": "4 per capita per day. We use the 2004 Household Budget Survey to run this simulation. The results are summarized in Table 3 below: 60 The estimated number of extremely poor in 2012 was determined based on the 2012 population in Lebanon ( World Development Indicator ) and the extreme poverty rate of 7.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly used to run a simulation and inform the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly used to run a simulation and inform the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 62, + "text": "Women in rural areas are doubly or triply deprived: by the intersection of their gender, ethnicity and poverty. 27 15. Displacement. Women and girls in refugee and host communities in Hodh Chargui are disproportionally vulnerable to the complex challenges facing the region in terms of access to services, livelihoods and agency. Only 40 percent of the young women below the age of 24 in the wilaya are literate, an indicator that is likely to deteriorate given the low school enrollment rates. Many females are married during childhood ( 20 percent before the age of 15 ), and a large majority of adult women have been subjected to FGM / E. With high levels of divorce, a significant proportion of households are comprised of women and children only. Among the refugees, family separation linked to displacement results in more than half of households in the camp without a male head present. Women and girls in the camps are equally experiencing GBV, including sexual harassment, rape and physical assault, female genital mutilation, and child marriage ( sometimes as a protection against the risks mentioned before ). Lack of latrines and remote access to water increase risks in host communities. Gender data 16. Sex-disaggregated data is scarce in Mauritania and data on some key gender gaps \u2013 individual poverty measure, time use, some types of GBV \u2013 is missing. The on-going impact evaluations of both Tekavoul and the gender training course will make some additional data available. 27 Gender analysis for the EU 2nd Gender Action Plan in Mauritania.", + "ner_text": [ + [ + 1217, + 1239, + "named" + ] + ], + "validated": false, + "empirical_context": "Gender data 16. Sex-disaggregated data is scarce in Mauritania and data on some key gender gaps \u2013 individual poverty measure, time use, some types of GBV \u2013 is missing. The on-going impact evaluations of both Tekavoul and the gender training course will make some additional data available.", + "type": "data", + "explanation": "However, it is not a dataset itself but rather a type of data that highlights the absence of structured collections in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'sex-disaggregated data' implies a structured collection of data categorized by gender.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a type of data that highlights the absence of structured collections in the context.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 35, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 23 Figure 3. World Bank-financed Investments in Human Capital in Ethiopia 51. Building on the experience of ESPES and its predecessor Promoting Basic Services ( PBS ), there are well established mechanisms for monitoring and reporting of financing and results from a multi-sectoral program like the HCO. The HCO will track allocations to the human capital sectors in May every year during the budgeting stage ( when the MEFF is approved ), then a review will be carried out on the MEFF allocations vis-\u00e0-vis the budget during November ( after the budget is approved ), quarterly IFRs which provide disaggregated woreda level spending on human capital sectors ( health, education, water, etc. ) will be tracked, and then annual audit reports will provide details on allocations vs-a-vis spending on human capital sectors. D. Program Development Objective ( s ) ( PDO ) and PDO Level Results Indicators 52. The objective of the operation ( PDO ) is to improve learning outcomes and nutrition services for girls and boys, and to strengthen service delivery and accountability, in all regions including areas affected by conflict, droughts and high levels of refugees.", + "ner_text": [ + [ + 629, + 643, + "named" + ] + ], + "validated": false, + "empirical_context": "Building on the experience of ESPES and its predecessor Promoting Basic Services ( PBS ), there are well established mechanisms for monitoring and reporting of financing and results from a multi-sectoral program like the HCO. The HCO will track allocations to the human capital sectors in May every year during the budgeting stage ( when the MEFF is approved ), then a review will be carried out on the MEFF allocations vis-\u00e0-vis the budget during November ( after the budget is approved ), quarterly IFRs which provide disaggregated woreda level spending on human capital sectors ( health, education, water, etc. ) will be tracked, and then annual audit reports will provide details on allocations vs-a-vis spending on human capital sectors. D.", + "type": "report", + "explanation": "However, 'quarterly IFRs' is mentioned as a type of report rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'quarterly IFRs' refers to a dataset because it includes disaggregated financial data.", + "contextual_reason_agent": "However, 'quarterly IFRs' is mentioned as a type of report rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 13, + "text": "Although GDP decelerated by 0. 3 percent in 2020 because of Coronavirus Disease ( COVID-19 ), in the scenario that the pandemic is brought under control in 2021, economic growth is projected at 2-3 percent during 2021-23. 2. Burundi remains one of the poorest countries in the World and the poorest in Africa in 20211. The latest household survey data from 2013 / 142 shows that 64. 9 percent of Burundians lived below the basic needs \u2019 poverty line and 38. 7 percent below the food poverty line. Burundi is one the most food-insecure countries with the highest hunger score in the World according to the 2018 State of Food Security and Nutrition in the World Report3. In addition, Burundi has been ranked as one of 8 countries in which alarming hunger has been identified according to the 2020 Global Hunger Index. 3. Key human capital outcomes lag behind comparable countries in the region and restrict the productivity growth potential. Burundi \u2019 s Human Capital Index ( HCI ) shows that a child born in Burundi today will be 38 percent as productive when he grows up as he could be if he enjoyed complete education and full health4. This is slightly lower than the average for Sub-Saharan Africa region but slightly higher than the average for low-income countries, ranking Burundi as 154 out of 175 countries on the World Bank HCI.", + "ner_text": [ + [ + 330, + 351, + "named" + ], + [ + 225, + 232, + "household survey data <> data geography" + ], + [ + 396, + 406, + "household survey data <> reference population" + ], + [ + 497, + 504, + "household survey data <> data geography" + ], + [ + 682, + 689, + "household survey data <> data geography" + ], + [ + 940, + 947, + "household survey data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Burundi remains one of the poorest countries in the World and the poorest in Africa in 20211. The latest household survey data from 2013 / 142 shows that 64. 9 percent of Burundians lived below the basic needs \u2019 poverty line and 38.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to specific survey data used to analyze poverty levels in Burundi.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' implies a structured collection of data from surveys.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific survey data used to analyze poverty levels in Burundi.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 63, + "text": "As such, it would help to transform health sector service delivery and produce more quality data for timely decision-making. The Program aims to address critical gaps by supporting the expansion, interoperability, and effective use of digital health information systems in Jordan. The Program includes Results Areas that foster a foundational environment, thus enabling digital transformation at scale via the HIE and appropriate oversight, governance, and management mechanisms. Building on the strong interoperability layer, the national EMR system will be expanded to all MOH facilities at the primary, secondary, and tertiary levels. Currently, the Electronic Medical Records do not track refugee status, making it difficult to determine whether a registered non-Jordanian \u2013 including Syrian refugee patient is eligible for subsidized healthcare services. This contributes to the low uptake of health-services by refugees. Collected data from the national EMR will then be used to strengthen institutional capacity in data use through institutionalized data quality audits. 7. Result Area 2 on government effectiveness supports a cross-cutting objective of enhancing the professionalization of the civil service, including its digital literacy, as well as two sector specific strategic objectives, namely: improving student assessments through digitalization and enhancing the quality of health data. - Enhancing the professionalization of the civil service in Jordan is one of the main strategic thrusts of the Public Sector Modernization Roadmap.", + "ner_text": [ + [ + 653, + 679, + "named" + ] + ], + "validated": false, + "empirical_context": "Building on the strong interoperability layer, the national EMR system will be expanded to all MOH facilities at the primary, secondary, and tertiary levels. Currently, the Electronic Medical Records do not track refugee status, making it difficult to determine whether a registered non-Jordanian \u2013 including Syrian refugee patient is eligible for subsidized healthcare services. This contributes to the low uptake of health-services by refugees.", + "type": "system", + "explanation": "However, it is described as a system that does not function as a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Electronic Medical Records' suggests a collection of patient data.", + "contextual_reason_agent": "However, it is described as a system that does not function as a data source in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "167_27761", + "page": 20, + "text": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards. In addition, the auditor would carry out field spot check audits to ascertain compliance with contractual requirements. Compliance with conditional cash transfers would be monitored by an independent external consultant ( paragraph C. 3 ). 4. Social Opportunities, constraints, impacts, and risks arising. out of the socio-cultural and political context. The impact of closure and incursions in the West Bank and Gaza has been extensively documented. On the rise are poverty, unemployment, school drop out rates; on the decline or deteriorating are household incomes, living conditions, school attendance rates, at-large nutritional status and in particular of children 0-5 years of age. 13 The so called \u201c newly poor \u201d, those who are just above the poverty line before the outbreak of the second Intifada, in the third quarter of 2000, have been assisted by various interventions. To date, it has been much more difficult to find ways to assist the very poor, or first decile, whose coping mechanisms are essentially exhausted. The proposed project i s designed to support children ( 0-18 years old ) in the first decile. Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "ner_text": [ + [ + 1876, + 1880, + "named" + ], + [ + 741, + 750, + "PCBS <> data geography" + ], + [ + 755, + 759, + "PCBS <> data geography" + ], + [ + 1003, + 1028, + "PCBS <> reference population" + ], + [ + 1156, + 1177, + "PCBS <> reference year" + ], + [ + 1965, + 1985, + "PCBS <> author" + ], + [ + 1987, + 2021, + "PCBS <> data type" + ], + [ + 2040, + 2064, + "PCBS <> author" + ], + [ + 2066, + 2070, + "PCBS <> publication year" + ], + [ + 2099, + 2117, + "PCBS <> author" + ], + [ + 2119, + 2123, + "PCBS <> publication year" + ], + [ + 2143, + 2161, + "PCBS <> usage context" + ] + ], + "validated": true, + "empirical_context": "Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "type": "survey", + "explanation": "In the context, 'PCBS' is associated with quarterly household surveys, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PCBS' is a dataset because it is mentioned alongside various surveys and studies.", + "contextual_reason_agent": "In the context, 'PCBS' is associated with quarterly household surveys, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "069_Pakistan-Strengthening-Institutions-for-Refugee-Administration-Project", + "page": 30, + "text": "The World Bank Strengthening Institutions for Refugee Administration Project ( P165542 ) 22 Improved resolution of complaints of refugees and refugee hosting communities through operationalized complaints handling mechanism ( s ) This indicator tracks the the performance of Complaints Handling Mechanisms. Complaint is defined as administrative matters within the mandate of the Commissionerates for Afghan Refugees. Semi-Annual Data collected from the community based Complaints Handling System Review of the list of complaints, date of receipt and date of resolution. CCAR Data on socio-economic characteristics of refugees and refugee hosting communities published regularly. This indicator tracks the collection and publication of data on socio-economic characteristics of refugees and host communities. Annual The CCAR will provide information on the number of surveys conducted with evidence of data collected Completion reports providing summary statistics CCAR ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Strengthened mechanism for management of information across participating entities This indicator measures the availability of data on refugees through a functional Management Semi - annual MIS database Collecting data from the MIS CCAR", + "ner_text": [ + [ + 1407, + 1410, + "named" + ] + ], + "validated": false, + "empirical_context": "This indicator tracks the collection and publication of data on socio-economic characteristics of refugees and host communities. Annual The CCAR will provide information on the number of surveys conducted with evidence of data collected Completion reports providing summary statistics CCAR ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Strengthened mechanism for management of information across participating entities This indicator measures the availability of data on refugees through a functional Management Semi - annual MIS database Collecting data from the MIS CCAR", + "type": "system", + "explanation": "'MIS' refers to a Management Information System, which is a system but not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "'MIS' refers to a Management Information System, which is a system but not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 79, + "text": "( PMT ) method that has been put into place. On this basis, it has been decided that the program targeting mechanism should be revised based on data analysis from the beneficiary and applicant survey. The Program currently has 1 million beneficiaries and 600, 000 applicants who are on the waiting list. If targeted correctly, the program can reach a very large fraction of the poor in Yemen. 2 17. Since the improvement of targeting is a key part of component 1 of the ISP, the project can result in much larger benefits down the road if it succeeds in improving targeting procedures used at SWF. Also, by increasing the likelihood that better-off beneficiaries ( particularly those in groups E and F ) will not remain on beneficiary rolls, the ISP will result in the cash transfer program being more efficient in meeting its poverty targeting goals. 218. Primary benefits of the program: Increased school enrollment and attendance, reduced rates of mortality, morbidity and malnutrition, and higher consumption. Three of the main benefits of SWF \u2019 s cash transfer program are likely to be: ( i ) increased school enrolment and attendance for children; and ( ii ) reduced rates of mortality, morbidity and malnutrition among 0 to 5 year old children; and ( iii ) increased consumption among beneficiary households.", + "ner_text": [ + [ + 167, + 199, + "named" + ], + [ + 227, + 250, + "beneficiary and applicant survey <> reference population" + ], + [ + 386, + 391, + "beneficiary and applicant survey <> data geography" + ], + [ + 1292, + 1314, + "beneficiary and applicant survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "( PMT ) method that has been put into place. On this basis, it has been decided that the program targeting mechanism should be revised based on data analysis from the beneficiary and applicant survey. The Program currently has 1 million beneficiaries and 600, 000 applicants who are on the waiting list.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned that data analysis is conducted from the beneficiary and applicant survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data from beneficiaries and applicants.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that data analysis is conducted from the beneficiary and applicant survey.", + "contextual_signal": "follows 'based on data analysis from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 44, + "text": "Having accrued in-country experience in DLI implemention under the CEQUIL Project, the World Bank is well-placed to provide support to DLI-related activities and share international experience on RBF. B. Technical 93. The technical content of the project has been designed based on the goals of the ESS ( 2013 \u2013 2020 ), Cameroon Vision 2035, and the DSCE 2020, and an in-depth sector diagnostic. 26 A holistic approach will be adopted to address the challenges currently facing the education sector as described earlier. The interventions under the project support the four priority areas articulated in the Government ESS and are consistent with the findings of the in-depth sector diagnostic in Cameroon, the findings of the 2018 WDR, 27 and the findings of the forthcoming regional study on quality of education. 28 The design of interventions under each results area is informed by lessons learned from experience accrued by the Government and the World Bank in the implementation of Education and other relevant projects ( see section III, A ), from DPs \u2019 knowledge ( for example, ECD and refugees ), technical studies undertaken before or during project preparation ( textbook policy study, PBF pre-pilot feasibility study, ECD diagnostic, EMIS diagnostic, and study on refugees ), and builds on ongoing reforms in the education sector ( for example, textbook policy reform supported by the World Bank \u2019 s DPO", + "ner_text": [ + [ + 776, + 814, + "named" + ] + ], + "validated": false, + "empirical_context": "26 A holistic approach will be adopted to address the challenges currently facing the education sector as described earlier. The interventions under the project support the four priority areas articulated in the Government ESS and are consistent with the findings of the in-depth sector diagnostic in Cameroon, the findings of the 2018 WDR, 27 and the findings of the forthcoming regional study on quality of education. 28 The design of interventions under each results area is informed by lessons learned from experience accrued by the Government and the World Bank in the implementation of Education and other relevant projects ( see section III, A ), from DPs \u2019 knowledge ( for example, ECD and refugees ), technical studies undertaken before or during project preparation ( textbook policy study, PBF pre-pilot feasibility study, ECD diagnostic, EMIS diagnostic, and study on refugees ), and builds on ongoing reforms in the education sector ( for example, textbook policy reform supported by the World Bank \u2019 s DPO", + "type": "study", + "explanation": "However, it is not functioning as a data source in this context, as it is mentioned as a study rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'study' which can imply data collection.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it is mentioned as a study rather than a structured collection of data.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 17, + "text": "The IPF instrument was chosen to support critical investments for the growth of digital economy and to create income opportunities, with disbursement-linked indicators ( DLIs ) driving a focus on results. The project aims to build an impetus for private - sector-led growth of the digital sector by: ( i ) deepening the pool of quality digital skills; and ( ii ) leveraging the government \u2019 s ambitious agenda of digitizing government services to create a demand for digital services and jobs. The project is structured around interventions that address constraints to both the supply and demand side of digitally skilled labor in Jordan, with Component 1 focusing on strengthening the supply of digital skills and Component 2 boosting demand in the digital economy and hence jobs and income opportunities. Component 3 supports project management. 24. The use of an IPF with results-based financing modality will link disbursements to the achievement of results. The DLIs provide a common set of results that reflect joint priorities of MoDEE, Ministry of Labor ( MoL ), and Ministry of Education ( MoE ) in preparing for the fourth industrial revolution. DLIs will be a critical tool for shifting the policy dialogue towards results, especially in the initial years, and generating momentum around key activities that are bottlenecks in the system.", + "ner_text": [ + [ + 137, + 167, + "named" + ] + ], + "validated": false, + "empirical_context": "The IPF instrument was chosen to support critical investments for the growth of digital economy and to create income opportunities, with disbursement-linked indicators ( DLIs ) driving a focus on results. The project aims to build an impetus for private - sector-led growth of the digital sector by: ( i ) deepening the pool of quality digital skills; and ( ii ) leveraging the government \u2019 s ambitious agenda of digitizing government services to create a demand for digital services and jobs.", + "type": "indicator", + "explanation": "However, disbursement-linked indicators are not a structured collection of data but rather metrics used to assess project performance.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'indicators' can imply measurable data points.", + "contextual_reason_agent": "However, disbursement-linked indicators are not a structured collection of data but rather metrics used to assess project performance.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "182_multi0page", + "page": 31, + "text": "It also calls for increased public awareness about social problems, and suggests that the SSDP focus on certain key g 7oups of vulnerable people, including women at risk of violence, youths at risk, street children, and the abandloned elderly. For the project preparation stage, the VNICA has set up baseline data on the number of people in vulnerable situations and the type and number of supplied services in four Albanian districts ( Tirana, Durres, Skodra and Vlora ). In order to monitor and evaluate progress of projects in the four districts; list of indicators has been selected for periodic follow-up. Monitoring and evaluation will be done at the district level and carried out once a year. In one year, the SSDP project will develop a national monitoring strategy. Preliminary suggestions for the preparation of a national strategy include conducting research throughout Albania using the research instruments used in the VNICA. These include a rapid quantitative assessment of vulnerable groups in the districts, a \" vulnerability census \" ( data collection and survey ), and relative ranking of vulnerability. In addition, the project will support a permnanent household survey that will assist the Government in monitoring trends in living standards and poverty correlates over time.", + "ner_text": [ + [ + 1029, + 1049, + "named" + ], + [ + 300, + 313, + "vulnerability census <> data description" + ], + [ + 437, + 443, + "vulnerability census <> data geography" + ], + [ + 445, + 451, + "vulnerability census <> data geography" + ], + [ + 453, + 459, + "vulnerability census <> data geography" + ], + [ + 464, + 469, + "vulnerability census <> data geography" + ], + [ + 882, + 889, + "vulnerability census <> data geography" + ], + [ + 956, + 1023, + "vulnerability census <> data description" + ], + [ + 1088, + 1121, + "vulnerability census <> data description" + ] + ], + "validated": true, + "empirical_context": "Preliminary suggestions for the preparation of a national strategy include conducting research throughout Albania using the research instruments used in the VNICA. These include a rapid quantitative assessment of vulnerable groups in the districts, a \" vulnerability census \" ( data collection and survey ), and relative ranking of vulnerability. In addition, the project will support a permnanent household survey that will assist the Government in monitoring trends in living standards and poverty correlates over time.", + "type": "census", + "explanation": "The term 'vulnerability census' is explicitly mentioned as part of the data collection efforts in the context, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'census', which typically refers to a structured collection of data.", + "contextual_reason_agent": "The term 'vulnerability census' is explicitly mentioned as part of the data collection efforts in the context, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "195_multi-page", + "page": 24, + "text": "Annex 2 Page 4 of 1 Issues Health information systems are necessary because currently it is impossible to perform the proper epidemiological and public health analysis on a national level. It is also impossible to have significant decentralization of management authority without appropriate information infrastructure. No systems are in place at the hospital level which would support efforts to run hospitals more efficiently by measuring their efficiency ( patient flow management, inventory management, cost effectiveness and quality of physicians, as well as utilization ). If the Government intends to decentralize decision making and to hold organizations responsible for their budget, then information systems are needed. The few existing management information systems are managed and operated independently, with no data sharing across institutions, except for the minimal data set submitted to the MOH at periodic intervals, usually on paper. The network and database systems to be introduced under the project will encourage data sharing and reduction of duplicate resources. Project Inputs Financing would be provided for equipment, technical assistance, software developmentlpurchase, and training. Specifically, the project will support the collection, analysis, distribution, and storage of epidemiological data; equip hospital managers with tools for improving quality, efficiency, and physician effectiveness, and provide an infrastructure that will support decentralization.", + "ner_text": [ + [ + 747, + 777, + "named" + ] + ], + "validated": false, + "empirical_context": "If the Government intends to decentralize decision making and to hold organizations responsible for their budget, then information systems are needed. The few existing management information systems are managed and operated independently, with no data sharing across institutions, except for the minimal data set submitted to the MOH at periodic intervals, usually on paper. The network and database systems to be introduced under the project will encourage data sharing and reduction of duplicate resources.", + "type": "system", + "explanation": "However, it is described as a system that manages information rather than a specific dataset used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'management information systems' implies a structured collection of data.", + "contextual_reason_agent": "However, it is described as a system that manages information rather than a specific dataset used for empirical analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 53, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 49 of 66 agricultural program. Targets are cumulative. annually Of which, female This indicator will track the share of students who are certified after completing the digital training for the agricultural program. Targets are cumulative. Cumulative Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Of which, refugee / host community population This indicator will track the share of refugee students students who are certified after completing the digital training for the agricultural program. Targets are cumulative. Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Education Management Information System strengthened This indicator will track the implementation of the EMIS strengthening plan. The target will be achieved once the EMIS collects and reports data on at least 90 % of schools in South Sudan including information on teachers ( profile, qualifications, deployment and training ), schools ( location, enrollment, physical facilities ( including WASH, classrooms etc ), teachers and school management committees and students. Annually starting from the second year of implementati on Monitoring reports MoGEI and MoHEST will prepare reports on implementation progress of EMIS strengthening plan.", + "ner_text": [ + [ + 811, + 850, + "named" + ] + ], + "validated": false, + "empirical_context": "Targets are cumulative. Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Education Management Information System strengthened This indicator will track the implementation of the EMIS strengthening plan. The target will be achieved once the EMIS collects and reports data on at least 90 % of schools in South Sudan including information on teachers ( profile, qualifications, deployment and training ), schools ( location, enrollment, physical facilities ( including WASH, classrooms etc ), teachers and school management committees and students.", + "type": "system", + "explanation": "However, the context indicates that it is a system designed to manage and report data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, the context indicates that it is a system designed to manage and report data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "172_multi0page", + "page": 73, + "text": "Government resources are currently limited and it is highly likely that the demand for education resources will increase as incentives are expanded to include all primary students and schools in areas that have, until now, been inaccessible. Potential HIPC ftmding could be an important potential resource for supporting the recurrent cost of education. The EU is currently providing some support to the Government for covering recurrent cost of education. This assistance totaled approximately US $ 8. 0 million for the six month period from July to December 2000. Demographic Context The last census conducted in Sierra Leone dates back to 1985, and the new census is scheduled to take place in 2002. The 1985 national census counted a population of 3. 5 million, reflecting an annual growth rate of 2. 3 percent during 1974-85. The 1997 population was estimated at approximately 4. 4 million. According to the UN, 1. 6 million people were displaced within the country in 1996 with 370, 000 refugees in neighboring countries; 530, 000 refugees fled Sierra Leone after the 1997 coup, and a further 237, 000 have left as a consequence of the renewed fighting since February 1998.", + "ner_text": [ + [ + 707, + 727, + "named" + ] + ], + "validated": true, + "empirical_context": "Demographic Context The last census conducted in Sierra Leone dates back to 1985, and the new census is scheduled to take place in 2002. The 1985 national census counted a population of 3. 5 million, reflecting an annual growth rate of 2.", + "type": "census", + "explanation": "This is indeed a dataset as it represents a structured collection of demographic data from the 1985 national census.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a national census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it represents a structured collection of demographic data from the 1985 national census.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 70, + "text": "Supply side interventions include: ( a ) Compliance with Afghanistan Access to Information ( ATI ) Law \u2013 pro \u2010 active disclosure requirements, ( b ) Development of a project website with information on project progress ( M & E, Results, procurement, FM ), ( c ) Awareness campaigns, ( d ) Geo \u2010 enabled data \u2010 using rocket, pocket and socket technologies. Demand side interventions include: ( a ) GRM, ( b ) Call Centers, ( c ) Beneficiary feedback, and ( d ) On \u2010 demand ATI requests. Communication: 4. Ministry of Economy ( MoEC ), as the coordinating ministry for EZ \u2010 Kar project implementation, will lead the project communication efforts. MoEC will also coordinate communication with other implementing agencies and use their existing communication platforms for the purposes of EZ \u2010 Kar communications and engage with beneficiaries at different levels. In addition, a robust communication and awareness raising campaign will be carried out for Afghan refugees in Pakistan under Component 1. 5. MoEC will develop a community strategy and potentially outsource the implementation and creative production part of the strategy to a qualified communication and outreach firm. This approach addresses the capacity constraints of MoEC and enhances its coordination and monitoring role. 6.", + "ner_text": [ + [ + 166, + 181, + "named" + ] + ], + "validated": false, + "empirical_context": "Supply side interventions include: ( a ) Compliance with Afghanistan Access to Information ( ATI ) Law \u2013 pro \u2010 active disclosure requirements, ( b ) Development of a project website with information on project progress ( M & E, Results, procurement, FM ), ( c ) Awareness campaigns, ( d ) Geo \u2010 enabled data \u2010 using rocket, pocket and socket technologies. Demand side interventions include: ( a ) GRM, ( b ) Call Centers, ( c ) Beneficiary feedback, and ( d ) On \u2010 demand ATI requests.", + "type": "website", + "explanation": "However, the project website is described as a source of information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions a website that provides information.", + "contextual_reason_agent": "However, the project website is described as a source of information rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 109, + "text": "related to servicing the major occupation sectors in the Valley ( plant and animal production sectors ). The Table below provides the land area and attached assets. Table 8: Other Land attachments Other Land attachments Unit Quantity Water ground tank, concrete cubic meter 985 Water tank, elevated, plastic on metal frame cubic meter 910 Poly-tunnels, metal and plastic meter square 40, 000 Agricultural and irrigation Equipment * meter square 227, 000 Animal shed, concrete walls and floor meter square 2, 220 Metal overhead Pergola square meter 115 Metal pipes linear meter 65 Metal wired fence linear meter 1, 680 Water channel, open, concrete linear meter 990 * for total area cover refer to field crops area. 15. Property tenancy: The 2014 Social Survey has revealed that 35 households are non - Lebanese ( totaling 174 persons ) of which 34 households ( housing 165 persons ) have no legal Tenancy Right. Similarly; the eight counted Lebanese non-owner-households ( totaling 47 persons ) have no Tenancy rights to the property they occupy, and are not protected under the Lebanese law. The Table below summarizes the Impact over Properties Tenancy in the valley.", + "ner_text": [ + [ + 741, + 759, + "named" + ] + ], + "validated": true, + "empirical_context": "15. Property tenancy: The 2014 Social Survey has revealed that 35 households are non - Lebanese ( totaling 174 persons ) of which 34 households ( housing 165 persons ) have no legal Tenancy Right. Similarly; the eight counted Lebanese non-owner-households ( totaling 47 persons ) have no Tenancy rights to the property they occupy, and are not protected under the Lebanese law.", + "type": "survey", + "explanation": "The 2014 Social Survey is explicitly mentioned as revealing data about households, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides specific data about households.", + "contextual_reason_agent": "The 2014 Social Survey is explicitly mentioned as revealing data about households, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 54, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 50 of 64 RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target Average user experience score among PBF / DFF participating facilities ( Percentage ) 0. 00 60. 00 Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist ( Number ) 0. 00 80. 00 Communities with functioning community health workers per the norms set by the National Community Health Strategy ( Percentage ) 0. 00 60. 00 Communities that have formally declared the abandonment of the practice of FGM ( Percentage ) 0. 00 60. 00 Component 2: Strengthening institutions, citizen engagement and project management Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) ( Percentage ) 35. 00 65. 00 Facilities that are connected to the National Logistics Management Information System ( eLMIS ) ( Percentage ) 0. 00 65. 00 Health surveys conducted and results made public ( Number ) 0. 00 2. 00 Expectant women using a transport voucher or staying in a maternal waiting home to ensure safe deliveries ( Number ) 0. 00 20, 000. 00 Of which refugees ( Number ) 0. 00 200. 00 Of which host community residents ( Number ) 0. 00 2, 000. 00 Completion of annual regional and national health fora with adopted resolutions ( Yes", + "ner_text": [ + [ + 849, + 897, + "named" + ] + ], + "validated": false, + "empirical_context": "00 65. 00 Facilities that are connected to the National Logistics Management Information System ( eLMIS ) ( Percentage ) 0. 00 65.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 44, + "text": "The PIU will include a dedicated Project Director and three technical specialists to cover TVET governance; curriculum, assessment, and quality assurance; and monitoring and evaluation. The PIU will also bring on board additional staff to support fiduciary ( both procurement and financial management ) and safeguards related activities but embed them in existing implementation structures in MENFOP which are in place to support other on-going projects - Expanding Opportunities for Learning ( P166059 ) and Education Emergency Response to COVID-19 ( P174128 ). This will ensure that World Bank projects help strengthen ministerial capacity in a manner that does not add to the MENFOP \u2019 s administrative burden. 85. The implementation arrangements for the Project ensures that the issue of refugee integration will reach the Council for Economic Development ( CDE ), the highest-level policy body in the country. The issue of including refugee candidates / beneficiaries into the training system will be managed and determined by the implementing agency, MENFOP, together with guidance from the CDE. B. Results Monitoring and Evaluation Arrangements 86. Monitoring and evaluation mechanism. A wide variety of mechanisms will be used to measure project outcomes including but not limited to administrative data collected through both the MENFOP and MT, other centrally sourced data in the country, standardized data collected from training providers, and data from periodic surveys, tracer studies, labor market analysis, and studies planned to be carried out annually to support project implementation. This multi-level data gathering system will be supported by the Project and will develop a linked system of information allowing different data to be mapped across sub-systems. This will help address one of the key issues in collecting data from TVET systems \u2013 avoidance of data from a fragmented system. The Project places 55 A draft training plan for select PIU and other staff is included in Annex 3.", + "ner_text": [ + [ + 1291, + 1310, + "named" + ], + [ + 937, + 971, + "administrative data <> reference population" + ], + [ + 1398, + 1415, + "administrative data <> data type" + ], + [ + 1431, + 1449, + "administrative data <> reference population" + ], + [ + 1465, + 1481, + "administrative data <> data description" + ], + [ + 1499, + 1520, + "administrative data <> data description" + ], + [ + 2023, + 2041, + "administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Monitoring and evaluation mechanism. A wide variety of mechanisms will be used to measure project outcomes including but not limited to administrative data collected through both the MENFOP and MT, other centrally sourced data in the country, standardized data collected from training providers, and data from periodic surveys, tracer studies, labor market analysis, and studies planned to be carried out annually to support project implementation. This multi-level data gathering system will be supported by the Project and will develop a linked system of information allowing different data to be mapped across sub-systems.", + "type": "data", + "explanation": "In this context, 'administrative data' is indeed used as a source of information for measuring project outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'administrative data' is a dataset because it refers to collected information used for evaluation.", + "contextual_reason_agent": "In this context, 'administrative data' is indeed used as a source of information for measuring project outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 326, + 331, + "named" + ] + ], + "validated": false, + "empirical_context": "0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5.", + "type": "system", + "explanation": "However, HRMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it includes 'data records' in its description.", + "contextual_reason_agent": "However, HRMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 15, + "text": "Garissa and Turkana counties record the lowest percentages of women receiving at least 4 ANC visits ( 31. 2 percent ) in 2022 and deliveries by a skilled provider ( 52. 6 percent ) respectively. 13 In the refugee camps, most health services are provided by UNHCR and non-governmental organizations in collaboration with the Government. The overcrowded conditions, clean water supply shortages and hygiene challenges present heightened risks of communicable disease outbreaks such as cholera. Other recent outbreaks in the refugee camps include polio, dengue fever, and chikungunya. Refugees and host communities have also been affected by prolonged drought in the region and the food security of refugees has been further affected by cuts in the general food assistance. From 2020 to July 2022, there has been a steady and significant increase in malnutrition cases across all refugee camps, with children under 5 years being particularly affected by malnutrition and micronutrient deficiencies. 14 6. The devolution of health service delivery in 2013 has presented mixed results. Decentralization of responsibility for public sector health service delivery to the 47 county Governments has been accompanied by a 34. 0 percent increase in the number of facilities, a 46. 0 percent improvement in public health worker density between 2014 and 2020, and many counties have equipped their health facilities to respond to the evolving health needs. County Governments are also exploring approaches to strengthen primary care service delivery through governance and financial management reforms, such as the Facility Improvement Fund. However, county Governments have faced significant challenges in management of human resources for health, ensuring availability of Health Products and Technologies ( HPTs ), improving quality of care, 10 World Bank Estimates: https: / / data. worldbank. org / indicator / SP. DYN. LE00. IN? locations = KE 11 Kenya Demographic Health Survey, 2022. Key Indicators Report 12 Ministry of Health Kenya ( 2020 ) Kenya Progress Report on Health and Health-Related SDGs. 13 Kenya Demographic Health Survey, 2022 14 UNHCR & WFP, Joint Assessment Mission Kenya-Refugee Operations ( 2022 )", + "ner_text": [ + [ + 1940, + 1971, + "named" + ], + [ + 0, + 7, + "Kenya Demographic Health Survey <> data geography" + ], + [ + 12, + 28, + "Kenya Demographic Health Survey <> data geography" + ], + [ + 62, + 67, + "Kenya Demographic Health Survey <> reference population" + ], + [ + 121, + 125, + "Kenya Demographic Health Survey <> publication year" + ], + [ + 789, + 793, + "Kenya Demographic Health Survey <> publication year" + ], + [ + 897, + 919, + "Kenya Demographic Health Survey <> reference population" + ], + [ + 1973, + 1977, + "Kenya Demographic Health Survey <> publication year" + ], + [ + 2131, + 2135, + "Kenya Demographic Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "IN? locations = KE 11 Kenya Demographic Health Survey, 2022. Key Indicators Report 12 Ministry of Health Kenya ( 2020 ) Kenya Progress Report on Health and Health-Related SDGs.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly referenced in the context as part of the Kenya Demographic Health Survey, which is known to provide empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly referenced in the context as part of the Kenya Demographic Health Survey, which is known to provide empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 47, + "text": "40 experience in professional development an in-service teacher training. This international institution will assist NIET and the selected HE institutions with the preparation of tools to assess the level of competencies and skills of class teachers and, based on this assessment, to design a modular program for upgrading their skills. It will also provide implementation support for the evaluation of phase one and the preparation of a plan for scaling up this initiative using the evaluation data; ( iii ) training of trainers to be provided by the international institution; ( iv ) funds to finance the delivery of these training programs by higher education institutions through a consultant service contract with the participating local higher education institutions; ( v ) development and printing of training materials; ( vi ) a communication / dissemination campaign to inform unqualified teachers about the program; and ( vii ) financing of incremental operating costs to support NIET-TMT during project implementation. 150.", + "ner_text": [ + [ + 484, + 499, + "named" + ], + [ + 235, + 249, + "evaluation data <> reference population" + ] + ], + "validated": true, + "empirical_context": "This international institution will assist NIET and the selected HE institutions with the preparation of tools to assess the level of competencies and skills of class teachers and, based on this assessment, to design a modular program for upgrading their skills. It will also provide implementation support for the evaluation of phase one and the preparation of a plan for scaling up this initiative using the evaluation data; ( iii ) training of trainers to be provided by the international institution; ( iv ) funds to finance the delivery of these training programs by higher education institutions through a consultant service contract with the participating local higher education institutions; ( v ) development and printing of training materials; ( vi ) a communication / dissemination campaign to inform unqualified teachers about the program; and ( vii ) financing of incremental operating costs to support NIET-TMT during project implementation. 150.", + "type": "data", + "explanation": "In this context, 'evaluation data' is indeed used as a source of information for designing a modular program and scaling up the initiative.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'evaluation data' is a dataset because it refers to information collected for assessing competencies and skills.", + "contextual_reason_agent": "In this context, 'evaluation data' is indeed used as a source of information for designing a modular program and scaling up the initiative.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "172_multi0page", + "page": 46, + "text": "grant program and oversight WMA A Finance Directorate able to Adequate procedures in manage all project funds and compliance with IDA financial treasury and to comply with management and accounting accounting, budgeting and rules. financial reporting requirements. Internal Audit unit able to Report on budget execution per eview all MEST operations quarter and annual budget forecast based on assessment of needs and pnorities. EMIS established with data on EMIS network infrastructure four school annual surveys and set up with procedures in place gradually integrating and for users. linking data on finance ( i. e. udget execution ); personnel Four annual school surveys ( payroll and deployment published. ecords ); and indicators on OL standards. EMIS data are effectively used EMIS reports. for planning and prospective work on the education sector. Improved personnel Audit report on personnel management system with management contracted out. effective procedures enforced for recruitment, deployment Records on MEST ' s staff on and payment of MEST ' s staff the payroll. and teachers on the payroll. 80 trainers trained in first year List of trainers published and of Project to serve as trainers evaluation report on training for in-service teacher training received. activities to be implemented via school grants. 600 SMCs established with Reports on school tnspections prior mobilization from the from Inspectorate. community and with capacity built ( training when required ) to monitor school grant", + "ner_text": [ + [ + 784, + 788, + "named" + ] + ], + "validated": false, + "empirical_context": "ecords ); and indicators on OL standards. EMIS data are effectively used EMIS reports. for planning and prospective work on the education sector.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data usage in reports.", + "contextual_reason_agent": "However, EMIS is mentioned as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "117_Somali-Urban-Investment-Planning-Project", + "page": 17, + "text": "Feasibility and preliminary design studies for the solid and liquid waste investments will be undertaken to determine the costs and siting of bulk solid and liquid waste disposal / treatment sites and improvements in transfer points / stations, determine necessary construction standards and the basic technologies that are most appropriate and cost-effective, identify options for sourcing of materials and equipment, and assess the labor pool, and determine requirements ( financial, technical and institutional ) for sustaining and expanding the system of solid waste collection that has been introduced under donor-funded technical assistance efforts in recent years. This work has already been completed for the water supply sub-component ( under the UN-Habitat supported project ) and so is not required under SUIPP. 29. Environmental and social due diligence work will contribute to the sub-component via two main types of activities: ( i ) a baseline survey of environmental and social information, data and issues that would help to identify E & S constraints, but also areas of potential enhancement of project outcomes, and provide E & S information, criteria and constraining factors for the processes for design and environmental / social assessments planned for SUDP or other downstream planning activities; ( ii ) the development of an environmental and social management framework, which would constitute a generic tool for managing social and environmental risks related to urban investments, and planning follow-up investigations, assessments and", + "ner_text": [ + [ + 950, + 965, + "named" + ], + [ + 1079, + 1129, + "baseline survey <> data description" + ] + ], + "validated": true, + "empirical_context": "29. Environmental and social due diligence work will contribute to the sub-component via two main types of activities: ( i ) a baseline survey of environmental and social information, data and issues that would help to identify E & S constraints, but also areas of potential enhancement of project outcomes, and provide E & S information, criteria and constraining factors for the processes for design and environmental / social assessments planned for SUDP or other downstream planning activities; ( ii ) the development of an environmental and social management framework, which would constitute a generic tool for managing social and environmental risks related to urban investments, and planning follow-up investigations, assessments and", + "type": "survey", + "explanation": "In this context, it is indeed a dataset as it refers to a survey that collects specific data to identify constraints and enhance project outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data related to environmental and social information.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it refers to a survey that collects specific data to identify constraints and enhance project outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "192_multi-page", + "page": 19, + "text": "bigger loans, and the remaining indicated \" more justice, \" \" more transparency, \" \" longer loan terms, \" or \" more loans. \" ( d ) Client Satisfaction and Program Impact. Over 80 percent of respondents believe that the VCF program has had a positive impact on the village ( a general improvement in living conditions, improved livestock, improved agriculture, or improved trade ). According to the respondents, the advantages of VCF loans were: ( i ) low interest rate; ( ii ) simple procedures; and ( iii ) fast disbursement. However, 30 percent of respondents indicated that the loans were not large enough. Urban Microcredit Beneficiary Assessment. In September and October 1998, client surveys, in - depth interviews, and focus group discussions were carried out at all seven urban microcredit branches ( Elbasan, Berat, Kavaje, Puka, Shijak. Shkodra, and Tirana ). The client survey included 375 borrowers and applicants, representing one-fifth of the total ADF urban credit borrowers ( 1, 757 ) at the time. The main findings were: ( a ) Characteristics of Borrowers. In terms of gender and age of borrower and type of business, ADF urban microcredit clients are representative of the general business community.", + "ner_text": [ + [ + 683, + 697, + "named" + ], + [ + 677, + 681, + "client surveys <> publication year" + ], + [ + 809, + 816, + "client surveys <> data geography" + ], + [ + 818, + 823, + "client surveys <> data geography" + ], + [ + 825, + 831, + "client surveys <> data geography" + ], + [ + 833, + 837, + "client surveys <> data geography" + ], + [ + 839, + 845, + "client surveys <> data geography" + ], + [ + 847, + 854, + "client surveys <> data geography" + ], + [ + 860, + 866, + "client surveys <> data geography" + ], + [ + 1086, + 1112, + "client surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "Urban Microcredit Beneficiary Assessment. In September and October 1998, client surveys, in - depth interviews, and focus group discussions were carried out at all seven urban microcredit branches ( Elbasan, Berat, Kavaje, Puka, Shijak. Shkodra, and Tirana ).", + "type": "survey", + "explanation": "In this context, 'client surveys' is indeed a dataset as it is explicitly mentioned as part of the assessment process to gather data from beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'client surveys' is a dataset because it refers to a structured collection of responses gathered from clients.", + "contextual_reason_agent": "In this context, 'client surveys' is indeed a dataset as it is explicitly mentioned as part of the assessment process to gather data from beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 94, + "text": "Providing additional support to these communities under the project will reduce the pressure on the education system that would be expected in receiving an increase in refugee students. Disaggregated data by refugee / host community status will be collected and reported under the project. Efforts will be made to integrate gradually existing refugee-related data into the integrated EMIS developed under the project. 50. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees. The objective of this result area is to: ( a ) promote refugee welfare and inclusion in host communities \u2019 socio-economic structure; ( b ) help ensure access to and quality of services and basic infrastructure to refugees and host communities; and ( c ) strengthen Government finances where these have been strained by expenditures related to their hosting responsibilities. These objectives are consistent with the IDA18 RSW resource allocation framework implementation guidelines. 51. Activities supported under this result area will include: ( a ) promoting refugee welfare and inclusion in host communities: given the project specific profile of refugees, having a very low enrollment rate in their origin country ( around 20 percent ), the project will support an awareness program for refugees on sociocultural obstacles that influence school attendance: TA will be provided in the areas of", + "ner_text": [ + [ + 343, + 363, + "named" + ], + [ + 168, + 184, + "refugee-related data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Disaggregated data by refugee / host community status will be collected and reported under the project. Efforts will be made to integrate gradually existing refugee-related data into the integrated EMIS developed under the project. 50.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to data that will be collected and integrated into a system for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data being collected and reported.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data that will be collected and integrated into a system for analysis.", + "contextual_signal": "mentioned as data to be collected and reported under the project", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 648, + 653, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "organization", + "explanation": "'WOFED' is mentioned as an organization responsible for submitting reports, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'WOFED' is a dataset because it appears in a context discussing data collection and reporting.", + "contextual_reason_agent": "'WOFED' is mentioned as an organization responsible for submitting reports, not as a structured collection of data.", + "contextual_signal": "mentioned only as an organization, not as a data source", + "tags": [] + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 35, + "text": "Public disclosure annually by Better Work Jordan, through the http: / / betterwork. org / jordan website, of factory-level compliance on issues, including major labor and safety violations. 16. The process of transparent reporting is as follows: ( a ) Better Work completes an assessment ( assessments take place year round and are random and unannounced for each factory ). ( b ) Factories receive a draft of the full assessment report regarding compliance on all assessment questions, including information on whether the issues subject to public reporting are in noncompliance. ( c ) When the assessment report is finalized, the factory \u2019 s compliance with the 29 publicly reported issues is published online, on the Better Work Transparency Portal ( for all factories that have had at least two assessments ). ( d ) In response, factories can upload documents and photos on the public reporting website ( including information from assessment reports ). ( e ) A factory \u2019 s compliance findings remain on the website until a new assessment report is published, at which point the website is updated to reflect the factory \u2019 s most recent assessment data. ( f ) Every time a new assessment is completed for a factory, new compliance data replaces old data. ( g ) Compliance data on factories that had not yet had two assessments when public reporting was launched is published following a factory \u2019 s second assessment.", + "ner_text": [ + [ + 720, + 751, + "named" + ] + ], + "validated": false, + "empirical_context": "( b ) Factories receive a draft of the full assessment report regarding compliance on all assessment questions, including information on whether the issues subject to public reporting are in noncompliance. ( c ) When the assessment report is finalized, the factory \u2019 s compliance with the 29 publicly reported issues is published online, on the Better Work Transparency Portal ( for all factories that have had at least two assessments ). ( d ) In response, factories can upload documents and photos on the public reporting website ( including information from assessment reports ).", + "type": "portal", + "explanation": "However, the Better Work Transparency Portal is described as a website for publishing compliance information, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a portal that publishes compliance information.", + "contextual_reason_agent": "However, the Better Work Transparency Portal is described as a website for publishing compliance information, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "100_Lebanon-Health-PAD-PAD2358-06152017", + "page": 28, + "text": "The World Bank Lebanon Health Resilience Project ( P163476 ) Page 26 of 54 48. The MoPH, through the PMU \u2019 s two coordinators ( PHCC and hospital ), will be responsible for monitoring the daily progress of the project, focusing on improved accessibility of beneficiaries to the package of services, proper procurement, and capacity building of hospitals. The PMU will be responsible for preparing and submitting semiannual progress reports that, among other things, provide detailed reporting on services, procurement, and expenditures. It will also conduct mid-term and post - completion evaluations to gauge progress toward the PDO and assess the impact of the project on targeted beneficiaries. 49. The HIS system developed by the MoPH will be further refined and expanded under the project to all newly enrolled PHCCs to support the implementation and monitoring of the program. Data will be collected and used to: ( i ) supervise the performance of PHCCs; ( ii ) monitor the progress of beneficiary accessibility; ( iii ) monitor hospital improvements; and ( iv ) improve the provision of services on the basis of intermediate output and outcome data. The data will be verified directly by MoPH supervisory systems and external evaluation, and indirectly through triangulation with other data sources such as hospital claims. 50.", + "ner_text": [ + [ + 706, + 716, + "named" + ] + ], + "validated": false, + "empirical_context": "49. The HIS system developed by the MoPH will be further refined and expanded under the project to all newly enrolled PHCCs to support the implementation and monitoring of the program. Data will be collected and used to: ( i ) supervise the performance of PHCCs; ( ii ) monitor the progress of beneficiary accessibility; ( iii ) monitor hospital improvements; and ( iv ) improve the provision of services on the basis of intermediate output and outcome data.", + "type": "system", + "explanation": "However, the context indicates that it is a system for managing health information, not a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'HIS system' suggests a structured collection of health information.", + "contextual_reason_agent": "However, the context indicates that it is a system for managing health information, not a dataset itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 44, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 39 of 61 Households with complete information in the Social Registry - Refugee Number of refugee households registred within the Social Registry database and for which the socio - economic data has been collected and recorded. Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years. Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "ner_text": [ + [ + 1175, + 1190, + "named" + ] + ], + "validated": false, + "empirical_context": "Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "type": "system", + "explanation": "However, 'Social Registry' is referred to as a management information system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Social Registry' is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "However, 'Social Registry' is referred to as a management information system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 21, + "text": "While the coverage of technical and vocational training has increased, formal TVET centers and schools admit only approximately 25, 000 youth each year15 and only six percent of young males and two percent of young females in urban areas, and one percent of young males and females in rural areas report any technical or professional training or apprenticeship. Social norms around gender limit the time adolescent girls or young women have to pursue skills training and also dictate the type of training they are likely to pursue. While gender - disaggregated data on enrollment in training by sector are not currently available, consultations with development partners revealed that the small share of female youth who enroll in formal skills training or seek informal apprenticeships are more likely to opt for professions deemed suitable for women such as sewing and hairdressing. Female youth are less likely to acquire skills in highly remunerative sectors of the economy; for instance, according to the Ministry of National Education and Scientific Research ( Minist\u00e8re de l \u2019 Education Nationale et de la Recherche Scientifique, MNESR16 ), of the already small number of students enrolled in information and communication technology ( ICT ) courses in 2017 / 18 ( 3, 431 students ), only 25 percent were female. 15 In discussions with the private sector during the recent preparation of the World Bank-financed Local Development for Jobs Project. 16 Formerly the Ministry of Education and Technical and Professional Training ( Minist\u00e8re de l \u2019 Enseignement de la Formation Technique et Professionnelle, MEFTP ). Median Annual Income of 15-24-year-old by training / apprenticeship status, 2013-14 ( BIF thousands )", + "ner_text": [ + [ + 538, + 565, + "named" + ], + [ + 704, + 716, + "gender - disaggregated data <> reference population" + ], + [ + 1010, + 1064, + "gender - disaggregated data <> author" + ], + [ + 1260, + 1269, + "gender - disaggregated data <> publication year" + ], + [ + 1696, + 1703, + "gender - disaggregated data <> reference year" + ] + ], + "validated": true, + "empirical_context": "Social norms around gender limit the time adolescent girls or young women have to pursue skills training and also dictate the type of training they are likely to pursue. While gender - disaggregated data on enrollment in training by sector are not currently available, consultations with development partners revealed that the small share of female youth who enroll in formal skills training or seek informal apprenticeships are more likely to opt for professions deemed suitable for women such as sewing and hairdressing. Female youth are less likely to acquire skills in highly remunerative sectors of the economy; for instance, according to the Ministry of National Education and Scientific Research ( Minist\u00e8re de l \u2019 Education Nationale et de la Recherche Scientifique, MNESR16 ), of the already small number of students enrolled in information and communication technology ( ICT ) courses in 2017 / 18 ( 3, 431 students ), only 25 percent were female.", + "type": "data", + "explanation": "However, it is not a dataset itself but rather a description of the type of data that is lacking in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data that is categorized by gender.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a description of the type of data that is lacking in the context.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 123, + "text": "This enabled the design of a comprehensive range of context - and problem-specific behavioral solutions, taking as a basis behavioral science literature and analysis of the evidence collected. As example, behavioral approaches have been successfully applied in the past to increase effectiveness of economic stimulus such as cash transfers. The study also identified solutions to address behavioral challenges that impede system maintenance and power generation for public institutions. To address lack of local ownership, options include localized job training for solar O & M, sharing power generation with surrounding communities, and required ( nominal ) payments from the facilities themselves. Information campaigns can be used to address pre-conceived perceptual biases against both solar systems and the service model, including cost comparisons between stand-alone solar and other options, and assurance of payments over contract lifetimes. To address confirmation bias which leads to a lack of appreciation for the solar systems and the 68 UBOS and World Bank ( 2020 ) Economic and Social Impacts of COVID-19 on Refugee Population in Uganda: Results from the High - Frequency Phone Survey for Refugees in Uganda \u2013 First Round. 69 UNHCR ( 2022 ) Uganda Refugee Protection Assessment Update 3 for the period July to December 2021. 70 Forthcoming.", + "ner_text": [ + [ + 1169, + 1198, + "named" + ], + [ + 1059, + 1069, + "High - Frequency Phone Survey <> publisher" + ], + [ + 1072, + 1076, + "High - Frequency Phone Survey <> publication year" + ], + [ + 1122, + 1140, + "High - Frequency Phone Survey <> reference population" + ], + [ + 1144, + 1150, + "High - Frequency Phone Survey <> data geography" + ], + [ + 1215, + 1221, + "High - Frequency Phone Survey <> data geography" + ], + [ + 1248, + 1252, + "High - Frequency Phone Survey <> publication year" + ], + [ + 1255, + 1261, + "High - Frequency Phone Survey <> data geography" + ], + [ + 1316, + 1337, + "High - Frequency Phone Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "Information campaigns can be used to address pre-conceived perceptual biases against both solar systems and the service model, including cost comparisons between stand-alone solar and other options, and assurance of payments over contract lifetimes. To address confirmation bias which leads to a lack of appreciation for the solar systems and the 68 UBOS and World Bank ( 2020 ) Economic and Social Impacts of COVID-19 on Refugee Population in Uganda: Results from the High - Frequency Phone Survey for Refugees in Uganda \u2013 First Round. 69 UNHCR ( 2022 ) Uganda Refugee Protection Assessment Update 3 for the period July to December 2021.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly referenced in the context as a survey providing empirical results.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly referenced in the context as a survey providing empirical results.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 34 of 47 teachers trained using scripted lessons; ( c ) boundary wall and functional toilets; ( d ) desk and chair for each student; ( e ) trained PTSMCs meeting regularly and endorsing monthly teacher attendance ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Targeted HFs having majority of essential medicines for RMNCHN services Percentage of targeted HFs having > 75 percent of essential RMNCHN medicines. Bi \u2010 annual Health institutional database Self \u2010 reported data Health Department, HMIS, PPHI Absenteeism among key staff to provide RMNCHN services Percentage of key staff present at HFs during duty time to provide RMNCHN services. Baseline is a preliminary estimate. Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "ner_text": [ + [ + 939, + 966, + "named" + ] + ], + "validated": false, + "empirical_context": "Baseline is a preliminary estimate. Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'monitoring system' which suggests data collection.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 65, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 60 of 130 2017. People provided with access to electricity with grid, mini-grid, off-grid under the project, of which refugees The indicator will track the number of refugees beneficiary of access to grid, mini-grid, and off-grid solutions under the project. Quarterly Project implementati on progress reports Service providers customers database, approved loan applications and installation reports. UECCC, MEMD People with access to clean cooking solutions under the project The indicator will track the number of people benefitting from access to clean cooking solutions under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016 - 2017. Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Approved loan and grants applications and installation reports. UECCC, MEMD Commercial and productive uses beneficiaries of grid, mini-grid, off-grid access The indicator will track beneficiaries of grid, mini - grid, off-grid electricity access for commercial and productive uses purposes. Quarterly Project implementati on progress report. Service providers customers database, MEMD database, approved loan applications and installation reports. Productive uses technologies include: internal wiring, efficient appliances, SRU, SPU, milling units etc.", + "ner_text": [ + [ + 734, + 764, + "named" + ], + [ + 87, + 91, + "UBOS National Household Survey <> publication year" + ], + [ + 695, + 732, + "UBOS National Household Survey <> data description" + ], + [ + 765, + 776, + "UBOS National Household Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "Average household size of 4. 7 people, UBOS National Household Survey 2016 - 2017. Quarterly Project implementati on progress reports.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced in the context as a source of data for the average household size.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a national survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced in the context as a source of data for the average household size.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 63, + "text": "Figures from the 2014 Uganda National Population and Housing Survey indicate that 32 percent of women were not involved in any economic activities, compared to only 26 percent of men ( National Housing and Population Census 2014 ). \u2022 Sub-component 3. 2 includes Digital Access program that will support various access affordability initiatives to increase direct access to internet, particularly for women. \u2022 The program will integrate feedback by women beneficiaries in the design and target households in refugees and host districts that are among the most vulnerable and left behind in terms of access to mobile devices. \u2022 Public Internet access points ( Wi-Fi hotspots ) and community Internet access schemes ( telecenters ) with women-friendly opening hours and in women-friendly locations such as markets, informal women \u2019 s group meeting locations, water collection points, and public food distribution centers ( Sub-components 1. 2, 3. 1, and 3. 2 ). Low level of digital skills and high incidence of online violence especially within refugee and host communities \u2022 While digital skills in Uganda are generally low, the \u2018 digital \u2022 Design digital skills / literacy training under sub - component 3. 2 to promote women \u2019 s participation and \u2022 Percentage of women assessed as digitally literate post the 64 GSMA Mobile Gender Gap Report 2020. 65 After Access Surveys 2019. 66 GSMA ( Global System for Mobile Communications Association ). The Mobile Gender Gap Report. 2015. 67 Freedom on the Net 2018, Freedom House.", + "ner_text": [ + [ + 22, + 67, + "named" + ], + [ + 17, + 21, + "Uganda National Population and Housing Survey <> publication year" + ], + [ + 22, + 28, + "Uganda National Population and Housing Survey <> data geography" + ], + [ + 82, + 146, + "Uganda National Population and Housing Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Figures from the 2014 Uganda National Population and Housing Survey indicate that 32 percent of women were not involved in any economic activities, compared to only 26 percent of men ( National Housing and Population Census 2014 ). \u2022 Sub-component 3.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 35, + "text": "Technical assistance will be provided to ensure that implementing agencies have sufficient data collection capacity and to strengthen the GOL \u2019 s \u201c tracking \u201d capacity, including systems to monitor gender-specific impacts of interventions. iii. Indicators that measure actions taken by or benefits arising to economic actors who do not receive direct support from the Program ( Results area 2 ). Monitoring will rely on administrative data collected by government entities. Technical assistance will be provided to ensure that data collection protocols collect the data needed for monitoring. 65. M & E will be supported by Program activities to support an improved evidence based on firms in Lebanon. Support through the Program for the establishment of an SME Observatory at the MOET to track and assess data on SME development, will provide valuable input to the M & E efforts. An impact evaluation of the Program will focus on the interventions that provide funding to beneficiaries. These evaluations will: ( i ) assess the effectiveness of jobs programs in a labor market under considerable stress; ( ii ) compare the effectiveness and cost-efficiency of different support modalities in a single consistent framework; and ( iii ) ensure the results are gender sensitive.", + "ner_text": [ + [ + 420, + 439, + "named" + ], + [ + 309, + 324, + "administrative data <> reference population" + ], + [ + 693, + 700, + "administrative data <> data geography" + ], + [ + 1292, + 1310, + "administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Indicators that measure actions taken by or benefits arising to economic actors who do not receive direct support from the Program ( Results area 2 ). Monitoring will rely on administrative data collected by government entities. Technical assistance will be provided to ensure that data collection protocols collect the data needed for monitoring.", + "type": "data", + "explanation": "In this context, 'administrative data' is indeed used as a data source for monitoring actions and benefits, confirming it as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'administrative data' is a dataset because it refers to data collected by government entities for monitoring purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is indeed used as a data source for monitoring actions and benefits, confirming it as a dataset.", + "contextual_signal": "mentioned as a source of data for monitoring", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "145_689710PAD0P1300Official0Use0Only090", + "page": 206, + "text": "193 8. Contract deviations by means of changes requested and granted immediately after contract award; 9. Used or inferior products are substituted for the product actually ordered; 10. Defective pricing, which might include: ( i ) persistent defective pricing; ( ii ) repeated defective pricing involving similar patterns or conditions; ( iii ) failure to correct known system deficiencies; ( iii ) indications of falsification or alteration of supporting data; ( iv ) protracted delay in release of data to government to preclude possible price reductions; ( v ) identical or nearly identical high salary history data on employees or consultants; and 11. Employment of people known to have previously perpetrated fraud against the government. 6 ) Payments 1. Contractors are overpaid or paid twice for the same items / services and there is no attempt to recoup the overpayments; 2. Accounting reconciliation is not performed regularly relative to ( i ) contract payments, ( ii ) daily transactions, and ( iii ) inventory; 3. Cost proposal data that is incorrect or less than current or complete; 4. Billings ( including progress payments ) not adequately supported by project status or reliable cost data ( including duplicate or altered invoices; double billing; etc. ); 5. Significant increase in price without corresponding increase in work; 6.", + "ner_text": [ + [ + 1028, + 1046, + "named" + ] + ], + "validated": false, + "empirical_context": "Accounting reconciliation is not performed regularly relative to ( i ) contract payments, ( ii ) daily transactions, and ( iii ) inventory; 3. Cost proposal data that is incorrect or less than current or complete; 4. Billings ( including progress payments ) not adequately supported by project status or reliable cost data ( including duplicate or altered invoices; double billing; etc. ); 5.", + "type": "data", + "explanation": "'Cost proposal data' is mentioned as a type of information rather than a structured dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Cost proposal data' refers to a structured collection of data used in financial contexts.", + "contextual_reason_agent": "'Cost proposal data' is mentioned as a type of information rather than a structured dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 30, + "text": "25 78. Survey and public consultations. The project is basing the design of the water service expansion and standpost management components on the results of a socio-economic study and household survey \u2013 commissioned specifically for this project - of household demand and willingness and ability to pay for different modes of service ( standposts vs. private connections ) in the peri-urban neighborhoods of Bujumbura. In addition, as part of the project preparation, multiple consultations were held between REGIDESO staff, members and local leaders of 26 neighborhoods, and the Bank project team, with the facilitation of local consultants. A participatory workshop was organized to discuss the design of the proposed program and to disseminate the findings of the initial focus group interviews, the household survey, and the experience of similar endeavors in other countries. Representatives of 26 peri - urban neighborhoods and their local leaders expressed strong support for the proposed program. They provided key input into the design of the program during break-out sessions where the details of the program were discussed, such as the mode of selection and terms of reference for standpost operators, and the respective roles of the project partners ( REGIDESO, user committees, local administrators, and operators ). 79. Willingness and ability to pay.", + "ner_text": [ + [ + 804, + 820, + "named" + ], + [ + 409, + 418, + "household survey <> data geography" + ], + [ + 581, + 585, + "household survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "In addition, as part of the project preparation, multiple consultations were held between REGIDESO staff, members and local leaders of 26 neighborhoods, and the Bank project team, with the facilitation of local consultants. A participatory workshop was organized to discuss the design of the proposed program and to disseminate the findings of the initial focus group interviews, the household survey, and the experience of similar endeavors in other countries. Representatives of 26 peri - urban neighborhoods and their local leaders expressed strong support for the proposed program.", + "type": "survey", + "explanation": "In the context, the 'household survey' is mentioned as part of the findings disseminated in the project, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data from households.", + "contextual_reason_agent": "In the context, the 'household survey' is mentioned as part of the findings disseminated in the project, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 27, + "text": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 21 of 95 use data ( land, mining, forestry, agriculture, etc. ) sharing for informed and sustainable land use management and planning with respect to protecting forests and improving land degradation for the land and mining sectors. It will support the establishment of an architecture for common data interoperability and data exchange including the principles, standards, modularity, scalability, and autonomy useful between relevant land, mining and other sectors domains to reduce cost and enable digital links / exchange of new or existing data systems. The project will finance activities to: ( a ) finalize the NSDI strategy; ( b ) develop instruments and procedures for data storage, management, and sharing of spatial data; ( c ) design a platform to integrate geospatial datasets to support decision-making; ( d ) strengthen the geodetic reference system by adding 11 stations to the 13 existing stations in order to densify the geodetic network; ( e ) strengthen the core foundation of a NSDI for better interoperability of geographic datasets, using orthophotos and images acquired through the project to support the development of a cadastral layer in the project area; ( f ) establish specific interoperability and data exchanges within ministries in charge of mines ( Mining Cadastre, Bureau of Mines and Geology of Burkina Faso [", + "ner_text": [ + [ + 931, + 956, + "named" + ] + ], + "validated": false, + "empirical_context": "It will support the establishment of an architecture for common data interoperability and data exchange including the principles, standards, modularity, scalability, and autonomy useful between relevant land, mining and other sectors domains to reduce cost and enable digital links / exchange of new or existing data systems. The project will finance activities to: ( a ) finalize the NSDI strategy; ( b ) develop instruments and procedures for data storage, management, and sharing of spatial data; ( c ) design a platform to integrate geospatial datasets to support decision-making; ( d ) strengthen the geodetic reference system by adding 11 stations to the 13 existing stations in order to densify the geodetic network; ( e ) strengthen the core foundation of a NSDI for better interoperability of geographic datasets, using orthophotos and images acquired through the project to support the development of a cadastral layer in the project area; ( f ) establish specific interoperability and data exchanges within ministries in charge of mines ( Mining Cadastre, Bureau of Mines and Geology of Burkina Faso [", + "type": "system", + "explanation": "However, the geodetic reference system is mentioned as a framework for enhancing geospatial data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which could imply a structured collection of data.", + "contextual_reason_agent": "However, the geodetic reference system is mentioned as a framework for enhancing geospatial data rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 17, + "text": "Tiers 1 \u2013 3 are regulated and supervised by the Bank of Uganda and Tier 4 is regulated by Uganda Microfinance Regulatory Authority ( UMRA ). 20 See Uganda - Policy, Regulator, Supervisory response to COVID-19 responses for Micro Finance, CGAP. 21 See Uganda - Policy, Regulator, Supervisory response to COVID-19 responses for Micro Finance, CGAP. 22 Uganda remittances were US $ 1. 3 billion in 2019, US $ 1. 425 billion in 2018, and US $ 1. 2 billion in 2017 \u2014 World Bank 2017 \u2013 2019 data. 23 Economic Policy Research Center ( EPRC ), 2020. 24 The survey conducted 1, 839 face-to-face interviews with SMEs in Uganda. Drawn from the UBOS census of business establishments, the sample is considered to be nationally representative. Nathan Associates performed the data collection between March and August 2014, with funding from Financial Sector Deepening Uganda.", + "ner_text": [ + [ + 633, + 644, + "named" + ], + [ + 56, + 62, + "UBOS census <> data geography" + ], + [ + 350, + 356, + "UBOS census <> data geography" + ], + [ + 602, + 606, + "UBOS census <> reference population" + ], + [ + 731, + 748, + "UBOS census <> author" + ], + [ + 787, + 808, + "UBOS census <> reference year" + ] + ], + "validated": true, + "empirical_context": "24 The survey conducted 1, 839 face-to-face interviews with SMEs in Uganda. Drawn from the UBOS census of business establishments, the sample is considered to be nationally representative. Nathan Associates performed the data collection between March and August 2014, with funding from Financial Sector Deepening Uganda.", + "type": "census", + "explanation": "The UBOS census is explicitly mentioned as the source from which the sample was drawn, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'census' typically refers to a structured collection of data.", + "contextual_reason_agent": "The UBOS census is explicitly mentioned as the source from which the sample was drawn, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 14, + "text": "Of the more than 750, 000 refugees17 registered in Jordan ( 89 percent of whom came from Syria ), an estimated 17 percent live in the Za \u2019 atari and Azraq refugee camps, while the remaining 83 percent are mostly in Jordan \u2019 s urban areas. Throughout the COVID-19 pandemic, food security has been a key concern for refugees in both camps and in host communities mainly due to the loss of income from temporary and informal labor activities. More than 80 percent of labor activities performed by non-Jordanians are estimated to take place in the informal economy versus 40 percent for Jordanian citizens ( MOSD, 2019 ). February 2021 mVAM data showed that 23. 3 percent of refugee households in host communities are food insecure ( over 154, 777 individuals ), while another 63. 7 percent of refugee households ( equivalent to approximately 423, 344 individuals ) are vulnerable to food insecurity. 12. Ensuring food security and social stability are at the core of the urgent need to ensure availability of and access to staple food. Bread is an essential part of the diet in Jordan and represents the main caloric source for the poorest Jordanians and the many refugees in the country.", + "ner_text": [ + [ + 604, + 608, + "named" + ] + ], + "validated": false, + "empirical_context": "Throughout the COVID-19 pandemic, food security has been a key concern for refugees in both camps and in host communities mainly due to the loss of income from temporary and informal labor activities. More than 80 percent of labor activities performed by non-Jordanians are estimated to take place in the informal economy versus 40 percent for Jordanian citizens ( MOSD, 2019 ). February 2021 mVAM data showed that 23.", + "type": "organization", + "explanation": "'MOSD' is mentioned as a source of information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MOSD' is a dataset because it is referenced in a statistical context.", + "contextual_reason_agent": "'MOSD' is mentioned as a source of information rather than a structured collection of data.", + "contextual_signal": "mentioned only as a source, not as a data source", + "tags": [] + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 53, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 49 of 66 agricultural program. Targets are cumulative. annually Of which, female This indicator will track the share of students who are certified after completing the digital training for the agricultural program. Targets are cumulative. Cumulative Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Of which, refugee / host community population This indicator will track the share of refugee students students who are certified after completing the digital training for the agricultural program. Targets are cumulative. Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Education Management Information System strengthened This indicator will track the implementation of the EMIS strengthening plan. The target will be achieved once the EMIS collects and reports data on at least 90 % of schools in South Sudan including information on teachers ( profile, qualifications, deployment and training ), schools ( location, enrollment, physical facilities ( including WASH, classrooms etc ), teachers and school management committees and students. Annually starting from the second year of implementati on Monitoring reports MoGEI and MoHEST will prepare reports on implementation progress of EMIS strengthening plan.", + "ner_text": [ + [ + 978, + 982, + "named" + ] + ], + "validated": false, + "empirical_context": "Annual Monitoring reports MoHEST PIU will collect data from Universities / polytechni cs; validated by TPM annually MoHEST PIU Education Management Information System strengthened This indicator will track the implementation of the EMIS strengthening plan. The target will be achieved once the EMIS collects and reports data on at least 90 % of schools in South Sudan including information on teachers ( profile, qualifications, deployment and training ), schools ( location, enrollment, physical facilities ( including WASH, classrooms etc ), teachers and school management committees and students. Annually starting from the second year of implementati on Monitoring reports MoGEI and MoHEST will prepare reports on implementation progress of EMIS strengthening plan.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data collection and reporting.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 100, + "text": "Currently FAO M & E conducts multiple impact assessment studies for the project they implement: Baseline Surveys, Post-Distribution Assessments and Impact Assessments. The Baseline Surveys, conducted regularly, will employ a hybrid approach that uses FAO Field Monitors ( currently about 15 across Somalia ) and an independent consulting firm as a Service Provider which is contracted and overseen by FAO. The use of out-sourcing contractor will increase the study access by reaching districts that are inaccessible to FAO staff and by reaching more locations and households for studies that require a relatively high sample size. FAO Field Monitors facilitate the collection of reliable data, as well as verify the implementation of activities. Remote-sensing ( comparison of high-definition satellite images and aerial pictures to confirm execution of the rehabilitation works ), biometrics ( registers and identifies beneficiaries through digitization and recognition of their thumb-print ), GPS photography ( photographs with GPS coordinates are required from the project sites ) and a call center ( conducts beneficiary, community leaders, trader and pricing surveys on the phone; hotline that receives and records complaints as part of the accountability to affected populations including raising awareness of Protection Against Sexual Exploitation and Abuse - PSEA ) are key tools for verification and administration to ensure all beneficiaries have been reached and have received the expected support of the right quantity and quality and on time. Data generated is maintained by the Information Management Unit, which has developed a database software, the Form Management Tool ( FMT ). The Post - Distribution and Impact Assessments will assess the extent to which the program objectives of rehabilitating community level productive infrastructure, inputs, and providing cash income to improve food security has been performed. An Impact Assessment39 to evaluate the project will be conducted once all data has been analyzed, programs implemented and results validated by FAO Somalia. 39 The impact assessment report shall present the detail evaluation include key number such as Food Consumption Index, the dietary diversity score and the increase in Tropical Livestock Unit ( TLU ). The data collection will likely be using the RIMA-based questionnaire ( RIMA \u2013 Resilience Index Measurement and Analysis )", + "ner_text": [ + [ + 2340, + 2364, + "named" + ] + ], + "validated": false, + "empirical_context": "39 The impact assessment report shall present the detail evaluation include key number such as Food Consumption Index, the dietary diversity score and the increase in Tropical Livestock Unit ( TLU ). The data collection will likely be using the RIMA-based questionnaire ( RIMA \u2013 Resilience Index Measurement and Analysis )", + "type": "questionnaire", + "explanation": "However, the RIMA-based questionnaire is a tool for data collection, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions a specific method of data collection.", + "contextual_reason_agent": "However, the RIMA-based questionnaire is a tool for data collection, not a structured collection of data itself.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 41, + "text": "A separate manual of procedures will be endorsed to define the PBC process and the detailed roles and responsibilities of all actors at the central, regional, district, and school levels. B. Results Monitoring and Evaluation Arrangements 79. The project will use the Results Framework to monitor and assess progress in the implementation of activities and in achievement of the PDO. The Results Framework includes PDO-level and intermediate results indicators, baselines and target values, frequency, data source methodology, and responsibilities for data collection. Where feasible, data collected will be disaggregated by gender and refugee status to monitor interventions \u2019 specific impact on these populations. Data to monitor the project and inform the indicators will be drawn from three main sources: official government data, surveys / evaluations, and progress reports produced by the PCU. In areas difficult to reach due to insecurity or conflict, the proposed project will use \u2018 Enhanced Monitoring and Evaluation \u2019 to monitor implementation progress for ongoing investments by geo-enabled methods and supplemented by community discussions. 80. The MEP and MES have low M & E capacity and producing timely and reliable data remains an issue. The ministries do not have an effective EMIS with data collection tools and processes, and school census are carried out through paper-based questionnaires.", + "ner_text": [ + [ + 808, + 832, + "named" + ], + [ + 607, + 649, + "official government data <> data description" + ] + ], + "validated": true, + "empirical_context": "Where feasible, data collected will be disaggregated by gender and refugee status to monitor interventions \u2019 specific impact on these populations. Data to monitor the project and inform the indicators will be drawn from three main sources: official government data, surveys / evaluations, and progress reports produced by the PCU. In areas difficult to reach due to insecurity or conflict, the proposed project will use \u2018 Enhanced Monitoring and Evaluation \u2019 to monitor implementation progress for ongoing investments by geo-enabled methods and supplemented by community discussions.", + "type": "data", + "explanation": "In the context, it is explicitly mentioned as a source of data to monitor the project, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'official government data' suggests a structured collection of information used for analysis.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of data to monitor the project, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 42, + "text": "MOITS Data collected regularly and reported by the MOITS Project Coordination Team Cumulative amount of barley procured through the project Cumulative amount of barley imports procured with project financing since the start of the project and delivered to the port of Aqaba Monthly and at the end of the project implementati on period MOITS Data collected regularly and reported by the MOITS Project Coordination Team Policy framework for mitigating commodity risk developed Series of policy reports supported with project financing completed and Twice per year and at the end of Project monitoring system Progress status report of analytical and stakeholder Project Coordination Team", + "ner_text": [ + [ + 0, + 10, + "named" + ], + [ + 51, + 82, + "MOITS Data <> author" + ], + [ + 83, + 207, + "MOITS Data <> data description" + ], + [ + 260, + 273, + "MOITS Data <> data geography" + ], + [ + 386, + 417, + "MOITS Data <> author" + ] + ], + "validated": true, + "empirical_context": "MOITS Data collected regularly and reported by the MOITS Project Coordination Team Cumulative amount of barley procured through the project Cumulative amount of barley imports procured with project financing since the start of the project and delivered to the port of Aqaba Monthly and at the end of the project implementati on period MOITS Data collected regularly and reported by the MOITS Project Coordination Team Policy framework for mitigating commodity risk developed Series of policy reports supported with project financing completed and Twice per year and at the end of Project monitoring system Progress status report of analytical and stakeholder Project Coordination Team", + "type": "data", + "explanation": "This is indeed a dataset as it refers to data collected regularly for project monitoring and reporting purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to data collected and reported by a project team.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data collected regularly for project monitoring and reporting purposes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 54, + "text": "45 Annex 5. Program Action Plan Action Description DLI # * Responsibility Recurrent Frequency Due Date Completion Measurement * * M & E Complete and verify baselines for the PforR Program indicators MOE Once No later than March 31, 2018 Procurement Establish an integrated system which captures procurement / contract cycle ( Goods, Works, Consultants ) MOE / MOPWH Once ( in two phases ) Phase 1: one year after project effectiveness Phase 2 \u2010 MOE: Integration of works and supplies departments level is completed. \u2010 MOE: Integration ( covering MOE departments, governorates, up to school levels ) is completed to align with OpenEMIS deployment timeline. \u2010 MOPWH: Integration of procurement, contract management and finance departments is completed and generating reports. \u2010 MOE and MOPWH: Systematic reports reflecting performance indicators of procurement processing and contract management. Systematic Reporting for constructions activities, between MOE and MOPWH MOE / MOPWH Recurrent \u2010 Systematic sharing of MOE Annual procurement plans, Procurement plans updates. \u2010 Systematic MOPWH reporting on contract management including payments and expected variations. Environmental and Social Safeguards Environmental and Social Standard Operating Procedures and Guidelines including different measures to bridge identified gaps, adopted. MOE 3 months after effectiveness Environmental and Social Standard Operating Procedures adopted.", + "ner_text": [ + [ + 626, + 634, + "named" + ] + ], + "validated": false, + "empirical_context": "Program Action Plan Action Description DLI # * Responsibility Recurrent Frequency Due Date Completion Measurement * * M & E Complete and verify baselines for the PforR Program indicators MOE Once No later than March 31, 2018 Procurement Establish an integrated system which captures procurement / contract cycle ( Goods, Works, Consultants ) MOE / MOPWH Once ( in two phases ) Phase 1: one year after project effectiveness Phase 2 \u2010 MOE: Integration of works and supplies departments level is completed. \u2010 MOE: Integration ( covering MOE departments, governorates, up to school levels ) is completed to align with OpenEMIS deployment timeline. \u2010 MOPWH: Integration of procurement, contract management and finance departments is completed and generating reports.", + "type": "program", + "explanation": "However, OpenEMIS is mentioned as a deployment timeline and not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset due to its name suggesting it relates to data management.", + "contextual_reason_agent": "However, OpenEMIS is mentioned as a deployment timeline and not as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 74, + "text": "The department has a senior accountant and several accounts assistants. The main accounts of the MWE are computerized with the IFMS. However, this system is only operational for government funds, and the Project module is not yet fully operational. As a result, Project financial reports cannot be generated directly from the IFMS. The accounting section also has a unit that handles the WMDP that has fully qualified accounting staff. Although the unit has a large number of staff, only three are senior ( at the level of officer and above ), which affects decision-making. The current WMDP has a financial management specialist ( FMS ) who is qualified and experienced and is expected to be dedicated for the Project. It also has the position of assistant FMS that fell vacant recently and will be filled in due course. With this level of staffing, there will be sufficient hands to manage the implementation of the Project. 10. The MWE has an Internal Audit Unit comprising four internal auditors seconded from the MoFPED \u2019 s Department of Internal Audit. There is also an audit committee in place at the MoFPED to which the Internal Audit Unit reports. The committee meets quarterly to review internal audit findings and the actions that have been taken to address them.", + "ner_text": [ + [ + 326, + 330, + "named" + ] + ], + "validated": false, + "empirical_context": "However, this system is only operational for government funds, and the Project module is not yet fully operational. As a result, Project financial reports cannot be generated directly from the IFMS. The accounting section also has a unit that handles the WMDP that has fully qualified accounting staff.", + "type": "system", + "explanation": "However, IFMS is described as a system and not explicitly mentioned as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMS is a dataset because it is referred to as a system that handles financial reports.", + "contextual_reason_agent": "However, IFMS is described as a system and not explicitly mentioned as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 45, + "text": "( Percentage ) Description The proportion of Health budget expenditure to allocation Frequency Quarterly Data source MoH budgetary data Methodology for Data Collection PMU and WB Responsibility for Data Collection PMU and WB Percentage of general service availability score ( Percentage ) Description Service availability is described by an index using the three areas of tracer indicators ( infrastructure, workforce, and utilization ). This is made possible by expressing the indicators as a percentage score un-weighted average of the three areas Frequency Quarterly Data source Quarterly Health Facility Assessment Methodology for Data Collection TPM report Responsibility for Data Collection TPM / PMU Percentage of general service availability score in host communities \u2019 areas ( Percentage ) Description Service availability is described by an index using the three areas of tracer indicators ( infrastructure, workforce, and utilization ). This is made possible by expressing the indicators as a percentage score un-weighted average of the three areas", + "ner_text": [ + [ + 372, + 389, + "named" + ] + ], + "validated": false, + "empirical_context": "( Percentage ) Description The proportion of Health budget expenditure to allocation Frequency Quarterly Data source MoH budgetary data Methodology for Data Collection PMU and WB Responsibility for Data Collection PMU and WB Percentage of general service availability score ( Percentage ) Description Service availability is described by an index using the three areas of tracer indicators ( infrastructure, workforce, and utilization ). This is made possible by expressing the indicators as a percentage score un-weighted average of the three areas Frequency Quarterly Data source Quarterly Health Facility Assessment Methodology for Data Collection TPM report Responsibility for Data Collection TPM / PMU Percentage of general service availability score in host communities \u2019 areas ( Percentage ) Description Service availability is described by an index using the three areas of tracer indicators ( infrastructure, workforce, and utilization ).", + "type": "concept", + "explanation": "'Tracer indicators' are described as components of an index rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'tracer indicators' refers to a dataset because it is associated with service availability metrics.", + "contextual_reason_agent": "'Tracer indicators' are described as components of an index rather than a structured collection of data.", + "contextual_signal": "mentioned only as part of an index, not as a data source", + "tags": [] + }, + { + "filename": "187_multi-page", + "page": 23, + "text": "performance standards. In addition, a number of spot surveys of individual issues will be undertaken over the course of the project, to capture and publicize citizens ' views on salient issues. In addition, the project will collect data produced by both public expenditure and human resource management systems, so as to monitor their functioning on an ongoing basis throughout project implementation. As a complement to these monitoring efforts, anti-corruption surveys were undertaken as a precursor to project preparation, to gauge citizen perceptions of corruption and related phenomena. Those surveys will also be repeated during the penultimate year of the project, so as to permit pre - and post-project comparisons. While the project is not expected to have measurable impacts on perceptions of corruption, the repeat of this survey should provide an instrument for contributing to enhanced citizen awareness of corruption issues, and thereby make a modest contribution to helping to nurture public pressure on the government to address the underlying factors that contribute to the pervasive levels of corruption evident in the first set of anti-corruption surveys. Accounting. financial reporting. and auditing arrangiements. The newly established UIPARP within the Office of the Prime Minister will be responsible for the overall fnancial managernent of the Project.", + "ner_text": [ + [ + 48, + 60, + "named" + ], + [ + 158, + 166, + "spot surveys <> reference population" + ], + [ + 447, + 470, + "spot surveys <> data type" + ], + [ + 1393, + 1411, + "spot surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "performance standards. In addition, a number of spot surveys of individual issues will be undertaken over the course of the project, to capture and publicize citizens ' views on salient issues. In addition, the project will collect data produced by both public expenditure and human resource management systems, so as to monitor their functioning on an ongoing basis throughout project implementation.", + "type": "survey", + "explanation": "In the context, 'spot surveys' are explicitly mentioned as a method to capture citizens' views, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'spot surveys' is a dataset because it refers to a structured method of collecting data on specific issues.", + "contextual_reason_agent": "In the context, 'spot surveys' are explicitly mentioned as a method to capture citizens' views, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 18, + "text": "The World Bank Education Infrastructure for Resilience ( EU Facility for SuTP ) ( P162004 ) Page 17 of 86 matters ( count of SuTP ), as well as access of SuTPs to key basic services ( e. g. education, health ). The analysis of combined data sources shows that some provinces which are not among the first priority list identified by the EU, have higher concentrations ( by ratios to population or levels ) of SuTPs by districts than previously reported. 35. Locations have been ranked and prioritized according to their level of deprivation in access to education to inform the selection of project sites for school construction. Deprivation in access to education has been assessed through a National Muhtar Survey conducted in every neighborhood and village of the country. In each location, muhtars who are the elected heads for neighborhoods and villages have reported on and rated Syrian refugees \u2019 access to education. This unique information has allowed the ranking of all locations in the country and produce an informed list of prioritized locations for better need-based targeting. This analysis shows that some provinces which are not among the first priority list identified by the EU, there is higher concentration of refugees by district. The top 5 percent districts with the highest concentration of SuTP are listed in Annex 4. 36.", + "ner_text": [ + [ + 693, + 715, + "named" + ], + [ + 794, + 801, + "National Muhtar Survey <> author" + ], + [ + 886, + 901, + "National Muhtar Survey <> reference population" + ], + [ + 1362, + 1380, + "National Muhtar Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Locations have been ranked and prioritized according to their level of deprivation in access to education to inform the selection of project sites for school construction. Deprivation in access to education has been assessed through a National Muhtar Survey conducted in every neighborhood and village of the country. In each location, muhtars who are the elected heads for neighborhoods and villages have reported on and rated Syrian refugees \u2019 access to education.", + "type": "survey", + "explanation": "The National Muhtar Survey is indeed a dataset as it systematically collects and reports data on education access from various neighborhoods and villages.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on access to education.", + "contextual_reason_agent": "The National Muhtar Survey is indeed a dataset as it systematically collects and reports data on education access from various neighborhoods and villages.", + "contextual_signal": "described as a survey that collects data on access to education", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 85, + "text": "The biggest gap in the Cameroon statistical system is on the production of micro-data ( household surveys and censuses ). The funding requirements for the population census is estimated at US $ 55 million ( CFAF 30 billion ) according to BUCREP and the requirements for the agriculture census is estimated at US $ 51 million ( CFAF 28 billion ). In addition, there is no commitment for the next ECAM. The proposed project intends to increase the frequency of the production of micro-data for a close poverty monitoring ( by complementing resources for the population census and securing resources for the next living conditions survey ), strengthen the national accounts, and enhance access to statistics to strengthen both policy making and monitoring.", + "ner_text": [ + [ + 155, + 172, + "named" + ] + ], + "validated": false, + "empirical_context": "The biggest gap in the Cameroon statistical system is on the production of micro-data ( household surveys and censuses ). The funding requirements for the population census is estimated at US $ 55 million ( CFAF 30 billion ) according to BUCREP and the requirements for the agriculture census is estimated at US $ 51 million ( CFAF 28 billion ). In addition, there is no commitment for the next ECAM.", + "type": "program", + "explanation": "However, in this context, it is mentioned as a funding requirement and lacks direct reference to a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population census' typically involves data collection.", + "contextual_reason_agent": "However, in this context, it is mentioned as a funding requirement and lacks direct reference to a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 6, + "validated": 5, + "not_validated": 1 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 52, + "text": "The integration of consent-based data sharing by DPI ecosystem participants is verified as follows: ( 1 ) the system provides a dynamic consent management platform that enables users to grant, review, modify, and revoke their consent at any time; ( 2 ) this platform allows users to easily access and understand the terms of consent before granting it, including what data is collected, how it is used, who it is shared with, and for how long it is retained; ( 3 ) it is possible for relying parties to require ex-ante consent to be given by users in real time prior to data sharing in cases where such consent may be required by the use case; ( 4 ) the available data sharing mechanisms that relying parties can use include standards-based, digitally verifiable credentials that put people at the center of a data sharing transaction and allow the shared data to be authenticated by relying parties online or offline; ( 5 ) the system maintains comprehensive logs of all consent activities, including timestamps, user identity verification, and details of consent; and ( 6 ) systems used for sharing personal data use people-centric digital identity as the primary method of authentication and authorization for the sharing of personal data. 3. 5 Digitally verifiable credentials.", + "ner_text": [ + [ + 128, + 163, + "named" + ] + ], + "validated": false, + "empirical_context": "The integration of consent-based data sharing by DPI ecosystem participants is verified as follows: ( 1 ) the system provides a dynamic consent management platform that enables users to grant, review, modify, and revoke their consent at any time; ( 2 ) this platform allows users to easily access and understand the terms of consent before granting it, including what data is collected, how it is used, who it is shared with, and for how long it is retained; ( 3 ) it is possible for relying parties to require ex-ante consent to be given by users in real time prior to data sharing in cases where such consent may be required by the use case; ( 4 ) the available data sharing mechanisms that relying parties can use include standards-based, digitally verifiable credentials that put people at the center of a data sharing transaction and allow the shared data to be authenticated by relying parties online or offline; ( 5 ) the system maintains comprehensive logs of all consent activities, including timestamps, user identity verification, and details of consent; and ( 6 ) systems used for sharing personal data use people-centric digital identity as the primary method of authentication and authorization for the sharing of personal data. 3.", + "type": "system", + "explanation": "However, it is described as a platform for managing consent rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data management and sharing.", + "contextual_reason_agent": "However, it is described as a platform for managing consent rather than a structured collection of data.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": "The subcomponent will: ( a ) finance procurement of information communication technology equipment at the national level and train staff on data entry and use; ( b ) train trainers to develop health facility staff data entry, management, and use capacity; ( c ) create interoperability and integration between data systems and ensure data sharing, storage and backup; ( d ) develop, print, and disseminate Standard Operating Procedures for HMIS data entry, cleaning, quality improvement, and use at all levels; ( e ) conduct data review meetings and generate data use tools; ( f ) establish and operate the National and State level HMIS and Monitoring and Evaluation ( M & E ) Technical Working Groups; ( g ) conduct data quality improvement activities at the facility and national level; ( h ) operationalize a national and state level research committee, building on existing structure; ( i ) conduct an annual health sector review meeting; and ( j ) maintain and institutionalize the Health Service Functionality ( HSF ) Database. 39.", + "ner_text": [ + [ + 440, + 444, + "named" + ] + ], + "validated": false, + "empirical_context": "The subcomponent will: ( a ) finance procurement of information communication technology equipment at the national level and train staff on data entry and use; ( b ) train trainers to develop health facility staff data entry, management, and use capacity; ( c ) create interoperability and integration between data systems and ensure data sharing, storage and backup; ( d ) develop, print, and disseminate Standard Operating Procedures for HMIS data entry, cleaning, quality improvement, and use at all levels; ( e ) conduct data review meetings and generate data use tools; ( f ) establish and operate the National and State level HMIS and Monitoring and Evaluation ( M & E ) Technical Working Groups; ( g ) conduct data quality improvement activities at the facility and national level; ( h ) operationalize a national and state level research committee, building on existing structure; ( i ) conduct an annual health sector review meeting; and ( j ) maintain and institutionalize the Health Service Functionality ( HSF ) Database. 39.", + "type": "system", + "explanation": "However, 'HMIS' is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is related to health information management systems, which often handle data.", + "contextual_reason_agent": "However, 'HMIS' is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 25, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 14 part of the Project to support implementation and reporting. Monitoring and reporting will focus on key performance data from specific Project activities that contribute to the Project \u2019 s intermediate results and PDO-level outcomes. The MWE will submit progress reports every semester to the World Bank. A midterm review will be carried out to evaluate implementation progress and identify potential issues in need of attention and resolution. An end-of-project evaluation will also be conducted two months before Project closing to assess whether the intermediate results and PDO-level indicators were achieved, the sustainability of the results, and lessons learned. C. Sustainability 45. The Borrower \u2019 s commitment is ensured by the strong alignment between the Project \u2019 s investments and national - and institutional-level strategies. The Project is supporting the implementation of Sector Development Plan ( 2015 / 2016 \u2013 2019 / 2020 ), which was the result of a comprehensive planning process that included consultation and coordination with national, regional, and local authorities and key DPs. The water resources investments were designed with sustainability in mind; the investments directly support the existing institutions ( the MWE and WMZs ) in carrying out their mandates and strengthening stakeholder-based institutions such as the Catchment Management Organizations ( CMOs ).", + "ner_text": [ + [ + 182, + 202, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 14 part of the Project to support implementation and reporting. Monitoring and reporting will focus on key performance data from specific Project activities that contribute to the Project \u2019 s intermediate results and PDO-level outcomes. The MWE will submit progress reports every semester to the World Bank.", + "type": "data", + "explanation": "'Key performance data' is not a structured collection of data but rather refers to specific metrics or information used for monitoring.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'key performance data' refers to a dataset because it includes the term 'data'.", + "contextual_reason_agent": "'Key performance data' is not a structured collection of data but rather refers to specific metrics or information used for monitoring.", + "contextual_signal": "mentioned only as performance metrics, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 56 of 74 Project Operations Manual, and have received cash transfers, at least for one payment cycle. minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS. Number of beneficiaries receiving cash for performing labor intensive public works who are female Number of total beneficiaries that directly receive cash transfer for working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are female This indicator will be measured at a minimum on a quarterly basis Registration and payment data in the SNSOP MIS Beneficiary data is collected during registration and updated over the course of the project. Payment data will also be periodically updated in the MIS Selected Implementing Partner Number of beneficiary households receiving cash transfer for participating in the behavioral change communication training The number of beneficiary households that participate in behavioral change communication training activities to receive their cash transfer.", + "ner_text": [ + [ + 246, + 249, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 56 of 74 Project Operations Manual, and have received cash transfers, at least for one payment cycle. minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a management information system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages information.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a management information system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 110, + "text": "102 EIC and MoLSA follow the Government reporting system. Both submit monthly financial statements to MoFEC in soft and hard copies within the stipulated dead line ( within 15 days after the month end ). Both entities closed the EFY 2009 accounts and submitted to MoFEC and OFAG. 16. ARRA submits quarterly IPFMR to UNHCR. The IPFMR contains both financial and physical activity report. ARRA is required to submit the quarterly report within 15 days after the end of the quarter. The quarterly report is being submitted within the deadline. For instance, the 1st quarter report for 2017 was submitted on April 10 ( 5 days before the dead line and the 2nd quarter report on July 15, 2017 ( on the deadline ). Annual financial statement is also prepared and is submitted to UNHCR and external auditors. IPDC produces consolidated financial statements of the entity for both internal and external use. The internal reports are intended for management and Board while the external reports are mainly for the tax authorities. The reports to management and Board are produced on quarterly basis and include: budget vs. expenditure reports, profit and loss statement, balance sheet, ratio analysis and narration to explain performance and budget variances mainly on revenue. The reports are usually submitted to the management and Board within ten days of quarter closing.", + "ner_text": [ + [ + 307, + 312, + "named" + ] + ], + "validated": false, + "empirical_context": "16. ARRA submits quarterly IPFMR to UNHCR. The IPFMR contains both financial and physical activity report.", + "type": "report", + "explanation": "However, the IPFMR is described as a report, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it contains reports that could imply data collection.", + "contextual_reason_agent": "However, the IPFMR is described as a report, not a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 19, + "text": "Component 2: Enhancing the socio-economic inclusion of poor households ( US $ 51. 2 million: US $ 22. 4 million from national IDA18, US $ 12. 1 million from IDA18 RSW, and US $ 16. 7 million from government ) 32. This component aims to tackle several constraints that hamper the socio-economic inclusion of the poorest households. More specifically, its objectives are to: ( a ) support the scale-up of the conditional cash transfer Tekavoul program to reach a total of 45, 000 households in extreme poverty ( Subcomponent 2. 1 ); ( b ) re-certify and develop an economic inclusion scheme for households exiting the Tekavoul program ( Subcomponent 2. 2 ); and ( c ) support a pilot for households \u2019 Civil Registry enrollment ( Subcomponent 2. 3 ). The Taazour General Delegation will be responsible for the component \u2019 s implementation. Sub-component 2. 1: Scale-up the Tekavoul program ( US $ 47. 6 million ) 33. The proposed project will expand the number of beneficiaries of the Tekavoul program from 30, 000 to 45, 000 households. Transfers will continue to be paid to the household member with primary responsibility for the children \u2019 s health, nutrition and education, in most cases, the household children \u2019 s mother. With this extension, the caseload represents 45 percent of the governmental objectives of covering the 100, 000 poorest 9 In September 2019, the Prime Minister announced that all social programs will eventually have to use the Social Registry for their targeting. In the medium term, the Government may issue a decree to encourage usage of the Social Registry by government departments.", + "ner_text": [ + [ + 1453, + 1468, + "named" + ], + [ + 752, + 778, + "Social Registry <> author" + ], + [ + 1361, + 1365, + "Social Registry <> publication year" + ] + ], + "validated": true, + "empirical_context": "Transfers will continue to be paid to the household member with primary responsibility for the children \u2019 s health, nutrition and education, in most cases, the household children \u2019 s mother. With this extension, the caseload represents 45 percent of the governmental objectives of covering the 100, 000 poorest 9 In September 2019, the Prime Minister announced that all social programs will eventually have to use the Social Registry for their targeting. In the medium term, the Government may issue a decree to encourage usage of the Social Registry by government departments.", + "type": "registry", + "explanation": "The Social Registry is explicitly mentioned as a source that social programs will use for targeting, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that is used for targeting in social programs.", + "contextual_reason_agent": "The Social Registry is explicitly mentioned as a source that social programs will use for targeting, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for targeting in social programs", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 64, + "text": "On an annual basis the health service quality assessment will include direct observation of health service process quality measures at hospitals and health centers. ( iv ) Bi-annual patient feedback using exit surveys. ( v ) Bi-annual visits to a sample of BHTs to measure service outputs and quality. ( b ) Periodic TPM data collection: ( i ) Biennial household coverage surveys as baseline / endline surveys in the project \u2019 s three-year timeframe. 35 ( ii ) Citizen engagement survey collected at the household level, with the coverage survey every other year. 2. Data Analysis and Visualization Platform. The platform will emphasize development of an integrated, institutionalized, and sustainable system. The platform will include analysis of health service delivery in refugee and host community areas. The platform will include the following: ( a ) Interactive data visualization platform presenting Results Framework and core indicators. The platform will use data from DHIS2 and the TPM and will include BHI data. It will be updated at least on a quarterly basis. ( b ) Presentation of data in data visualization platform in the form of static and interactive maps, charts and graphs, tailored to project monitoring needs and partner priorities. ( c ) Incorporation of HSF data within the data visualization platform through a link or page within the platform. ( d ) Overlay of disease and health service delivery data with meteorologic data to better understand seasonal patterns in service delivery and infectious disease data. 35 Given the planned project length of three years, this is a baseline and an endline survey. Potential timeframe changes would include interim surveys, which are planned to be light surveys focusing on key indicators.", + "ner_text": [ + [ + 1013, + 1021, + "named" + ], + [ + 344, + 379, + "BHI data <> data description" + ] + ], + "validated": true, + "empirical_context": "The platform will include the following: ( a ) Interactive data visualization platform presenting Results Framework and core indicators. The platform will use data from DHIS2 and the TPM and will include BHI data. It will be updated at least on a quarterly basis.", + "type": "data", + "explanation": "In the context, 'BHI data' is explicitly referenced as part of the data used in the platform, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BHI data' is a dataset because it is mentioned in the context of a data visualization platform that presents indicators.", + "contextual_reason_agent": "In the context, 'BHI data' is explicitly referenced as part of the data used in the platform, indicating it functions as a data source.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 30, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems. It will be used to gather and consolidate data across the components. It will allow the project to track and report on project-specific data requirements disaggregated by group and sub-group ( refugee, host community, women, children, etc. ). The P-MIS will be housed at the MINEMA SPIU and accessible via a web-based application in project districts and at BRD. The system will be used to ( a ) collect data and information and monitor progress of project activities; and ( b ) generate reports, figures and tables for project M & E. 78. Regular progress monitoring will be complemented by externally-administered surveys to measure progress and effectiveness of activities and to understand beneficiary perceptions of socio-economic inclusion. Periodic technical audits will be carried out to verify infrastructure quality. A baseline study will be conducted during the inception phase and followed up with annual surveys and other assessments, a midterm evaluation, and at project closing, to evaluate qualitative and quantitative aspects of project results.", + "ner_text": [ + [ + 829, + 860, + "named" + ], + [ + 74, + 80, + "externally-administered surveys <> data geography" + ], + [ + 1066, + 1080, + "externally-administered surveys <> data description" + ], + [ + 1147, + 1161, + "externally-administered surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "78. Regular progress monitoring will be complemented by externally-administered surveys to measure progress and effectiveness of activities and to understand beneficiary perceptions of socio-economic inclusion. Periodic technical audits will be carried out to verify infrastructure quality.", + "type": "survey", + "explanation": "In this context, the 'externally-administered surveys' are explicitly mentioned as a method to measure progress and effectiveness, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'surveys' are typically structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, the 'externally-administered surveys' are explicitly mentioned as a method to measure progress and effectiveness, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 39, + "text": "Despite sufficient water resources, the demand for potable water is not satisfied either in urban or rural areas. In urban areas, potable water needs are estimated to double every decade: 22 million m3 in 1990, 44 million m3 in 2000, and 70 million m3 projected for the year 2010. In rural areas, water needs increase by 58 percent every 10 years, and are projected to reach 434 million m3 by 201021. On average, the net service coverage rate22 ( taux de desserte net ) was estimated to be 43 percent in rural areas and 42 percent in urban areas in 199923. Currently, REGIDESO estimates the urban service coverage rate to be 60 percent. 27. These numbers have only worsened with the above-mentioned dilapidation of the infrastructure over the past decade. By the end of the conflict, hundreds of thousands of refugees had migrated to the 20 Data provided by REGIDESO and DGHER. These access rates are lower than the access rates mentioned in the WHO / UNICEF Joint Monitoring Program ( JMP ) of 2004-2006, which seem inconsistent with the surveys carried out by DGHER. 21 Source: Document de rapport du Burundi sur la mise en oeuvre de l ' Agenda 21, 2002 22 The net service coverage rate is the ratio between the number of households with access to water and the total number of households in a given territorial unit, accounting for non-functionning infrastructure. 23 These figures and assessment from the National Survey of Water Services Coverage carried out in 1999 and published in 2000, as cited in \u201c Analyse Contextuelle en Mati\u00e8re de Gestion Integr\u00e9e des Ressources en Eau ( GIRE ) au Burundi \u2013 Rapport Final, \u201d prepared by Gishinge Kasavubu M\u00e9dard, March 2006.", + "ner_text": [ + [ + 1409, + 1451, + "named" + ], + [ + 205, + 209, + "National Survey of Water Services Coverage <> reference year" + ], + [ + 228, + 232, + "National Survey of Water Services Coverage <> publication year" + ], + [ + 417, + 444, + "National Survey of Water Services Coverage <> data description" + ], + [ + 1062, + 1067, + "National Survey of Water Services Coverage <> publisher" + ], + [ + 1103, + 1110, + "National Survey of Water Services Coverage <> data geography" + ], + [ + 1467, + 1471, + "National Survey of Water Services Coverage <> reference year" + ], + [ + 1489, + 1493, + "National Survey of Water Services Coverage <> publication year" + ], + [ + 1595, + 1602, + "National Survey of Water Services Coverage <> data geography" + ], + [ + 1634, + 1658, + "National Survey of Water Services Coverage <> author" + ] + ], + "validated": true, + "empirical_context": "21 Source: Document de rapport du Burundi sur la mise en oeuvre de l ' Agenda 21, 2002 22 The net service coverage rate is the ratio between the number of households with access to water and the total number of households in a given territorial unit, accounting for non-functionning infrastructure. 23 These figures and assessment from the National Survey of Water Services Coverage carried out in 1999 and published in 2000, as cited in \u201c Analyse Contextuelle en Mati\u00e8re de Gestion Integr\u00e9e des Ressources en Eau ( GIRE ) au Burundi \u2013 Rapport Final, \u201d prepared by Gishinge Kasavubu M\u00e9dard, March 2006.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides empirical data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data on water services coverage.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides empirical data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "183_multi0page", + "page": 26, + "text": "and workplans adequate financial and oEducational data system, the Districts is improved in educational data. ( e. g., financial reporting system. planning process. proposals for new school * Monitoring reports, project construction, using data from management information EMIS ) system. - 23 -", + "ner_text": [ + [ + 273, + 277, + "named" + ] + ], + "validated": false, + "empirical_context": "planning process. proposals for new school * Monitoring reports, project construction, using data from management information EMIS ) system. - 23 -", + "type": "system", + "explanation": "'EMIS' is mentioned as a management information system, not as a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is associated with data management.", + "contextual_reason_agent": "'EMIS' is mentioned as a management information system, not as a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 13, + "text": "Further long-standing structural, institutional, and policy constraints such as low productivity, limited domestic market integration, and distinct dualistic pattern in farm structures act as a binding constraint on growth and employment generation. These factors result in unfavorable working conditions, including wages often below the national minimum and inadequate worker protection, including some use of child labor. More recently, there is some suggestion that farmers are finding it increasingly difficult to attract Turkish workers who are willing to work in the sector; 14 and farmers report that among those people who are willing to work, they lack the relevant skills for the task, which leads to crop losses. 15 10 Erdogan, M. 2014. Syrians in Turkey: Social Acceptance and Integration Research. Migration and Politics Research Centre, Hacettepe University. 11 Source: Turkish Statistical Institute Household Labor Force Survey, 2018.", + "ner_text": [ + [ + 914, + 942, + "named" + ], + [ + 730, + 741, + "Household Labor Force Survey <> author" + ], + [ + 759, + 765, + "Household Labor Force Survey <> data geography" + ], + [ + 884, + 913, + "Household Labor Force Survey <> publisher" + ], + [ + 944, + 948, + "Household Labor Force Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Migration and Politics Research Centre, Hacettepe University. 11 Source: Turkish Statistical Institute Household Labor Force Survey, 2018.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of information used in the research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of information used in the research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 46, + "text": "Collection TPM / PMU; Measures subcomponents 1. 1 and 1. 2 Under UNICEF Percentage of women receiving four ANC visits ( Percentage ) Description Percentage of women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Percentage of refugee women receiving four ANC visits ( Percentage ) Description Percentage of refugee women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider.", + "ner_text": [ + [ + 369, + 374, + "named" + ], + [ + 65, + 71, + "DHIS2 <> publisher" + ], + [ + 72, + 117, + "DHIS2 <> data description" + ], + [ + 418, + 424, + "DHIS2 <> publisher" + ], + [ + 459, + 465, + "DHIS2 <> publisher" + ], + [ + 466, + 519, + "DHIS2 <> data description" + ] + ], + "validated": true, + "empirical_context": "2 Under UNICEF Percentage of women receiving four ANC visits ( Percentage ) Description Percentage of women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Percentage of refugee women receiving four ANC visits ( Percentage ) Description Percentage of refugee women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider.", + "type": "system", + "explanation": "DHIS2 is indeed a data source used for collecting health-related data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned as a data source for collecting information on antenatal care visits.", + "contextual_reason_agent": "DHIS2 is indeed a data source used for collecting health-related data, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for data collection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "016_BOSIB029693bc600f090cb0745b2b12ee69", + "page": 110, + "text": ". Partnership with the Global Center on Adaptation ( GCA ). GCA is an international organization supporting adaptation through three areas: programs, advocacy, and knowledge. GCA has recently launched the Africa Adaptation Acceleration Program that aims to mobilize upwards to US $ 25 billion in finance for climate adaptation between 2020 and 2025. Through its City Adaptation Accelerator, GCA will provide the technical assistance to enhance the climate adaptation measures of KUSP2: ( i ) support the development of climate risk - informed urban master plans for select Kenyan counties and urban areas; ( ii ) strengthen resilience-building capacity of key municipal officials and NPCT through development of an urban resilience master class that focuses on hands-on skills development; and ( iii ) development of a Kenya-specific catalogue of Nature-based Solutions ( NbS ) for large-scale urban resilience programs. The catalogue will focus on technical details and create a better understanding of the characteristics of scalable NbS through an indicative economic and financial / cost benefit analysis as well as identification of key enabling environment factors.", + "ner_text": [ + [ + 819, + 869, + "named" + ] + ], + "validated": false, + "empirical_context": "GCA has recently launched the Africa Adaptation Acceleration Program that aims to mobilize upwards to US $ 25 billion in finance for climate adaptation between 2020 and 2025. Through its City Adaptation Accelerator, GCA will provide the technical assistance to enhance the climate adaptation measures of KUSP2: ( i ) support the development of climate risk - informed urban master plans for select Kenyan counties and urban areas; ( ii ) strengthen resilience-building capacity of key municipal officials and NPCT through development of an urban resilience master class that focuses on hands-on skills development; and ( iii ) development of a Kenya-specific catalogue of Nature-based Solutions ( NbS ) for large-scale urban resilience programs. The catalogue will focus on technical details and create a better understanding of the characteristics of scalable NbS through an indicative economic and financial / cost benefit analysis as well as identification of key enabling environment factors.", + "type": "catalogue", + "explanation": "However, it is not functioning as a data source in the context, as it is described as a catalogue focusing on technical details rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'catalogue' which can imply a collection of information.", + "contextual_reason_agent": "However, it is not functioning as a data source in the context, as it is described as a catalogue focusing on technical details rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 119, + "text": "The books of accounts to be maintained specifically for the proposed Project should thus be set up and should include cash book, ledgers, journal vouchers, fixed asset register, and a contract register. There is a list of accounts codes ( chart of accounts ) that allows project costs to be directly related to specific work activities and outputs of the project. 8. Staffing arrangements. The MEMD and UECCC are adequately staffed with qualified and experienced accounting staff. The function is headed by the Undersecretary at the MEMD who reports to the Permanent Secretary while a finance manager heads the departments at the UECCC, including an accountant and assistant accountant. The staff are qualified and experienced. To maintain a strong coordination function at the MEMD, its current staffing arrangement will continue to have a dedicated accountant for prompt reporting by line ministries. 9. Information systems. The UECCC uses Sun Systems and Pastel computerized accounting software while the MEMD is on IFMS. These packages can produce project financial reports instantly and will be used to produce reports for this project. The users are also well trained to use the software. Line ministries will keep basic records which will be consolidated at the MEMD. 10. Internal controls and audit. The UECCC have FM manuals that describe the accounting system while ministries have existing treasury accounting instructions issued under the Public Finance Management Act which describes the accounting system, that is, major transaction cycles of the project, funds flow processes, the accounting records, supporting documents, computer files, and specific accounts in the financial statements involved in the processing of transactions; the list of accounting codes used to group transactions ( chart of accounts ); the accounting processes from the initiation of a transaction to its inclusion in the financial statements; authorization procedures for transactions; the financial reporting process used to prepare the", + "ner_text": [ + [ + 1019, + 1023, + "named" + ] + ], + "validated": false, + "empirical_context": "Information systems. The UECCC uses Sun Systems and Pastel computerized accounting software while the MEMD is on IFMS. These packages can produce project financial reports instantly and will be used to produce reports for this project.", + "type": "system", + "explanation": "However, IFMS is described as a system and not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMS is a dataset because it is mentioned in the context of accounting software.", + "contextual_reason_agent": "However, IFMS is described as a system and not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 103, + "text": "IVA confirms TSC-reported figures through target schools online or phone based surveys and key informant interviews. DLI_TBL_VERIFICATION DLI 3. 2 Number of new teachers deployed to primary schools in non refugee and non host communities with the highest teacher shortages Description TSC allocates and deploys new teacher posts to primary schools with the highest teacher shortage, on top of pro-rata annual allocation to these schools. Data source / Agency TSC Verification Entity IVA Procedure TSC county offices will collect data from schools for submission to the TSC for compilation of annual reports on teacher", + "ner_text": [ + [ + 91, + 115, + "named" + ] + ], + "validated": false, + "empirical_context": "IVA confirms TSC-reported figures through target schools online or phone based surveys and key informant interviews. DLI_TBL_VERIFICATION DLI 3.", + "type": "interview", + "explanation": "However, it is not a structured collection of data but rather a method of gathering qualitative information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'key informant interviews' can be associated with data collection methods.", + "contextual_reason_agent": "However, it is not a structured collection of data but rather a method of gathering qualitative information.", + "contextual_signal": "mentioned only as a method of data collection, not as a data source", + "tags": [] + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 22, + "text": "12 Sub-component 2. 2. Core modules of the Management Information System ( US $ 1. 5 million equivalent ) 40. This sub-component will support the development of basic core modules of the Management Information System to support the delivery mechanisms of a basic social safety net system. These modules will be developed for the cash transfer program and will use a unique individual identification number for each beneficiary. They will include key social program Box 3: The selection at communes, collines, and household-level Based on the poverty map, the registry will start in the four communes with the highest estimated rural poverty rate in each province ( Gitega: Bugendana, Buraza, Gitega, and Itaba; Karusi: Bugenyuzi, Gihogazi, Mutumba, and Nyabikere; Kirundo: Bugabira, Busoni, Kirundo, and Ntega; and Ruyigi: Butagwanza, Butezi, Bweru, and Gisuru ). The 16 selected communes are sub-divided in 2 to 5 zones and include on average 26 collines ( between 11 and 43 ) with an average of 535 households ( between 130 and 1, 400 ) per colline.", + "ner_text": [ + [ + 43, + 72, + "named" + ] + ], + "validated": false, + "empirical_context": "2. Core modules of the Management Information System ( US $ 1. 5 million equivalent ) 40.", + "type": "system", + "explanation": "However, it is mentioned as a system and not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system and not as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "141_760530PAD0P127010Box377322B00OUO090", + "page": 31, + "text": "The PCU will also be supported to regularly review LG budgets, annual action plans and procurement plans to ensure that expenditure falls within the agreed expenditure items and follows procurement and safeguards guidelines and requirements. ( iii ) Annual performance assessments of the 100 LGs, supported by mid-term and end-of - project citizen satisfaction surveys, will provide data to measure actual improvements in service delivery and in the LGs \u2019 performance. ( iv ) Regular third-party FM reviews, procurement audits, and value-for-money reviews of expenditure and performance at the LG level. ( v ) Regular collection of reports and data tracking for the performance of national and regional level agencies as against their contractual commitments ( under Component 2 ). ( vi ) Reports and tracking of performance of mobile teams ( under Component 2 ). ( vii ) Overall project midterm review and final end-of-project evaluation, including covering social, environmental and economic aspects. 53. The project thus contributes to developing the Government \u2019 s internal performance monitoring system and financial reporting system through direct support to LGs and central government institutions. In addition, there will be formal mid-term and final evaluations of the project itself. Separate reviews will also be commissioned to assess specific aspects of the project, such as third-party procurement and financial management reviews, and value-for - money audits. C. Sustainability 54. Policy sustainability is very likely, given the strong commitments provided by the Government as reflected in the policy letters, reviews of legislation, and most recently the firm commitment provided in the budget statement for Fiscal Year 2013, with specific commitments to establish the Conditional Performance Grant and provide US $ 52 million in financing for the entire period of the project. It is expected that even in the event of a change of government, the support and push for decentralization reforms will continue.", + "ner_text": [ + [ + 310, + 368, + "named" + ], + [ + 1727, + 1743, + "mid-term and end-of - project citizen satisfaction surveys <> publication year" + ], + [ + 2042, + 2060, + "mid-term and end-of - project citizen satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The PCU will also be supported to regularly review LG budgets, annual action plans and procurement plans to ensure that expenditure falls within the agreed expenditure items and follows procurement and safeguards guidelines and requirements. ( iii ) Annual performance assessments of the 100 LGs, supported by mid-term and end-of - project citizen satisfaction surveys, will provide data to measure actual improvements in service delivery and in the LGs \u2019 performance. ( iv ) Regular third-party FM reviews, procurement audits, and value-for-money reviews of expenditure and performance at the LG level.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned that the surveys provide data to measure improvements in service delivery.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to surveys that collect data on citizen satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that the surveys provide data to measure improvements in service delivery.", + "contextual_signal": "follows 'supported by' indicating it provides data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 62, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 56 of 102 at university level - - Female according to accepted standard measurements of female graduates / completers of supported digital skills programs. The definition and measurement of advanced digital skills to be used for measuring this indicator will be disaggregated by type of training. Baseline will be established upon start of intervention and among graduates / completers from the third year of project, onwards using standard digital skills measurement tools. 3 Burundi Of which number of students with advanced digital skills competencies at university level ( Refugees ) Beneficiaries of job-focused interventions Annual, starting from Y2 Procurement report Administrative data, will capture the number of young entrepreneurs who benefitted from job - focused interventions including entrepreneurial skills training, subsidies and mentorship programs M & E specialist with the PIU", + "ner_text": [ + [ + 756, + 775, + "named" + ], + [ + 15, + 22, + "Administrative data <> data geography" + ], + [ + 558, + 565, + "Administrative data <> data geography" + ], + [ + 804, + 823, + "Administrative data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Baseline will be established upon start of intervention and among graduates / completers from the third year of project, onwards using standard digital skills measurement tools. 3 Burundi Of which number of students with advanced digital skills competencies at university level ( Refugees ) Beneficiaries of job-focused interventions Annual, starting from Y2 Procurement report Administrative data, will capture the number of young entrepreneurs who benefitted from job - focused interventions including entrepreneurial skills training, subsidies and mentorship programs M & E specialist with the PIU", + "type": "data", + "explanation": "In this context, 'administrative data' is explicitly mentioned as capturing the number of young entrepreneurs, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured data collected for administrative purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as capturing the number of young entrepreneurs, indicating it is used as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 49, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 44 Beneficiaries with improved access to community infrastructure ( health and education ) - - Refugees Beneficiaries of social safety net programs Quarterly For baseline, source is WFP and UNHCR data. Number of safety nets program beneficiaries includes number of refugee households r eceiving cash assistance from humanitarian agencies in targeted areas. The CFS is launching a baseline study which will help to confirm baseline numbers, to be reviewed at MTR. Data will be collected from the program MIS ( see further information below ) and also from humanitarian agencies working in targeted areas. CFS regional offices prepare reports based on registration and payments. Reports are quarterly and consolidated centrally by the CFS. They are then presented to the Steering Committee and the World Bank. At the end of the year ( December ) a final report consolidates data of the previous quarters.", + "ner_text": [ + [ + 588, + 591, + "named" + ] + ], + "validated": false, + "empirical_context": "The CFS is launching a baseline study which will help to confirm baseline numbers, to be reviewed at MTR. Data will be collected from the program MIS ( see further information below ) and also from humanitarian agencies working in targeted areas. CFS regional offices prepare reports based on registration and payments.", + "type": "system", + "explanation": "'MIS' refers to a management information system, which is not a dataset but a system for managing information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to data collection.", + "contextual_reason_agent": "'MIS' refers to a management information system, which is not a dataset but a system for managing information.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 80, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 76 of 159 Window 2 who take in apprentices the SDF Secretariat / PCU, PCU compiles and sends to independent verification agency which will verify Companies / enterprises benefiting from SDF Window 2 Number of enterprises benefitting from SDF Window 2 Annual SDF Secretariat / P CU SDF Secretariat / PCU compiles and sends to independent verification agency which will verify PCU, SDF Secretariat Strengthened information system and skills development sector monitoring This indicator measures DLI 7. The website and application will contain information about availability of training centers by different regions, and contact information of these centers. It will also contain aggregated results of labor market outcomes of different programs from the tracer studies. The tracer study will cover a representative sample of graduates from training institutions of MINEFOP, Website and application - Annual Tracer study - Years 1, 2, 4, and 5 Website and application - MINEFOP Tracer study - ONEFOP and MINEFOP MINEFOP sends the documentation, links and tools to PCU, independent verification agency verifies and validates MINEFOP", + "ner_text": [ + [ + 835, + 849, + "named" + ], + [ + 4, + 14, + "tracer studies <> publisher" + ], + [ + 906, + 942, + "tracer studies <> reference population" + ], + [ + 946, + 953, + "tracer studies <> publisher" + ], + [ + 1050, + 1057, + "tracer studies <> publisher" + ], + [ + 1084, + 1091, + "tracer studies <> publisher" + ], + [ + 1092, + 1099, + "tracer studies <> publisher" + ] + ], + "validated": true, + "empirical_context": "The website and application will contain information about availability of training centers by different regions, and contact information of these centers. It will also contain aggregated results of labor market outcomes of different programs from the tracer studies. The tracer study will cover a representative sample of graduates from training institutions of MINEFOP, Website and application - Annual Tracer study - Years 1, 2, 4, and 5 Website and application - MINEFOP Tracer study - ONEFOP and MINEFOP MINEFOP sends the documentation, links and tools to PCU, independent verification agency verifies and validates MINEFOP", + "type": "study", + "explanation": "In this context, 'tracer studies' is indeed used as a source of data for analyzing labor market outcomes of training programs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'tracer studies' is a dataset because it refers to a systematic study of labor market outcomes.", + "contextual_reason_agent": "In this context, 'tracer studies' is indeed used as a source of data for analyzing labor market outcomes of training programs.", + "contextual_signal": "described as a study that covers a representative sample of graduates", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 118, + "text": "Twelve groups comprising each of seven-eight residents, disaggregated by gender, socio-economic status and beneficiary zone or caza, 37 were organized through a combination of random and convenience sampling. A large portion of the residents selected came from some of the poorest neighborhoods in Southern Beirut, known for its heavy reliance on private networks and artesian wells due to limited or lack of public water supply in these areas. Based on the project household economic survey conducted in April 2014, approximately 48 percent of household heads in the project beneficiary areas had not completed primary school and 50 percent reported incomes of less than US $ 392 per month. 9. The development of the focus group instruments and sample frame benefited from in - depth interviews with the MOEW and official documentation provided on residents listed as direct project beneficiaries in Zones A, B, C and D. A recent census of the beneficiary zones also contributed to the development of the sample frame.", + "ner_text": [ + [ + 458, + 491, + "named" + ], + [ + 298, + 313, + "project household economic survey <> data geography" + ], + [ + 505, + 515, + "project household economic survey <> publication year" + ], + [ + 545, + 560, + "project household economic survey <> reference population" + ], + [ + 849, + 897, + "project household economic survey <> reference population" + ], + [ + 1035, + 1053, + "project household economic survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "A large portion of the residents selected came from some of the poorest neighborhoods in Southern Beirut, known for its heavy reliance on private networks and artesian wells due to limited or lack of public water supply in these areas. Based on the project household economic survey conducted in April 2014, approximately 48 percent of household heads in the project beneficiary areas had not completed primary school and 50 percent reported incomes of less than US $ 392 per month. 9.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data used to analyze household economic conditions in the project areas.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects economic data from households.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used to analyze household economic conditions in the project areas.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 83, + "text": "012 ) ( 0. 033 ) ( 0. 015 ) ( 0. 021 ) ( 0. 160 ) Observations 5, 984 2, 412 2, 046 992 534 R-squared 0. 085 0. 038 0. 072 0. 123 0. 568 Standard errors in parentheses * * * p < 0. 01, * * p < 0. 05, * p < 0. 1 Source: Enterprise Survey, 2008, 2013 \u2013 14, and 2015 \u2013 16 Note: Explanatory variables include firm size and age, firm \u2019 s ownership status, industry, region, and year. Control group for credit constraint status is FCC. 41 Kuntchev, V., Ramalho, R., Rodriguez-Meza, J., Yang, J. S., 2013. What have we learned from the Enterprise Surveys regarding access to finance by SMEs? Policy Research Working Paper 6670. World Bank, Washington D. C. 42 The firms in the FCC group applied for a loan and were rejected and do not have any type of external finance.", + "ner_text": [ + [ + 219, + 236, + "named" + ], + [ + 238, + 242, + "Enterprise Survey <> reference year" + ], + [ + 244, + 253, + "Enterprise Survey <> reference year" + ], + [ + 259, + 268, + "Enterprise Survey <> publication year" + ], + [ + 305, + 322, + "Enterprise Survey <> data description" + ], + [ + 433, + 445, + "Enterprise Survey <> author" + ], + [ + 447, + 458, + "Enterprise Survey <> author" + ], + [ + 480, + 491, + "Enterprise Survey <> author" + ], + [ + 621, + 631, + "Enterprise Survey <> publisher" + ], + [ + 633, + 649, + "Enterprise Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "05, * p < 0. 1 Source: Enterprise Survey, 2008, 2013 \u2013 14, and 2015 \u2013 16 Note: Explanatory variables include firm size and age, firm \u2019 s ownership status, industry, region, and year. Control group for credit constraint status is FCC.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned as a source of information for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a source in the context.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned as a source of information for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "100_Lebanon-Health-PAD-PAD2358-06152017", + "page": 16, + "text": "Essential Health Care Package The project provides beneficiaries with a package of essential health care services comprising the following: ( i ) three age - and gender-specific wellness packages ( age 0-18, females 19 years and above, males 19 years and above ); ( ii ) two care packages for the most common non-communicable diseases in Lebanon, diabetes and hypertension; and ( iii ) an antenatal package. Providers Services are provided to beneficiaries through 75 of the 204 MoPH network centers. Network facilities are managed by NGOs ( 67 percent ), local municipalities ( 20 percent ), MoPH ( 11 percent ), and MoSA ( 2 percent ). Provider participation is voluntary and is governed by the legal agreement between the MoPH and the managing entity. Quality of Care Quality of care is monitored through the PHCC accreditation program implemented by the MoPH in collaboration with Accreditation Canada International. Currently, all 75 PHCCs are within the accreditation program. The quality of clinical care is also monitored by the MoPH through clinical indicators captured in the Health Information System. Contracting and Provider Payment Mechanism The MoPH purchases the package of services for the beneficiary population from PHCCs. Provider payment is based on capitation and is output-based.", + "ner_text": [ + [ + 1086, + 1111, + "named" + ] + ], + "validated": false, + "empirical_context": "Currently, all 75 PHCCs are within the accreditation program. The quality of clinical care is also monitored by the MoPH through clinical indicators captured in the Health Information System. Contracting and Provider Payment Mechanism The MoPH purchases the package of services for the beneficiary population from PHCCs.", + "type": "system", + "explanation": "However, it is described as a system for monitoring clinical care quality, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system for monitoring clinical care quality, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 53, + "text": "The project preparation team worked with the Poverty team and the National Statistical Institute ( ISTEEBU ) to build small-area extreme poverty maps ( based on the ECVMB 2013-14 in combination with the 2010 Census data ), which provided the basis for a more systematic geographical targeting at the commune-level and could inform the planned expansion of the safety net program and other programs using the registry. In each province, the four communes with the highest poverty ( from 21 The CNPS includes 11 ministers under the authority of the President 22 The country is divided in 18 provinces, 129 communes and 2, 638 collines. 23 The Amashiga program started in 2016 and aims to foster community based-nutrition, using the lessons of the Tubaramure pilot in Cankuso and Ruyigi. The program would distribute food to all households with pregnant women or children under two, and foster behavior change in terms of food consumption, preparation, production, water and sanitation and hygiene practices, and access to health.", + "ner_text": [ + [ + 165, + 178, + "named" + ] + ], + "validated": true, + "empirical_context": "The project preparation team worked with the Poverty team and the National Statistical Institute ( ISTEEBU ) to build small-area extreme poverty maps ( based on the ECVMB 2013-14 in combination with the 2010 Census data ), which provided the basis for a more systematic geographical targeting at the commune-level and could inform the planned expansion of the safety net program and other programs using the registry. In each province, the four communes with the highest poverty ( from 21 The CNPS includes 11 ministers under the authority of the President 22 The country is divided in 18 provinces, 129 communes and 2, 638 collines.", + "type": "survey", + "explanation": "It is indeed a dataset as it is used to inform geographical targeting and is combined with census data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in the context of building poverty maps and is associated with specific years.", + "contextual_reason_agent": "It is indeed a dataset as it is used to inform geographical targeting and is combined with census data for empirical analysis.", + "contextual_signal": "mentioned as a basis for systematic geographical targeting", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 43, + "text": "The methodology monitors three aspects of degradation: ( a ) net primary productivity using normalized difference vegetation index ( NDVI ) with information from satellite imagery ( for example, Moderate Resolution Imaging Spectroradiometer ); ( b ) land cover using either a representative area approach with high-resolution imagery, Landsat and Sentinel imagery, or globally produced datasets; and ( c ) soil organic carbon using field sampling. The UNCCD Secretariat will support the CEP to establish baselines for the above three aspects and repeat the measurements at project midterm and completion. 90. The M & E system of the project will comprise both performance and impact monitoring. The project M & E system will include both annual outcome and result targets as well as periodic evaluations of impact on land degradation, using the three LDN indicators, and households \u2019 socioeconomic factors that will be compared with baseline evaluations to be carried out by the CEP IT in Years 1 and 2. The project will make particular efforts to integrate participatory monitoring methods, using ongoing advances in digital tools and data collection, thus giving communities the potential for timely decision - making, wider sharing of results, and greater ownership of investments. The project will design and maintain a website for wider dissemination of the results and progress.", + "ner_text": [ + [ + 347, + 363, + "named" + ] + ], + "validated": false, + "empirical_context": "The methodology monitors three aspects of degradation: ( a ) net primary productivity using normalized difference vegetation index ( NDVI ) with information from satellite imagery ( for example, Moderate Resolution Imaging Spectroradiometer ); ( b ) land cover using either a representative area approach with high-resolution imagery, Landsat and Sentinel imagery, or globally produced datasets; and ( c ) soil organic carbon using field sampling. The UNCCD Secretariat will support the CEP to establish baselines for the above three aspects and repeat the measurements at project midterm and completion.", + "type": "imagery", + "explanation": "'Sentinel imagery' is not a dataset itself but rather a type of data source or information used in the methodology.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sentinel imagery' is a dataset because it refers to satellite imagery data used for analysis.", + "contextual_reason_agent": "'Sentinel imagery' is not a dataset itself but rather a type of data source or information used in the methodology.", + "contextual_signal": "mentioned only as a source of information, not as a data source", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 139, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 127 and complexity of activity which will be indicated in the Procurement Plan in STEP. These prior-review contracts will be updated in the Procurement Plan annually or as necessary during implementation, based on the procurement capacity assessment during implementation support missions. 64. The World Bank will carry out regular procurement supervision missions on an annual basis and carry out procurement post-review on an annual basis. Contracts not subject to prior review will be subject to post - review by the World Bank as per procedures set forth in Annex II122 \u2013 \u201c Procurement Oversight \u201d of the Procurement Regulations. The sample contracts for the procurement post-reviews will be risk-based. The World Bank will undertake the post-reviews online accessing procurement documents and data from STEP. The Recipient will upload all procurement process documentation and information in STEP at each roadmap stage. At the completion of the contracts, the Recipient will upload completion documents including inspection, acceptance, delivery, and final payment evidenced in STEP. 65.", + "ner_text": [ + [ + 957, + 961, + "named" + ], + [ + 4, + 14, + "STEP <> publisher" + ], + [ + 15, + 23, + "STEP <> data geography" + ], + [ + 358, + 368, + "STEP <> publisher" + ], + [ + 580, + 590, + "STEP <> publisher" + ], + [ + 772, + 782, + "STEP <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank will undertake the post-reviews online accessing procurement documents and data from STEP. The Recipient will upload all procurement process documentation and information in STEP at each roadmap stage. At the completion of the contracts, the Recipient will upload completion documents including inspection, acceptance, delivery, and final payment evidenced in STEP.", + "type": "system", + "explanation": "In the context, STEP is described as a system that stores procurement documents and data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of accessing procurement documents and data.", + "contextual_reason_agent": "In the context, STEP is described as a system that stores procurement documents and data, indicating it functions as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 63, + "text": "54 Annex 7. Economic Analysis 1. The net effect of the Program at the individual \u2019 s level is calculated as the additional benefit that a representative child obtains as a result of the Program. This effect is estimated from a present discounted value ( PDV ) calculation. This approach estimates the stream of benefits and costs of schooling over a lifetime in the labor market with and without the Program. 2. Data for this analysis are obtained mainly from the 2010 Income and Expenditure Survey, a household survey of the labor force, and from the 2015 Population and Housing Census conducted by the Department of Statistics. This representative information accounts for the entire population of Jordan of all ages. It is worth noting that the estimates below are considered under estimates as they do not account for the social benefits of more and better education. Estimation of expected economic benefits 3. The private benefits ( returns to schooling ) are measured following the standard literature on cost \u2010 benefit analysis for investments in education and by calculating the earnings over the course of the working life.", + "ner_text": [ + [ + 464, + 498, + "named" + ] + ], + "validated": true, + "empirical_context": "2. Data for this analysis are obtained mainly from the 2010 Income and Expenditure Survey, a household survey of the labor force, and from the 2015 Population and Housing Census conducted by the Department of Statistics. This representative information accounts for the entire population of Jordan of all ages.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a household survey providing data for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a household survey providing data for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 34 of 47 teachers trained using scripted lessons; ( c ) boundary wall and functional toilets; ( d ) desk and chair for each student; ( e ) trained PTSMCs meeting regularly and endorsing monthly teacher attendance ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Targeted HFs having majority of essential medicines for RMNCHN services Percentage of targeted HFs having > 75 percent of essential RMNCHN medicines. Bi \u2010 annual Health institutional database Self \u2010 reported data Health Department, HMIS, PPHI Absenteeism among key staff to provide RMNCHN services Percentage of key staff present at HFs during duty time to provide RMNCHN services. Baseline is a preliminary estimate. Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "ner_text": [ + [ + 741, + 745, + "named" + ], + [ + 4, + 14, + "HMIS <> publisher" + ], + [ + 671, + 700, + "HMIS <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 34 of 47 teachers trained using scripted lessons; ( c ) boundary wall and functional toilets; ( d ) desk and chair for each student; ( e ) trained PTSMCs meeting regularly and endorsing monthly teacher attendance ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Targeted HFs having majority of essential medicines for RMNCHN services Percentage of targeted HFs having > 75 percent of essential RMNCHN medicines. Bi \u2010 annual Health institutional database Self \u2010 reported data Health Department, HMIS, PPHI Absenteeism among key staff to provide RMNCHN services Percentage of key staff present at HFs during duty time to provide RMNCHN services. Baseline is a preliminary estimate.", + "type": "database", + "explanation": "In this context, 'HMIS' is indeed a dataset as it refers to a health institutional database used for monitoring health services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "In this context, 'HMIS' is indeed a dataset as it refers to a health institutional database used for monitoring health services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 20, + "text": "Similarly, the public investment preparation process does not have a systematic evidence-based assessment of return on investment to inform selection and prioritization of public investment projects, despite the availability of good national statistics overall. Public spending planning and programming would have a higher impact with better use of available statistics to inform resource allocation according to the population, poverty level, and other socioeconomic indicators. 25. The statistics system is adequate overall but is obviously under strain. The National Institute of Statistics ( Institut National de la Statistique, INS ) produces and publishes economic statistics ( national accounts, prices indices, external trade, enterprises ) and social statistics ( poverty and living condition, demographic, health ) of satisfactory quality. Cameroon has a solid experience in rolling out large data collection operations such as population censuses, living standard household surveys, and demographic surveys. The quality of training of Cameroonian statisticians is good. The sub regional training institute ( Institut Sous R\u00e9gional de Statistique et d \u2019 Economie Appliqu\u00e9e ) recruits through a rigorous open competitive exam jointly with the statistics training institutes of Abidjan and Dakar.", + "ner_text": [ + [ + 938, + 957, + "named" + ], + [ + 561, + 593, + "population censuses <> publisher" + ], + [ + 850, + 858, + "population censuses <> data geography" + ] + ], + "validated": true, + "empirical_context": "The National Institute of Statistics ( Institut National de la Statistique, INS ) produces and publishes economic statistics ( national accounts, prices indices, external trade, enterprises ) and social statistics ( poverty and living condition, demographic, health ) of satisfactory quality. Cameroon has a solid experience in rolling out large data collection operations such as population censuses, living standard household surveys, and demographic surveys. The quality of training of Cameroonian statisticians is good.", + "type": "census", + "explanation": "In this context, 'population censuses' is confirmed as a dataset since it is mentioned as part of large data collection operations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population censuses' are typically structured collections of data used for statistical analysis.", + "contextual_reason_agent": "In this context, 'population censuses' is confirmed as a dataset since it is mentioned as part of large data collection operations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "159_38147core", + "page": 34, + "text": "The PPU would publicize the criteria for the selection o f refugee camps in each phase, the identification of beneficiaries, details on the cash grant scheme and disbursement procedure. The communication campaign, to be monitored by UNHCR, would ensure transparency, accountability and allow IDPs to avail o f the grievance redressal mechanism. Housing Assessment Survey Since 25 % o f the housing caseload falls under the partly-completed category, it was necessary to verify the precise physical improvement required. This was done through a Housing Assessment Survey conducted by the PPU. While a permanent housing unit was defined in keeping with the core housing concept used by the Government for the conflict and tsunami housing programs, any unit not falling within the minimum physical specification o f a permanent house and definition o f a temporary thatched house was considered a partly-completed unit. The Housing Assessment Survey determined the extent of physical improvement required for the partly-completed houses be it a permanent roof, additional room ( s ), toilet, permanent flooring / plastering, etc. Houses with poor foundation that require reconstruction were categorized as a temporary unit. The Housing Assessment Survey provided a template for project implementation with basic information such as name and identification number o f the beneficiary, bank account, type o f land ownership, type o f house and type o f physical construction required in the case o f partly-completed houses. It would be the baseline document in each beneficiary file. Project Implementation Physical Construction Requirement. A permanent housing unit i s defined as one with:.. One safe ( closed ) room,. Kitchen ( internal or external ),. Veranda,. Permanent roof,. Permanent floor / internal plastering, and. Independent toilet. A minimum plinth area o f 500 square ft, 29", + "ner_text": [ + [ + 544, + 569, + "named" + ], + [ + 4, + 7, + "Housing Assessment Survey <> author" + ], + [ + 292, + 296, + "Housing Assessment Survey <> reference population" + ], + [ + 587, + 590, + "Housing Assessment Survey <> author" + ], + [ + 1329, + 1379, + "Housing Assessment Survey <> data description" + ], + [ + 1381, + 1393, + "Housing Assessment Survey <> data description" + ], + [ + 1395, + 1418, + "Housing Assessment Survey <> data description" + ], + [ + 1420, + 1434, + "Housing Assessment Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Housing Assessment Survey Since 25 % o f the housing caseload falls under the partly-completed category, it was necessary to verify the precise physical improvement required. This was done through a Housing Assessment Survey conducted by the PPU. While a permanent housing unit was defined in keeping with the core housing concept used by the Government for the conflict and tsunami housing programs, any unit not falling within the minimum physical specification o f a permanent house and definition o f a temporary thatched house was considered a partly-completed unit.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a Housing Assessment Survey that collects data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on housing conditions.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a Housing Assessment Survey that collects data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 35, + "text": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 30 of 43 Monitoring & Evaluation Plan: PDO Indicators by PDO Outcomes Outcome 1: Improve utilization of quality primary health care services Percentage of women receiving postnatal care within 48 hours ( Percentage ) Description Numerator: Number of women receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine Health Management Information System ( HMIS ) data collection Responsibility for Data Collection MoH Percentage of women receiving postnatal care within 48 hours in the 10 selected counties ( Percentage ) Description Numerator: Number of women, in the 10 selected counties, receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births, in the 10 selected counties, during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of host community women receiving postnatal care within 48 hours in Garissa and Turkana ( Percentage ) Description Numerator: Number of host community women in Garissa and Turkana, receiving postnatal care after delivery within 48 hours.", + "ner_text": [ + [ + 1092, + 1096, + "named" + ] + ], + "validated": false, + "empirical_context": "Denominator: Total number of expected live births during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine Health Management Information System ( HMIS ) data collection Responsibility for Data Collection MoH Percentage of women receiving postnatal care within 48 hours in the 10 selected counties ( Percentage ) Description Numerator: Number of women, in the 10 selected counties, receiving postnatal care after delivery within 48 hours. Denominator: Total number of expected live births, in the 10 selected counties, during the reporting period Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Percentage of host community women receiving postnatal care within 48 hours in Garissa and Turkana ( Percentage ) Description Numerator: Number of host community women in Garissa and Turkana, receiving postnatal care after delivery within 48 hours.", + "type": "system", + "explanation": "'HMIS' is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is associated with data collection in health management.", + "contextual_reason_agent": "'HMIS' is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 20, + "text": "The fourth Result Area is strengthened education system management by focusing on supporting MOE and strengthening its capacity to manage an increasing number of schools and students, notably due to the expansion of early childhood education and to the enrollment of a large number of refugee children in Jordanian schools. The focus of this Result Area is to provide and enhance the tools and resources available to the MOE for decision making and implementation. These tools include information systems such as the operationalization of the GIS, which will allow the MOE to map school construction, expansion, and rehabilitation needs, and the strengthening of the existing OpenEMIS to allow MOE to analyze and make use of disaggregated and gender \u2010 sensitive data for decision making. This Result Area will also support the MOE in securing budget additionality to the sector in an efficient and effective manner to ensure that resources are available for undertaking the necessary reforms. 19 In an effort to shed light on gender dynamics in the education sector in Jordan, the impact evaluation will assess heterogeneous effects by student gender. 20 By \u201c private KG \u201d is meant: all non \u2010 public provision including for profit private KGs, community \u2010 based KGs, and NGO KGs.", + "ner_text": [ + [ + 676, + 684, + "named" + ] + ], + "validated": false, + "empirical_context": "The focus of this Result Area is to provide and enhance the tools and resources available to the MOE for decision making and implementation. These tools include information systems such as the operationalization of the GIS, which will allow the MOE to map school construction, expansion, and rehabilitation needs, and the strengthening of the existing OpenEMIS to allow MOE to analyze and make use of disaggregated and gender \u2010 sensitive data for decision making. This Result Area will also support the MOE in securing budget additionality to the sector in an efficient and effective manner to ensure that resources are available for undertaking the necessary reforms.", + "type": "system", + "explanation": "However, OpenEMIS is described as an information system rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed OpenEMIS is a dataset because it is mentioned in the context of analyzing data.", + "contextual_reason_agent": "However, OpenEMIS is described as an information system rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 43, + "text": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 39 of 104 objectives ( annex 1 ), which will be used to track progress in implementation activities. Smartphone-based field data capture tools will be promoted as part of the monitoring efforts. 88. The CEP IT will carry out in-depth midterm and completion assessments. Before the midterm review in Year 3 and at the end of the project, analysis of changes in forest cover and land use patterns will be carried out based on GIS mapping, to monitor the land area where sustainable land management practices have been adopted. The project will also contribute to regional monitoring under the RESILAND CA +. Well-being surveys will be carried out before the midterm review in Year 3 and at the project end. These surveys will also determine changes in monetary or non-monetary benefits from landscape restoration forestry, pasture, and agricultural lands, disaggregated by gender. A final impact evaluation of the landscape restoration activities will be carried out in the final year of implementation. 89. Project impact on land degradation in the targeted landscapes will be monitored and evaluated using the UNCCD LDN Impact Monitoring Methodology.", + "ner_text": [ + [ + 712, + 730, + "named" + ], + [ + 4, + 14, + "Well-being surveys <> publisher" + ], + [ + 38, + 48, + "Well-being surveys <> data geography" + ], + [ + 309, + 315, + "Well-being surveys <> author" + ], + [ + 845, + 889, + "Well-being surveys <> data description" + ], + [ + 960, + 983, + "Well-being surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "The project will also contribute to regional monitoring under the RESILAND CA +. Well-being surveys will be carried out before the midterm review in Year 3 and at the project end. These surveys will also determine changes in monetary or non-monetary benefits from landscape restoration forestry, pasture, and agricultural lands, disaggregated by gender.", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as being carried out to gather data on changes in benefits, confirming their role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'well-being surveys' imply a structured collection of data collected from participants.", + "contextual_reason_agent": "These surveys are explicitly mentioned as being carried out to gather data on changes in benefits, confirming their role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "117_Somali-Urban-Investment-Planning-Project", + "page": 11, + "text": "For the first time since 1991, there is a federal ( rather than transitional ) government and a widely supported provisional constitution. The September 2013 Brussels Conference galvanized further domestic and international support for the government, and endorsed a Compact whose Peace-building and State-building Goals provide an important roadmap for the coming three year period. While positive momentum has been generated, sustaining domestic confidence through the translation of international support into improved security, governance, and economic benefits on the ground is an increasingly pressing challenge. 4. Somalia \u2019 s macro-economic framework reflects the country \u2019 s underlying fragility. Reliable macro-economic data for Somalia is not available \u2013 however regional fiscal and economic data does exist and broader estimates can be aggregated. Public expenditure is estimated to account for 7. 7 percent of GDP3 compared with private sector consumption of 73 percent of GDP. Agriculture and services are the key contributors to GDP. Based on regional fiscal data, Somaliland controls the largest budgetary resource envelope, generating US $ 127 million in revenue during 2012, compared to US $ 35 million at the federal level and US $ 38 million in Puntland. 5. In contrast to the war-torn south, authorities in the northern regions have put in place functioning institutions that have succeeded in sustaining stability although considerable development challenges remain. Following their declaration of independence and semi - autonomy respectively, Somaliland and Puntland have developed hybrid forms of governance combining modern institutions with religious authorities, civil society, the private sector and 1 Interim Strategy Note FY14-16, World Bank, December 2013, UNFPA Population Estimates 2014 2 A Rapid Assessment of Three Somali Urban Areas, World Bank, November 2013, UNFPA Population Estimates 2014 3 In the UNDP Human Development Report, Somalia \u2019 s GDP is estimated to be US $ 2. 6 billion and per capita GDP is estimated to be US $ 288 based on the World Development Indicators and Economist Intelligence Unit.", + "ner_text": [ + [ + 1058, + 1078, + "named" + ], + [ + 622, + 629, + "regional fiscal data <> data geography" + ], + [ + 774, + 807, + "regional fiscal data <> data type" + ], + [ + 1080, + 1090, + "regional fiscal data <> data geography" + ], + [ + 1187, + 1191, + "regional fiscal data <> publication year" + ], + [ + 1265, + 1273, + "regional fiscal data <> data geography" + ], + [ + 1567, + 1577, + "regional fiscal data <> data geography" + ], + [ + 1582, + 1590, + "regional fiscal data <> data geography" + ], + [ + 1762, + 1772, + "regional fiscal data <> publisher" + ], + [ + 1871, + 1881, + "regional fiscal data <> publisher" + ], + [ + 2160, + 2178, + "regional fiscal data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Agriculture and services are the key contributors to GDP. Based on regional fiscal data, Somaliland controls the largest budgetary resource envelope, generating US $ 127 million in revenue during 2012, compared to US $ 35 million at the federal level and US $ 38 million in Puntland. 5.", + "type": "data", + "explanation": "In this context, 'regional fiscal data' is indeed used as a source of information for analyzing budgetary resources.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'regional fiscal data' is a dataset because it refers to specific financial information relevant to the context.", + "contextual_reason_agent": "In this context, 'regional fiscal data' is indeed used as a source of information for analyzing budgetary resources.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 906, + 909, + "named" + ], + [ + 764, + 782, + "NLA <> data type" + ] + ], + "validated": true, + "empirical_context": "The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs.", + "type": "system", + "explanation": "NLA is indeed a dataset as it is described as collecting varying aspects of data, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NLA is a dataset because it is mentioned in the context of collecting data at the school level.", + "contextual_reason_agent": "NLA is indeed a dataset as it is described as collecting varying aspects of data, indicating its role as a data source.", + "contextual_signal": "described as a system that collects varying aspects of data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 12, + "text": "These cross \u2010 cutting challenges along with sector specific supply \u2010 side and demand \u2010 side constraints have resulted in poor health indicators and large disparities by socioeconomic status. Balochistan performs worse than the national average across reproductive, maternal, newborn, child health, and nutrition ( RMNCHN ) indicators. 10 Under \u2010 five mortality rate is 78 per 1, 000 live births in Balochistan, compared to 74 per 1, 000 live births at the national level. The total fertility rate ( TFR ) is 4. 0 in Balochistan and 3. 6 nationally, and almost half of the children under five are stunted in the province, compared to about one in three at the national level. Differences in service utilization between the 8 Javed, S. A., M. D. Anjum, W. Imran, et al. 2013. \u201c Correlates of Preferences for Home or Hospital Confinement in Pakistan: Evidence from a National Survey. \u201d BMC \u2010 Pregnancy and Childbirth 13: 137. ul Husnain, M. I., M. Rashid, and U. Shakoor. 2018. \u201c Decision \u2010 making for Birth Location among Women in Pakistan: Evidence from National Survey. \u201d BMC Pregnancy and Childbirth 18: 226. https: / / doi. org / 10. 1186 / s12884 \u2010 018 \u2010 1844 \u2010 8. 9 Alif Ailaan 2018. 2013 \u2010 2018 Five Years of Education Reforms in Balochistan. Wins, Losses and Challenges for 2018 \u2010 2023. Islamabad: Alif Ailaan. vi \u2010 33 pp. 10 NIPS and ICF ( 2019 ).", + "ner_text": [ + [ + 1053, + 1068, + "named" + ], + [ + 191, + 202, + "National Survey <> data geography" + ], + [ + 398, + 409, + "National Survey <> data geography" + ], + [ + 768, + 772, + "National Survey <> reference year" + ], + [ + 838, + 846, + "National Survey <> data geography" + ], + [ + 957, + 967, + "National Survey <> author" + ], + [ + 969, + 973, + "National Survey <> publication year" + ], + [ + 1195, + 1199, + "National Survey <> publication year" + ], + [ + 1235, + 1246, + "National Survey <> data geography" + ], + [ + 1347, + 1351, + "National Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2018. \u201c Decision \u2010 making for Birth Location among Women in Pakistan: Evidence from National Survey. \u201d BMC Pregnancy and Childbirth 18: 226.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is referenced in relation to evidence used for decision-making.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'National Survey' suggests a structured collection of data collected for research purposes.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is referenced in relation to evidence used for decision-making.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 31, + "text": "Recent ID4D research on women \u2019 s ID ownership in Ethiopia found that women do not see the Kebele ID as salient to their daily lives and therefore do not pursue applying for one even if it is accessible. 53 By streamlining and, in some cases, automating the updating of information, the project will be making it easier for women to assert their associated rights and entitlements. The new Fayda system can also embed use cases and value propositions that are tailored to women, to increase demand for identification. Furthermore, a digital identification system will create more opportunities for Ethiopia \u2019 s many women - owned small and medium enterprises to do business online, and more generally access employment, and enroll in male - dominated education sectors such as science, technology, engineering, and mathematics education. 68. The authentication component of a digital ID system can also increase the security of funds transfers for both in - person and remote environments, particularly as Ethiopian legislation enables and helps increase payment interoperability between financial service providers.", + "ner_text": [ + [ + 533, + 562, + "named" + ] + ], + "validated": false, + "empirical_context": "The new Fayda system can also embed use cases and value propositions that are tailored to women, to increase demand for identification. Furthermore, a digital identification system will create more opportunities for Ethiopia \u2019 s many women - owned small and medium enterprises to do business online, and more generally access employment, and enroll in male - dominated education sectors such as science, technology, engineering, and mathematics education. 68.", + "type": "system", + "explanation": "However, the term 'digital identification system' is mentioned as a system and not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data handling.", + "contextual_reason_agent": "However, the term 'digital identification system' is mentioned as a system and not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 92, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level. People provided with new or improved electricity service - Female Semi - annually. National statistics / Proj ect data. Estimated based on the share of female in the national population as published by the national statistics institute or other credible sources. NIGELEC / ANPER / ANERS OL. Refugee and host beneficiaries provided with electricity services through grid, mini grids and electrified public institutions This relates to the number of refugees and host populations living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. NIGELEC / ANPER / ANERS OL.", + "ner_text": [ + [ + 227, + 234, + "named" + ] + ], + "validated": false, + "empirical_context": "acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level.", + "type": "organization", + "explanation": "ANERSOL is mentioned as an entity but not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned alongside other entities related to data collection.", + "contextual_reason_agent": "ANERSOL is mentioned as an entity but not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 18, + "text": "To achieve its objectives, this component will finance technical assistance for the following activities: a ) Providing technical support to the Program Management Unit ( PMU ) in the MoPH. b ) Providing technical assistance in the development and management of contracts between MoPH and selected PHCCs and in the verification and validation of the PHCCs \u2019 financial and technical reports as well as the Essential Healthcare Services \u2019 packages \u2019 payment process. c ) Updating and maintaining Health Information System ( HIS ) ( including provision of IT hardware and software ) at MoPH with links to other related agencies involved in the implementation of the Project. d ) Initiating monitoring and assessment of the Project through setting the baseline, collecting the data and setting the parameters for evaluation. e ) Improving the grievance and redress mechanism for improved efficiency and transparency. f ) Launching outreach campaign and communication activities to inform Beneficiaries about their health rights and services provided at the PHCCs in their areas. B. Project Financing 39. The financing instrument for the project is a grant-based Investment Project Financing ( IPF ) in the amount of US $ 15. 00 million financed from the World Bank LSCTF. 14 14 The LSCTF was established in December 2013, in order to support the impact of the Syrian conflict on Lebanon.", + "ner_text": [ + [ + 494, + 519, + "named" + ] + ], + "validated": false, + "empirical_context": "b ) Providing technical assistance in the development and management of contracts between MoPH and selected PHCCs and in the verification and validation of the PHCCs \u2019 financial and technical reports as well as the Essential Healthcare Services \u2019 packages \u2019 payment process. c ) Updating and maintaining Health Information System ( HIS ) ( including provision of IT hardware and software ) at MoPH with links to other related agencies involved in the implementation of the Project. d ) Initiating monitoring and assessment of the Project through setting the baseline, collecting the data and setting the parameters for evaluation.", + "type": "system", + "explanation": "However, it is described as a system for managing health information, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system for managing health information, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "135_810840PAD0P144010Box379877B00OUO090", + "page": 50, + "text": "39 Phase Activities Participants Responsibility Prerequisites Outcomes PDSEC reflecting post-crisis priorities Validation of PDSEC Alignment of proposed priorities with sectoral investment plans Assurance from concerned sectoral departments on staff assignment and recurring expenditure Final list of infrastructure for rehabilitation under the project CCOCSAD members representing PLGs, Administration, Sector departments, civil society and other community representatives MOD \u2013 CC, Regional Social Development Officer Commune allocation defined MOD contracted List of infrastructure for rehabilitation with arrangements for utilization, operations and maintenance agreed with community and Commune Administration. Finalization of CBOs and action plans for productive investments Validation of action plans by CCOCSD Identification of CBOs at the commune level, based on village level data. Ensure inclusive membership with inclusion of poor and vulnerable members; clarify Management responsibility; ensure legal Status by registration with appropriate authority Finalize action plan for type and quantity of inputs for each CBO CBOs, Sector staff, Commune Council and CCOCSAD MOD Eligibility criteria for Livelihood Associations finalized and disseminated. Number to be supported per Commune decided List of productive inputs for different livelihoods finalized. List of CBOs to be supported finalized. Validated Action plan for inputs for each CBOs agreed and provided to concerned agency.", + "ner_text": [ + [ + 872, + 890, + "named" + ], + [ + 938, + 965, + "village level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "39 Phase Activities Participants Responsibility Prerequisites Outcomes PDSEC reflecting post-crisis priorities Validation of PDSEC Alignment of proposed priorities with sectoral investment plans Assurance from concerned sectoral departments on staff assignment and recurring expenditure Final list of infrastructure for rehabilitation under the project CCOCSAD members representing PLGs, Administration, Sector departments, civil society and other community representatives MOD \u2013 CC, Regional Social Development Officer Commune allocation defined MOD contracted List of infrastructure for rehabilitation with arrangements for utilization, operations and maintenance agreed with community and Commune Administration. Finalization of CBOs and action plans for productive investments Validation of action plans by CCOCSD Identification of CBOs at the commune level, based on village level data. Ensure inclusive membership with inclusion of poor and vulnerable members; clarify Management responsibility; ensure legal Status by registration with appropriate authority Finalize action plan for type and quantity of inputs for each CBO CBOs, Sector staff, Commune Council and CCOCSAD MOD Eligibility criteria for Livelihood Associations finalized and disseminated.", + "type": "data", + "explanation": "In this context, 'village level data' is used to identify CBOs, indicating it serves as a data source for decision-making.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'village level data' is a dataset because it refers to specific information collected at the village level.", + "contextual_reason_agent": "In this context, 'village level data' is used to identify CBOs, indicating it serves as a data source for decision-making.", + "contextual_signal": "mentioned as a source for identifying CBOs", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "069_Pakistan-Strengthening-Institutions-for-Refugee-Administration-Project", + "page": 13, + "text": "The World Bank Strengthening Insitutions for Refugee Administration Project ( P165542 ) 5 expenses, and repairs and maintenance already covered under government budget for participating ministries and departments. Table 1: Total Project financing ( US $ million ) Source Amount ( US $ m ) Percent of total Government 166 77 IDA 50 23 Total program financing 216 100 A. Project Development Objective PDO Statement 18. To improve organizational and institutional capacity for managing refugees and host communities in Pakistan. PDO Level Indicators Progress towards the Project development objective ( PDO ) is measured by the following indicators: \u2022 PDO indicator 1: CCAR and CARs achieve, at least, 75 percent of assigned key performance targets ( Percent ) \u2022 PDO indicator 2: Registered Afghan refugee users of visa facilitation centers ( VFC ) report satisfaction with service standards ( Percent ). \u2022 PDO indicator 3: Registered Afghan refugees visas applications processed ( Number ). \u2022 PDO Indicator 4: Host community and refugee complaints resolved through the complaints handling mechanisms within 45 days of reporting ( Percent ). \u2022 PDO indicator 5: Data on socio economic characteristics of refugees and host communities published regularly by CCAR. B. Project Components Component 1: Implementing organizational and institutional reforms for management of refugees and host communities ( US $ 40 million-Performance based ) 19.", + "ner_text": [ + [ + 1158, + 1229, + "named" + ], + [ + 4, + 14, + "Data on socio economic characteristics of refugees and host communities <> author" + ], + [ + 516, + 524, + "Data on socio economic characteristics of refugees and host communities <> data geography" + ], + [ + 777, + 808, + "Data on socio economic characteristics of refugees and host communities <> reference population" + ], + [ + 921, + 947, + "Data on socio economic characteristics of refugees and host communities <> reference population" + ], + [ + 1253, + 1257, + "Data on socio economic characteristics of refugees and host communities <> publisher" + ] + ], + "validated": true, + "empirical_context": "\u2022 PDO Indicator 4: Host community and refugee complaints resolved through the complaints handling mechanisms within 45 days of reporting ( Percent ). \u2022 PDO indicator 5: Data on socio economic characteristics of refugees and host communities published regularly by CCAR. B.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned that the data is published regularly, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'data' on specific characteristics.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that the data is published regularly, indicating its use as a data source.", + "contextual_signal": "follows 'published regularly by CCAR'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 42, + "text": "MOITS Data collected regularly and reported by the MOITS Project Coordination Team Cumulative amount of barley procured through the project Cumulative amount of barley imports procured with project financing since the start of the project and delivered to the port of Aqaba Monthly and at the end of the project implementati on period MOITS Data collected regularly and reported by the MOITS Project Coordination Team Policy framework for mitigating commodity risk developed Series of policy reports supported with project financing completed and Twice per year and at the end of Project monitoring system Progress status report of analytical and stakeholder Project Coordination Team", + "ner_text": [ + [ + 335, + 345, + "named" + ], + [ + 51, + 82, + "MOITS Data <> author" + ], + [ + 83, + 207, + "MOITS Data <> data description" + ], + [ + 260, + 273, + "MOITS Data <> data geography" + ], + [ + 386, + 417, + "MOITS Data <> author" + ] + ], + "validated": true, + "empirical_context": "MOITS Data collected regularly and reported by the MOITS Project Coordination Team Cumulative amount of barley procured through the project Cumulative amount of barley imports procured with project financing since the start of the project and delivered to the port of Aqaba Monthly and at the end of the project implementati on period MOITS Data collected regularly and reported by the MOITS Project Coordination Team Policy framework for mitigating commodity risk developed Series of policy reports supported with project financing completed and Twice per year and at the end of Project monitoring system Progress status report of analytical and stakeholder Project Coordination Team", + "type": "data", + "explanation": "In the context, 'MOITS Data' is explicitly mentioned as data collected regularly, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to data collected and reported by a project team.", + "contextual_reason_agent": "In the context, 'MOITS Data' is explicitly mentioned as data collected regularly, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 33, + "text": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 29 of 44 RESULT_FRAME_TBL_PDO Indicator Name DLI Baseline End Target Beneficiaries with access to basic services infrastructure financed by the project ( Number ) 0. 00 5, 000. 00 PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name DLI Baseline End Target Conditional cash transfers Cash transfers paid to eligible beneficiaries ( Amount ( USD ) ) 0. 00 7, 000, 000. 00 Community sessions organized as part of accompanying measures ( Number ) 0. 00 2, 000. 00 Community counselors trained to lead community sessions ( Number ) 0. 00 150. 00 Beneficiaries satisfied with community sessions ( Percentage ) 0. 00 75. 00 Strengthening social protection delivery systems PNSF beneficiary households with biometric data in the social registry ( Percentage ) 0. 00 80. 00 PNSF beneficiary households paid within 15 days of scheduled payment date ( Percentage ) 0. 00 85. 00 People in the social registry that received national identity cards with the support of the project ( Number ) 0. 00 1, 000. 00 PNSF complaints registered electronically and resolved by the time of the next cash transfer payment ( Percentage ) 0. 00 70. 00 Refugee households included in the social registry ( Number ) 0. 00 1, 500. 00 Community-based investments", + "ner_text": [ + [ + 849, + 864, + "named" + ], + [ + 1252, + 1270, + "social registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 75. 00 Strengthening social protection delivery systems PNSF beneficiary households with biometric data in the social registry ( Percentage ) 0. 00 80.", + "type": "registry", + "explanation": "In this context, 'social registry' is explicitly mentioned as containing biometric data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'social registry' is a dataset because it refers to a structured collection of beneficiary households' biometric data.", + "contextual_reason_agent": "In this context, 'social registry' is explicitly mentioned as containing biometric data, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 57, + "text": "The World Bank RESILAND CA + Program: Tajikistan Resilient Landscape Restoration Project ( P171524 ) Page 53 of 104 agronomic, vegetative, structural, and management measures that, applied as a combination, increase the connectivity between protected areas, forest land, rangeland, and agriculture land. Management and Biodiversity Conservation: 102, 803 ha iv ) Sub-component 2. 4. Landscape Restoration and Livelihoods: 15, 000 ha Note: Not all area under management plan under IRIs in Component 2 is included under this PDO indicator; only the area where intensive management activities have been carried out is considered. People benefiting from landscape management practices This indicator measures the number of people in the project areas that benefit from the range of SLM practices that the project is able to implement. Benefits include monetary ( employment, income ) and non-monetary ( changes in aspects of well-being, and improved condition of Annual, and at mid-term and completion for non monetary Project reports, project MIS, Survey reports and data Periodic well-being surveys and case studies, aggregation of beneficiary data from component 2 M & E, data collection to be supported through digital methods such as Kobo Tool Box, Government statistics CEP IT", + "ner_text": [ + [ + 1235, + 1248, + "named" + ] + ], + "validated": false, + "empirical_context": "People benefiting from landscape management practices This indicator measures the number of people in the project areas that benefit from the range of SLM practices that the project is able to implement. Benefits include monetary ( employment, income ) and non-monetary ( changes in aspects of well-being, and improved condition of Annual, and at mid-term and completion for non monetary Project reports, project MIS, Survey reports and data Periodic well-being surveys and case studies, aggregation of beneficiary data from component 2 M & E, data collection to be supported through digital methods such as Kobo Tool Box, Government statistics CEP IT", + "type": "tool", + "explanation": "However, it is actually a tool used for data collection, not a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of data collection methods.", + "contextual_reason_agent": "However, it is actually a tool used for data collection, not a dataset itself.", + "contextual_signal": "mentioned as a tool for data collection", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 14, + "text": "Unit. 20 In addition to the DHIS, there exist many parallel health management information systems ( HMIS ) ( for example, one for each vertical program, PPHI, and so on ). Services delivered through the private sector are also not reported. 21 Less than 4 percent of the population seeks care at the public primary level HFs and 28 percent seeks care at higher \u2010 level public HFs, while more than 60 percent seeks care at private HFs ( PSLSM Survey 2014 \u2013 15. Pakistan Bureau of Statics, 2016 ). Zaidi, S. A., M. Bigdeli, and E. V. Langlois, et al. 2019. \u201c Health Systems Changes after Decentralization: Progress, Challenges and Dynamics in Pakistan. \u201d BMJ Glob Health 4. 22 In Pakistan, primary schools cover grades 1 through 5 and secondary schools cover grades 6 to 10 with middle schools for grades 6 to 8 and high schools for grades 9 and 10. Higher \u2010 secondary schools cover grades 11 and 12.", + "ner_text": [ + [ + 28, + 32, + "named" + ] + ], + "validated": false, + "empirical_context": "Unit. 20 In addition to the DHIS, there exist many parallel health management information systems ( HMIS ) ( for example, one for each vertical program, PPHI, and so on ). Services delivered through the private sector are also not reported.", + "type": "system", + "explanation": "However, DHIS is described as a health management information system, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS is a dataset because it is mentioned in the context of health management information systems.", + "contextual_reason_agent": "However, DHIS is described as a health management information system, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 88, + "text": "The World Bank Chad Agribusiness and Rural Transformation Project ( P179238 ) Page 84 of 88 Figure A8. 3. Barriers to account ownership, 2017 ( percent respondents without a financial institution account ) Source: Global Findex Database, 2017. 7. Similarly, the banking sector is exposed to vulnerabilities stemming from weaknesses in the credit reporting framework. Chad is a member of the Central Africa \u2019 s currency union ( CEMAC ), with monetary policy and financial sector regulatory and supervisory functions set at the regional level. The regional credit reporting system suffers from several weaknesses which impact the quality and availability of information about borrowers \u2019 behaviors. The regional Central Bank - Bank of Central African States ( Banque des Etats d \u2019 Afrique Centrale, BEAC ) has a credit risk registry ( Centrale des Risques ), but its effectiveness is hampered by the considerable delay in updating the information collected from the banks and the non-inclusion of data from microfinance institutions which account for a large number of loans. The efficiency of the supervisory framework also suffers from limited independence of the supervisory authority - Central Africa Banking Commission ( Commission Bancaire de l \u2019 Afrique Centrale, COBAC ), the need to better align prudential norms with best practices, and inadequate resources allocated to COBAC. 8.", + "ner_text": [ + [ + 214, + 236, + "named" + ], + [ + 15, + 19, + "Global Findex Database <> data geography" + ], + [ + 137, + 141, + "Global Findex Database <> publication year" + ], + [ + 238, + 242, + "Global Findex Database <> publication year" + ], + [ + 367, + 371, + "Global Findex Database <> data geography" + ] + ], + "validated": true, + "empirical_context": "3. Barriers to account ownership, 2017 ( percent respondents without a financial institution account ) Source: Global Findex Database, 2017. 7.", + "type": "database", + "explanation": "The Global Findex Database is explicitly mentioned as a source for the data on barriers to account ownership, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Database' and is cited as a source of data.", + "contextual_reason_agent": "The Global Findex Database is explicitly mentioned as a source for the data on barriers to account ownership, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 52, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 40 and building capacity of oversight entities. 104. Key fiduciary arrangements have been put in place to ensure that resources are used for the intended purpose. The budget and expenditure for the sectors to be supported are clearly identifiable in the government \u2019 s budget and expenditure system of the IBEX at the regional and IFMIS at the federal level. The expenditure of the sectors is subject to internal as well as external audit reviews by the Office of Federal and Regional Auditor Generals ( OF / RAGs ). In addition, Program-specific audit exists which is done on a continual basis and annually. The audit terms of reference are updated for HCO to look into payroll in more detail as 90 percent of the expenditure framework is expected to be for salary of these basic service sectors. Quarterly financial reports are produced from the government system and consolidated at the federal level, and submitted to and reviewed by the World Bank. Relevant staff are placed at all levels to ensure that the system continues to function well. PFM institutionalized training is available which supports the system to deliver as expected. Procurement audits and fraud and corruption reports are also part of the operation \u2019 s arrangements for the pilot SPG woredas.", + "ner_text": [ + [ + 366, + 370, + "named" + ] + ], + "validated": false, + "empirical_context": "Key fiduciary arrangements have been put in place to ensure that resources are used for the intended purpose. The budget and expenditure for the sectors to be supported are clearly identifiable in the government \u2019 s budget and expenditure system of the IBEX at the regional and IFMIS at the federal level. The expenditure of the sectors is subject to internal as well as external audit reviews by the Office of Federal and Regional Auditor Generals ( OF / RAGs ).", + "type": "system", + "explanation": "IBEX is described as a system but not mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IBEX is a dataset because it is mentioned in the context of budget and expenditure systems.", + "contextual_reason_agent": "IBEX is described as a system but not mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 60, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 55 of 85 updated COT will incorporate the revised professional development programs and the use of ICT for learning. Percentage of school directors who participate in professional community of practice School directors will create professional communities of practice to exchange good practice, observe, and give feedback to improve school quality. Annually MENFOP Reported by MENFOP General inspection within MENFOP Improved methods for data collection MENFOP will develop a digital application to improve data quality for private and public primary schools and ensure the timely publication of the statistical yearbook. Annually MENFOP Reported by MENFOP ICT unit within MENFOP Modernization of the EMIS MENFOP will perform a diagnosis on the existing data platforms ( EMIS, Human Resource Management ) and will work on recommendations for the setup of a student \u2019 s information systems and an electronic procurement system.", + "ner_text": [ + [ + 778, + 782, + "named" + ] + ], + "validated": false, + "empirical_context": "Annually MENFOP Reported by MENFOP General inspection within MENFOP Improved methods for data collection MENFOP will develop a digital application to improve data quality for private and public primary schools and ensure the timely publication of the statistical yearbook. Annually MENFOP Reported by MENFOP ICT unit within MENFOP Modernization of the EMIS MENFOP will perform a diagnosis on the existing data platforms ( EMIS, Human Resource Management ) and will work on recommendations for the setup of a student \u2019 s information systems and an electronic procurement system.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 69, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | 60 20. Targeted results may be impacted by the exogenous risks identified in the Program \u2019 s integrated risk assessment. Increasing security concerns triggered by the ongoing conflict may distract government attention and fiscal resources, while climate change is not expected to impact the achievement of results. M & E Capacity 21. The Program \u2019 s M & E framework leverages government monitoring systems, which ensures adequate measuring and reporting of progress toward the targeted results. Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "ner_text": [ + [ + 1031, + 1041, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "type": "system", + "explanation": "However, the context indicates that the EMR system is mentioned as a management tool rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMR system' includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, the context indicates that the EMR system is mentioned as a management tool rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 61, + "text": "An example is a record of achievement in education ( proof that the person has a qualification, such as a degree ), a medical record ( details of the person \u2019 s current medication and conditions ), or an entitlement document, such as a national identity card ( identifying the person \u2019 s legal name ). Holding and managing all this data in a single database is impractical and creates security vulnerabilities. Sharing this data securely and reliably under the consent of the individual offers many advantages over silos of data and functionality. People should have the ability to manage their consent with a particular service or dataset, including the ability to review and revoke consent as necessary. Taking a 27 Digital Public Infrastructure ( DPI ) refers to digital ID, payment, and data exchange capabilities that are fundamental to enabling service delivery at scale and supporting innovation in the digital economy. DPI provides reusable and foundational digital platforms that allow public and private sector service providers to build and innovate their products and services.", + "ner_text": [ + [ + 16, + 50, + "named" + ] + ], + "validated": false, + "empirical_context": "An example is a record of achievement in education ( proof that the person has a qualification, such as a degree ), a medical record ( details of the person \u2019 s current medication and conditions ), or an entitlement document, such as a national identity card ( identifying the person \u2019 s legal name ). Holding and managing all this data in a single database is impractical and creates security vulnerabilities.", + "type": "document", + "explanation": "However, it is described as a document that serves as proof of achievement, not as a structured collection of data used for analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of information related to educational qualifications.", + "contextual_reason_agent": "However, it is described as a document that serves as proof of achievement, not as a structured collection of data used for analysis.", + "contextual_signal": "mentioned only as a document, not as a data source", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 64, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 59 of 130 electricity access under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. reports. Average household size based on UBOS National Household Survey. People provided with access to electricity under the project with off - grid of which female The indicator will track the number of females benefiting from off-grid electricity access under the project. Share of female is 51 %, UBOS Women and Men in Uganda, Facts and Figures 2016. Quarterly Project implementati on progress reports. UBOS Women and Men in Uganda, Facts and Figures, 2016 Approved loan and grants applications UECCC, MEMD People provided with access to electricity with grid, mini-grid, off-grid in refugee - hosting districts The indicator will track the number of host communities and refugees beneficiaries of access to grid, mini-grid, and off-grid technologies under the project in District hosting refugees. Average household size of 4. 7 people, UBOS National Household Survey 2016 - Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports.", + "ner_text": [ + [ + 272, + 302, + "named" + ], + [ + 150, + 187, + "UBOS National Household Survey <> data description" + ], + [ + 220, + 229, + "UBOS National Household Survey <> publication year" + ], + [ + 554, + 560, + "UBOS National Household Survey <> data geography" + ], + [ + 580, + 584, + "UBOS National Household Survey <> publication year" + ], + [ + 660, + 666, + "UBOS National Household Survey <> data geography" + ], + [ + 1360, + 1378, + "UBOS National Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "reports. Average household size based on UBOS National Household Survey. People provided with access to electricity under the project with off - grid of which female The indicator will track the number of females benefiting from off-grid electricity access under the project.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source for average household size data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it references a national survey that collects data on household characteristics.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source for average household size data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 28, + "text": "Once the new EMIS system is ready, it will be piloted and finalized based on the pilot deployment. A training and communication strategy will be prepared targeting all stakeholders, including communities and parents. Workshops and hands-on training will be provided to the national and local staff on the new EMIS system. This component would also support the development and implementation of a plan to improve the information management system of MoHEST. 67. Improving the sector \u2019 s ability to collect data would enhance its ability to plan and implement future interventions, thus responding to GCRF Pillar 4: Strengthening Policies, Institutions and Investments for Rebuilding Better. 68. Subcomponent 4. 3: Project management ( US $ 3. 43 million ). This subcomponent will support the establishment of PIUs at the MoGEI and MoHEST to coordinate and manage day-to-day project implementation. The Project will finance key project staff at the national PIU level, and refurbishment, office equipment, and furniture to adequately resource project staff to manage implementation. Project staff may also be hired at the state level to support implementation and monitoring. 69. Further, MoHEST and MoGEI staff will receive training on climate change adaptation and mitigation.", + "ner_text": [ + [ + 13, + 24, + "named" + ] + ], + "validated": false, + "empirical_context": "Once the new EMIS system is ready, it will be piloted and finalized based on the pilot deployment. A training and communication strategy will be prepared targeting all stakeholders, including communities and parents.", + "type": "system", + "explanation": "However, the context indicates it is a system being piloted and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMIS system' includes 'system' which can imply data management.", + "contextual_reason_agent": "However, the context indicates it is a system being piloted and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 37, + "text": "They spend a disproportionate amount of time carrying out time-intensive domestic tasks, a burden which is only compounded in situations where chronic deficiencies exist in water provision. 17 103. Eight percent of women in the GBML service area are reported as the main breadwinners. 18 The project mainstreams gender by: ( i ) quantifying the differentiated impact of the burdens and benefits of improved water supply among male and female residents in the project affected areas and GBML service zones, and ( ii ) identifying areas of engagement by men and women during the operationalization of Component 1 and through the support of Component 2 in terms of citizen feedback and awareness. A qualitative study in the form of twelve semi - structured focus groups was carried out over the period of project preparation to provide deeper understanding of the gendered dimension and inform the design of gender-responsive indicators for measuring how the project is performing in this particular area. 19 Details of the analysis are provided in Annex 10. The Project Implementation Manual ( PIM ) contains gender-sensitive language that monitors and guarantees inclusiveness during such activities including citizen outreach, communications and recruitment to project positions. This requirement will help ensure equal representation of all diverse population groups in the GBML. 17 Literature review includes: World Bank Social Development Department. Making Water Supply and Sanitation Work for Women and Men, December 2010. A policy brief on Gender, Water and Sanitation developed by the Inter-agency Task Force on Gender and Water ( GWTF ) under the UN-Water and the Interagency Network on Women and Gender Equality ( IANWGE ) in support of Water for Life 2005-2015. June 2006. 18 World Bank Water Supply Augmentation Project ( P125184 ) Household Survey, March \u2013 April 2014. 19 Gender-responsive indicators can encapsulate gender-specific or gender-inclusive performance outcomes. The former measures specific needs of men and women whereas the latter focuses on relative benefits and provides comparable information. 26", + "ner_text": [ + [ + 1843, + 1859, + "named" + ], + [ + 228, + 232, + "Household Survey <> data geography" + ], + [ + 1412, + 1422, + "Household Survey <> publisher" + ], + [ + 1786, + 1796, + "Household Survey <> publisher" + ], + [ + 1875, + 1879, + "Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "June 2006. 18 World Bank Water Supply Augmentation Project ( P125184 ) Household Survey, March \u2013 April 2014. 19 Gender-responsive indicators can encapsulate gender-specific or gender-inclusive performance outcomes.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific survey conducted to gather data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Household Survey' suggests a structured collection of data collected from households.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific survey conducted to gather data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 87, + "text": "The World Bank Cameroon Education Reform Support Project ( P160926 ) Page 84 of 148 Table 2. 2. Understaffed Schools by Area Number of Understaffed Schools Rural Areas Urban Areas Schools with no state-paid teachers 336 296 40 Schools with 1 state-paid teacher 2, 646 2, 522 124 Schools with 2 state-paid teachers 2, 409 2, 290 119 Total number of understaffed public schools 5, 391 5, 108 283 Total number of public schools 12, 636 Percentage of understaffed public schools 43 95 5 Source: MINEDUB data ( 2015 / 16 ). 24. Activities initiated under Results Area 1 are expected to contribute substantially to investments benefitting refugee-affected local councils and host communities. While following the procedures specified for targets under this results area, priority will be given to schools in refugee-affected local councils. 25. Results Area 2: Increased capacities of teachers in the effective and efficient use of the new curriculum in pre-primary and primary schools. The objective of this result area is to develop the capacity of teachers and pedagogic supervisors to more effectively and efficiently use the new curriculum in public and private pre-primary and primary schools.", + "ner_text": [ + [ + 491, + 503, + "named" + ], + [ + 15, + 23, + "MINEDUB data <> data geography" + ], + [ + 506, + 515, + "MINEDUB data <> publication year" + ] + ], + "validated": true, + "empirical_context": "2. Understaffed Schools by Area Number of Understaffed Schools Rural Areas Urban Areas Schools with no state-paid teachers 336 296 40 Schools with 1 state-paid teacher 2, 646 2, 522 124 Schools with 2 state-paid teachers 2, 409 2, 290 119 Total number of understaffed public schools 5, 391 5, 108 283 Total number of public schools 12, 636 Percentage of understaffed public schools 43 95 5 Source: MINEDUB data ( 2015 / 16 ). 24.", + "type": "data", + "explanation": "In this context, 'MINEDUB data' is indeed a dataset as it is cited as the source for the statistics on understaffed schools.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MINEDUB data' is a dataset because it is referenced as a source of information for the statistics provided.", + "contextual_reason_agent": "In this context, 'MINEDUB data' is indeed a dataset as it is cited as the source for the statistics on understaffed schools.", + "contextual_signal": "mentioned as a source for the statistics", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 58, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLIX Against DLR 9. 2: US $ 1 million for each core micro-data set made available via an online portal to users in adherence with the access protocols for up to a total of US $ 8 million. Against DLR 9. 3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "ner_text": [ + [ + 557, + 590, + "named" + ] + ], + "validated": false, + "empirical_context": "3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate.", + "type": "system", + "explanation": "However, it is described as a system for querying data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions statistical indicators and querying capabilities.", + "contextual_reason_agent": "However, it is described as a system for querying data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "172_multi0page", + "page": 47, + "text": "Procedures in place. 2. 2 Strengthening of US $ 1. 5M Quarterly project Information and reporting MEST ' s planning and implementation progress needs clearly defined. management capacity. report for the first year and the Staff trained in collecting and annual EMIS reports from end recording information for 2002. EMIS. Hardware and software purchased and EMIS set up accordingly. 2. 3 Support to education US $ 1. 6M Inspectors duly trained to service delivery. provide pedagogical support and monitor implementation of the education curriculum. Public information campaign to mobilize communities in setting up SMC. SMC members trained - 42 -", + "ner_text": [ + [ + 315, + 319, + "named" + ] + ], + "validated": false, + "empirical_context": "report for the first year and the Staff trained in collecting and annual EMIS reports from end recording information for 2002. EMIS. Hardware and software purchased and EMIS set up accordingly.", + "type": "system", + "explanation": "However, EMIS is described as a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to information collection.", + "contextual_reason_agent": "However, EMIS is described as a system for managing information rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 41, + "text": ", promotions, and digital skills trainings recorded using HRMIS and other digital platforms 0. 00 0. 00 1, 250, 000. 00 1, 250, 000. 00 1, 250, 000. 00 1, 250, 000. 00 DLI allocation 5, 000, 000. 00 As a % of Total Financing Amount 0. 0 % 6: Establishing digital health standards across a national Health Information Exchange ( HIE ) ( Yes / No ) 0 No Yes ( a committee established ) Yes ( five registries and standards ) Yes ( four registries and standards and HIS upgraded ) Yes ( all results achieved ) 0. 00 0. 00 3, 000, 000. 00 10, 000, 000. 00 8, 000, 000. 00 8, 000, 000. 00 DLI allocation 29, 000, 000. 00 As a % of Total Financing Amount 8. 29 % \u27a2 6. 1: Establishment and functioning of a multi-sectoral committee to conduct core health information governance tasks. ( Yes / No ) No No Yes Yes Yes Yes 0. 00 0. 00 3, 000, 000. 00 0. 00 0. 00 0. 00 DLI allocation 3, 000, 000. 00 As a % of Total Financing Amount 0. 75 % \u27a2 6. 2: Establishment of foundational registries and standards ( Number ) 0 0 0 5 4 0 0. 00 0. 00 0. 00 10, 000, 000. 00 8, 000, 000. 00 0. 00 DLI allocation 18, 000, 000. 00 As a % of Total Financing Amount 4. 5 % \u27a2 6. 3: Upgrading of Health information systems to share minimum datasets within the national HIE framework and utilizing common registries. ( Yes / No ) No No No No No Yes", + "ner_text": [ + [ + 58, + 63, + "named" + ] + ], + "validated": false, + "empirical_context": ", promotions, and digital skills trainings recorded using HRMIS and other digital platforms 0. 00 0.", + "type": "system", + "explanation": "HRMIS is mentioned as a system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with recorded data.", + "contextual_reason_agent": "HRMIS is mentioned as a system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 118, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 113 of 117 Action Description DLI Respon sible Timing Completion measurement 2. Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender. 3. Complete the ongoing process to institutionalize the process and procedures for administration and management of scholarships by Jomo Kenyatta Foundation ( JKF ) Integrated in DLI # 4 MoE Year 2 Scholarship administration and management processes and procedures established at JKF. 4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A. MoE By Year 2, 2023 Complaints and grievances lodged through the NEMIS. Number of grievances satisfactorily addressed 5. Assess the outcomes of biogas pilots and analyze existing practices on use of biogas in schools N / A MoE Year 2 Costed action plan developed to scale up biogas projects.", + "ner_text": [ + [ + 828, + 835, + "named" + ] + ], + "validated": false, + "empirical_context": "4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A.", + "type": "system", + "explanation": "However, the context indicates it is a module and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a management information system that could store data.", + "contextual_reason_agent": "However, the context indicates it is a module and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 31, + "text": "The EMIS will build on efforts that are under way including by UNICEF through Niger \u2019 s Data Must Speak initiative which creates data dashboards at the school level; ( b ) support ( i ) the implementation of a national standardized assessment of students; and ( ii ) building the capacity of National Assessment Unit; and ( c ) M & E results of the project literacy intervention ( under Subcomponent 1. 2 ); and ( d ) support teacher deployment and retention measures in refugee-hosting areas and training in contingency planning in the event of new influx of populations. 31 Notably, a census of contractual teachers was conducted, in 2018, with the establishment of a single identification mechanism, which led to the registration of all contractual teachers at the MEP. Recent efforts also include an organizational audit of MES, the elaboration and distribution of HR procedures manuals at the MEP, training of trainers on teacher management, diagnosis of HR functions at the MEP and MES, elaboration of a compendium of all HR legal texts, and the elaboration of a strategy to reform HR management in both ministries. 32 These include the Capacity and Performance of Public Sector for Service Delivery Project and Support to Quality Education Project ( Projet d \u2019 Appui \u00e0 une \u00c9ducation de Qualit\u00e9, PAEQ, P132405 ).", + "ner_text": [ + [ + 587, + 617, + "named" + ], + [ + 78, + 83, + "census of contractual teachers <> data geography" + ], + [ + 636, + 640, + "census of contractual teachers <> publication year" + ] + ], + "validated": true, + "empirical_context": "2 ); and ( d ) support teacher deployment and retention measures in refugee-hosting areas and training in contingency planning in the event of new influx of populations. 31 Notably, a census of contractual teachers was conducted, in 2018, with the establishment of a single identification mechanism, which led to the registration of all contractual teachers at the MEP. Recent efforts also include an organizational audit of MES, the elaboration and distribution of HR procedures manuals at the MEP, training of trainers on teacher management, diagnosis of HR functions at the MEP and MES, elaboration of a compendium of all HR legal texts, and the elaboration of a strategy to reform HR management in both ministries.", + "type": "census", + "explanation": "This is indeed a dataset as it involves the registration and collection of data on contractual teachers.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves the collection of data about a specific population.", + "contextual_reason_agent": "This is indeed a dataset as it involves the registration and collection of data on contractual teachers.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "094_PAD-final-02262018", + "page": 49, + "text": "Name: Number of BRT and regular bus lines Number 0. 00 20. 00 Biannual The RPTA Data to be obtained from the RPTA. Description: Number of BRT lines ( all service plans ) and regular bus lines ( defined route and stops, tariff and time schedule ) operating in GBA. This indicator will reflect", + "ner_text": [ + [ + 75, + 84, + "named" + ], + [ + 109, + 113, + "RPTA Data <> publisher" + ], + [ + 128, + 147, + "RPTA Data <> data description" + ], + [ + 259, + 262, + "RPTA Data <> data geography" + ] + ], + "validated": true, + "empirical_context": "00 20. 00 Biannual The RPTA Data to be obtained from the RPTA. Description: Number of BRT lines ( all service plans ) and regular bus lines ( defined route and stops, tariff and time schedule ) operating in GBA.", + "type": "data", + "explanation": "This is indeed a dataset as it specifies data to be obtained regarding BRT lines and bus lines, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'RPTA Data' which suggests a structured collection of information.", + "contextual_reason_agent": "This is indeed a dataset as it specifies data to be obtained regarding BRT lines and bus lines, indicating it is used for empirical analysis.", + "contextual_signal": "described as data to be obtained from RPTA", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 92, + "text": "This range of levels is also the case in host communities, which, in some cases, are located in some of the poorest and most fragile areas of Ethiopia. All these factors are likely to affect which refugees are both willing and able to enter the formal labor market in Ethiopia. It will also affect tensions between refugees and Ethiopians vying for opportunities. Box 4. 1 presents some data on refugees, from the World Bank \u2019 s recently completed skills profiling / survey. Box 4. 1. Refugee Labor Market Integration Based on data completed from the World Bank \u2019 s refugee skills survey, currently 28. 5 percent of refugees hosted in Ethiopia are in the labor force; 78. 7 percent of these refugees are employed, while 22. 3 percent are unemployed. Only 40 percent of refugees are of working age and have the potential to participate in the labor force. High levels of children and female-headed households raise particular constraints to entrance into the labor force in its current form \u2014 especially given the lack of available housing and refugee-focused services in the proximity of industrial parks. Nearly 59 percent of refugees are children under the age of 15, with dependency ratios a ranging from 1. 2 for Eritreans to 2. 1 for South Sudanese.", + "ner_text": [ + [ + 566, + 587, + "named" + ], + [ + 142, + 150, + "refugee skills survey <> data geography" + ], + [ + 268, + 276, + "refugee skills survey <> data geography" + ], + [ + 414, + 424, + "refugee skills survey <> publisher" + ], + [ + 448, + 473, + "refugee skills survey <> data type" + ], + [ + 551, + 561, + "refugee skills survey <> publisher" + ], + [ + 635, + 643, + "refugee skills survey <> data geography" + ], + [ + 1239, + 1253, + "refugee skills survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "1. Refugee Labor Market Integration Based on data completed from the World Bank \u2019 s refugee skills survey, currently 28. 5 percent of refugees hosted in Ethiopia are in the labor force; 78.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data for the analysis of refugee labor market integration.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on refugee skills.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data for the analysis of refugee labor market integration.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 53, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 50 of 68 Description Proportion of an alert about a disease, condition, or event of public health importance which may be true or invented Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2. 1 under WHO Birth registration notification coverage Description Proportion of live births notified by the health facility among the total expected live births in specific period Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / UNICEF Maternal death review coverage ( % ) Description Percentage of maternal deaths occurring in the health facility that were audited and reviewed. Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2. 1 under WHO Component 3: Monitoring and Evaluation and Project Management Percentage of health facilities receiving quarterly supervision visits ( disaggregated by visits by CHDs, and States MoH ) ( Percentage ) Description Percentage of health facilities receiving at least one quarterly supervision visit within the quarter from either the CHD, or the State MoH Frequency Quarterly Data source MoH; TPM Methodology for Data Collection MoH to provide data; TPM to verify Responsibility for Data Collection MoH / TPM Percentage of health facilities receiving quarterly supervision visits", + "ner_text": [ + [ + 606, + 611, + "named" + ], + [ + 15, + 26, + "DHIS2 <> data geography" + ], + [ + 407, + 447, + "DHIS2 <> data description" + ], + [ + 698, + 728, + "DHIS2 <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 50 of 68 Description Proportion of an alert about a disease, condition, or event of public health importance which may be true or invented Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2. 1 under WHO Birth registration notification coverage Description Proportion of live births notified by the health facility among the total expected live births in specific period Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / UNICEF Maternal death review coverage ( % ) Description Percentage of maternal deaths occurring in the health facility that were audited and reviewed. Frequency Quarterly Data source WHO Methodology for Data Collection Quarterly and biannual TPM Responsibility for Data Collection PMU / TPM; Measures subcomponent 2.", + "type": "system", + "explanation": "DHIS2 is indeed a data source as it is explicitly mentioned in the context as the data source for birth registration notification coverage.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is referenced as a data source for health-related metrics.", + "contextual_reason_agent": "DHIS2 is indeed a data source as it is explicitly mentioned in the context as the data source for birth registration notification coverage.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 28, + "text": "The GOY decided to use the proxy means testing to identify SWF beneficiaries as the best way to shift targeting from the previous social to poverty-based categories. The PMT was applied to the 2008 SWF mass survey data, and determined to indeed provide the best targeting alternative for the CT program. Strengthening of the PMT application in the SWF will ensure continued targeting progress. 65. The focus on developing the SWF core operational processes around the full CT program cycle will enable the GOY to implement large-scale CCT programs and increase CT program responsiveness to future shocks and crises. The approach of implementing the project through existing SWF structures would ensure learning by doing and building the capacity of the SWF staff through substantial amount of technical assistance and training. The project oversight responsibility is assigned to the existing SWF Board of Directors, thereby ensuring close coordination and collaboration with other donor-supported projects ( e. g., the EC ). 66. The present SWF approach to micro-credit does not reflect international best practice. The pilot project design is intended to test alternative roles for the SWF in the microcredit sector, as well as to connect the SWF with more experienced players in the microcredit network in Yemen.", + "ner_text": [ + [ + 193, + 218, + "named" + ] + ], + "validated": true, + "empirical_context": "The GOY decided to use the proxy means testing to identify SWF beneficiaries as the best way to shift targeting from the previous social to poverty-based categories. The PMT was applied to the 2008 SWF mass survey data, and determined to indeed provide the best targeting alternative for the CT program. Strengthening of the PMT application in the SWF will ensure continued targeting progress.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as 'mass survey data' used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific survey data from 2008.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as 'mass survey data' used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "006_BOSIB-2c22668f-f4ba-42f5-a0d1-7949b7b8fe34", + "page": 39, + "text": ", Ministry of Health Methodology for Data Collection Local community surveys, confirmation by supervising engineer Responsibility for Data Collection MoHAIS People provided with access to electricity Description Indicator will measure the number of off grid energy generation units constructed at selected schools, health centers, and public infrastructure that serve as conduits for small and medium enterprises, including women-led enterprises within the refugee and host community areas Frequency Biannual Data Source Project progress report Methodology for Data Collection Local community surveys, Rural Electrification Authority ( REA ), and relevant government ministries Responsibility for Data MoHAIS in collaboration with REA", + "ner_text": [ + [ + 53, + 76, + "named" + ], + [ + 239, + 281, + "Local community surveys <> data description" + ] + ], + "validated": true, + "empirical_context": ", Ministry of Health Methodology for Data Collection Local community surveys, confirmation by supervising engineer Responsibility for Data Collection MoHAIS People provided with access to electricity Description Indicator will measure the number of off grid energy generation units constructed at selected schools, health centers, and public infrastructure that serve as conduits for small and medium enterprises, including women-led enterprises within the refugee and host community areas Frequency Biannual Data Source Project progress report Methodology for Data Collection Local community surveys, Rural Electrification Authority ( REA ), and relevant government ministries Responsibility for Data MoHAIS in collaboration with REA", + "type": "survey", + "explanation": "In this context, 'Local community surveys' are explicitly mentioned as a method for data collection, indicating they are used to gather empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a method of data collection.", + "contextual_reason_agent": "In this context, 'Local community surveys' are explicitly mentioned as a method for data collection, indicating they are used to gather empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 41, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXII DLI allocation 17, 197, 500. 00 As a % of Total Financing Amount 5. 0 % \u27a2 5. 3: Enhanced digital literacy / skills ( Number ) 0 0 Prime Ministry endorses the curricular for digital training adopted by IPA, 1, 000 civil servants with certified digital literacy / skills 2, 000 civil servants with certified digital literacy / skills 3, 000 civil servants with certified digital literacy / skills 4, 000 civil servants with certified digital literacy / skills 0. 00 0. 00 3, 000, 000. 00 2, 000, 000. 00 2, 000, 000. 00 2, 000, 000. 00 DLI allocation 9, 000, 000. 00 As a % of Total Financing Amount 2. 57 % \u27a2 5. 4: Increase in the utilization of HRMIS and other digital platforms by public entities for competitive recruitment, performance-based promotion and digital upskilling. ( Percentage ) 0 0 18 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 36 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 54 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 72 % of recruitments", + "ner_text": [ + [ + 746, + 751, + "named" + ] + ], + "validated": false, + "empirical_context": "57 % \u27a2 5. 4: Increase in the utilization of HRMIS and other digital platforms by public entities for competitive recruitment, performance-based promotion and digital upskilling. ( Percentage ) 0 0 18 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 36 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 54 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 72 % of recruitments", + "type": "system", + "explanation": "However, HRMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with recorded data on recruitments and promotions.", + "contextual_reason_agent": "However, HRMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 60, + "text": "Data will be collected for all pre-selected households, following a list of variables and based on the experience of other targeting techniques, namely the HE A. This will allow the calculation of a PMT score to be used to confirm preselected households, reduce potential inclusion errors and make the final selection. The community will then discuss the list and validate it if in agreement. Comparison of the poverty ranking of the actual beneficiaries with the national survey poverty criteria at different levels will provide the basis for estimating the extent of any inclusion and exclusion errors ( ECOSIT 2011 is the most recent national household survey, but a new one is expected to be conducted in 2016 ). 40. M & E capacity building. An important objective of the project is to strengthen the national M & E capacity for SSNs and other social programs and develop harmonized tools for identifying and registering beneficiaries of SSNs covered by different programs. The project will provide significant support and TA to the Government to develop a robust MIS and to reinforce the capacity of the CFS in the management of such system even after the Project closure.", + "ner_text": [ + [ + 606, + 617, + "named" + ] + ], + "validated": true, + "empirical_context": "The community will then discuss the list and validate it if in agreement. Comparison of the poverty ranking of the actual beneficiaries with the national survey poverty criteria at different levels will provide the basis for estimating the extent of any inclusion and exclusion errors ( ECOSIT 2011 is the most recent national household survey, but a new one is expected to be conducted in 2016 ). 40.", + "type": "survey", + "explanation": "It is indeed a dataset as it is used to compare poverty rankings and validate inclusion and exclusion errors.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it references a national household survey.", + "contextual_reason_agent": "It is indeed a dataset as it is used to compare poverty rankings and validate inclusion and exclusion errors.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 43, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0. 00 0. 00 5. 00 10. 00 15. 00 20. 00 20. 00 Rapid student assessment mechanism developed and piloted ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Tutoring / accelerated learning program for disadvantaged students developed and implemented ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Students benefiting from direct interventions to 0. 00 0. 00", + "ner_text": [ + [ + 447, + 450, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0.", + "type": "organization", + "explanation": "'NBS' refers to an organization (National Bureau of Statistics) rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'NBS' is a dataset because it is mentioned alongside data-related terms.", + "contextual_reason_agent": "'NBS' refers to an organization (National Bureau of Statistics) rather than a structured collection of data.", + "contextual_signal": "'mentioned only as a project, not as a data source'", + "tags": [] + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 59, + "text": "The household-level survey would provide gender-disaggregated data on labor market outcomes ( e. g. labor force participation, employment by sector / region / age, etc. ). The employer survey would be implemented in key economic sectors ( e. g. manufacturing, tourism, agriculture, etc. ) to gather information on women \u2019 s employment in those sectors, at the firm level. Such gender-disaggregated data are missing today and are critical for the newly established ministry for relevant and adequate evidence-based policy making on gender. o Gender database with gender-disaggregated data ( DLR 9. 2 ). A database is proposed to be set up at the OMSWA to compile existing and the newly-collected gender-disaggregated data ( as per DLR 9. 1 ). The first step will be to coordinate with different ministries ( working with the Gender Units, for example ) and donor agencies to compile existing administrative data related to women ( e. g. access to education, jobs, health, etc. ). o Childcare provision action plan and launch of a pilot project ( DLR 9. 3 ). High-quality childcare accessibility and affordability are widely accepted as necessary areas needing improvement to enhance women \u2019 s participation in the labor market in Lebanon. This activity will therefore develop a detailed feasibility study, with a clear action plan and required laws / regulations to be passed, and", + "ner_text": [ + [ + 176, + 191, + "named" + ] + ], + "validated": false, + "empirical_context": "labor force participation, employment by sector / region / age, etc. ). The employer survey would be implemented in key economic sectors ( e. g.", + "type": "survey", + "explanation": "However, it is mentioned as a survey that would be implemented, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'employer survey' suggests a collection of data from employers.", + "contextual_reason_agent": "However, it is mentioned as a survey that would be implemented, not as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 35, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 26 of 68 project manager, FM specialist, PS, M & E specialist, environmental specialist and social development specialist with experience in GBV prevention and response, civil works engineers, and other technical specialists to ensure timely, quality, transparent, and effective implementation of the civil works activities. Further details will be provided in the POM. The fiduciary assessment of the PMT of the MoER and NORLD has been completed and is reflected in the fiduciary sections. B. Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "ner_text": [ + [ + 1071, + 1090, + "named" + ], + [ + 874, + 900, + "regular survey data <> reference population" + ], + [ + 906, + 922, + "regular survey data <> reference population" + ], + [ + 1194, + 1198, + "regular survey data <> author" + ], + [ + 1329, + 1347, + "regular survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to data collected systematically through surveys for monitoring and evaluation purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'regular survey data' implies a structured collection of data collected through surveys.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data collected systematically through surveys for monitoring and evaluation purposes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 42, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 37 of 61 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 ( Yes / No ) The common financial vehicle has been established ( Yes / No ) No No Yes Yes Yes Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Programs using the Social Registry to select their beneficiaries Number of user agreements signed by the Social Registry Semester Project Semester report Supervision Social Registry Directorate SSN programs ' beneficiary households avoiding negative coping strategies ( national / refugees ) This indicators tracks the percentage of households benefiting from the shock - response program which avoid negative coping strategies as a result of the program. Negative Coping strategies refer to those included in the Coping Strategy Index ( CSI ): reduce number of meals eaten in a day, consume seed stock held for next season, and so on.", + "ner_text": [ + [ + 582, + 597, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 686, + 708, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 37 of 61 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 ( Yes / No ) The common financial vehicle has been established ( Yes / No ) No No Yes Yes Yes Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Programs using the Social Registry to select their beneficiaries Number of user agreements signed by the Social Registry Semester Project Semester report Supervision Social Registry Directorate SSN programs ' beneficiary households avoiding negative coping strategies ( national / refugees ) This indicators tracks the percentage of households benefiting from the shock - response program which avoid negative coping strategies as a result of the program. Negative Coping strategies refer to those included in the Coping Strategy Index ( CSI ): reduce number of meals eaten in a day, consume seed stock held for next season, and so on.", + "type": "registry", + "explanation": "In the context, it is mentioned as a source for selecting beneficiaries, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to beneficiaries.", + "contextual_reason_agent": "In the context, it is mentioned as a source for selecting beneficiaries, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for selecting beneficiaries", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 41, + "text": "32 Indicator 2. 4: Teacher feedback on training and certification system monitored, analyzed, and included in the annual monitoring and progress reports developed by ETC No Yes / No No Yes Annually MOE Teacher surveys Reformed student assessment and certification system Indicator 3. 1: Grade 3 diagnostic test on early grade reading and math implemented 7. 2 No Yes / No No Yes Annually MOE Assessments records for a sample of schools Indicator 3. 2: Legal framework for the Tawjihi exam has been adopted so that its secondary graduation and certification function is separated from its function as a screening mechanism for university entrance 7. 4 No Yes / No No Yes Annually MOE Indicator 3. 3: Student and Teacher Feedback on first phase Tawjihi reform inform the Tawjihi reform rollout plan No Yes / No No Yes Annually MOE Student and teacher feedback surveys Strengthened management of education system Indicator 4. 1: Geographical Information System ( GIS ) and Open EMIS produce accurate and timely disaggregated data needed for program monitoring and evaluation 8. 1 No Yes / No No Yes Annually MOE GIS reports should reflect data updated on a quarterly basis and verified by MOE using its verification protocols to be considered accurate Indicator 4. 2: Percentage of bids for goods and works that needed to be re \u2010 bid No Percentage N / A < 20 % Annually MOE", + "ner_text": [ + [ + 926, + 957, + "named" + ] + ], + "validated": false, + "empirical_context": "3: Student and Teacher Feedback on first phase Tawjihi reform inform the Tawjihi reform rollout plan No Yes / No No Yes Annually MOE Student and teacher feedback surveys Strengthened management of education system Indicator 4. 1: Geographical Information System ( GIS ) and Open EMIS produce accurate and timely disaggregated data needed for program monitoring and evaluation 8. 1 No Yes / No No Yes Annually MOE GIS reports should reflect data updated on a quarterly basis and verified by MOE using its verification protocols to be considered accurate Indicator 4.", + "type": "system", + "explanation": "However, in this context, it is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, in this context, it is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 36, + "text": "The design of the impact evaluation will be done in agreement with the collaborating agencies and is expected to rely on a rigorous methodology ( i. e., Randomized Controlled Trial or similar ). Additionally, all project beneficiaries will be linked with firms surveyed by UBOS for allowing long-term follow up. This M & E process will involve the teams in the implementing agencies. Ugandan research institutions will be a key partner in implementing the M & E framework building on existing data reports and in partnership with local research institutions. The PSFU PIU will work closely with Ugandan research institutions for three reasons. First, to coordinate the various data collections so as to be more efficient in utilizing all existing firm-level data. Second to identify and collect additional data with respect to jobs and firm productivity. Third, to use the data collected on an ongoing basis make suggestions to the PSC to improve the project impact. 48 All data will be disaggregated by gender, refugee, host community, and non-host community nations to ensure adequate targeting and collection of results for targeted populations. C. Sustainability 79. In principle, the project components are based on implementing interventions that are fully financially sustainable while generating the largest possible impact.", + "ner_text": [ + [ + 747, + 762, + "named" + ] + ], + "validated": true, + "empirical_context": "The PSFU PIU will work closely with Ugandan research institutions for three reasons. First, to coordinate the various data collections so as to be more efficient in utilizing all existing firm-level data. Second to identify and collect additional data with respect to jobs and firm productivity.", + "type": "data", + "explanation": "In this context, 'firm-level data' is explicitly mentioned as part of data collections, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'firm-level data' suggests a structured collection of information related to firms.", + "contextual_reason_agent": "In this context, 'firm-level data' is explicitly mentioned as part of data collections, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "127_PAD10180PAD0P14972400PUBLIC00Box391431B", + "page": 54, + "text": "44 Procedures for preparing and approving RAP 54. Project Screening. Once the subprojects are identified by unions or municipalities, the PMU will obtain all permits / approvals related to the Project. Thereafter, they will cooperate with unions or municipalities to carry out social screening to determine whether or not the subprojects will result in any resettlement impact. The PMU will then decide on the need for the preparation of a Resettlement Action Plan ( RAP ) or an abbreviated RAP. 55. Socioeconomic and Inventory Survey. Following the identification of the subprojects that may involve involuntary resettlement, the PMU in cooperation with unions and municipalities will carry out a socio-economic study and census survey, in which baseline data within the subproject \u2019 s target areas is collected. This information shall include the PAPs and related household members or dependents, total land holdings, and affected assets. This information will be put in writing and shall be used in determining the appropriate compensation and assistance for each affected individual / household. 56. RAP preparation, review and approval. Once the census survey is completed, the PMU will work with relevant unions and municipalities to prepare the RAP.", + "ner_text": [ + [ + 698, + 718, + "named" + ] + ], + "validated": false, + "empirical_context": "Socioeconomic and Inventory Survey. Following the identification of the subprojects that may involve involuntary resettlement, the PMU in cooperation with unions and municipalities will carry out a socio-economic study and census survey, in which baseline data within the subproject \u2019 s target areas is collected. This information shall include the PAPs and related household members or dependents, total land holdings, and affected assets.", + "type": "study", + "explanation": "However, it is described as a study, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection.", + "contextual_reason_agent": "However, it is described as a study, not a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 28, + "text": "The Project will develop simple citizens \u2019 scorecards to be completed by CDCs and Social Organizers to report upon the minimum service standards. CCAP will also track more closely through the scorecards, regular reporting and evaluations the participation of women, poor and vulnerable groups, such as returnees and IDPs, during the project cycle. Furthermore, taking advantage of technology and high mobile access coverage in the country, CCAP will explore mobile applications for reporting and grievance redress. Second, the project will innovate and use the satellite imagery of the existing ARTF third party monitoring activity to validate infrastructure gaps and service delivery outputs. For example, the presence of schools and irrigation canals in a sample number of areas will be validated through satellite imagery against community monitoring reports. Lastly, this component will support ways to strengthen a coordinated approach across line ministries \u2019 monitoring and evaluation mechanisms, including at the community, district and provincial levels, within government and with third party monitors. As part of the Government \u2019 s strong commitment to making the Citizens \u2019 Charter operate effectively, the Office of the President and MoF will receive semi-annual progress reports on the achievement of the service standards so they can closely monitor progress, assist with removing bottlenecks in service delivery, and allocate budgetary resources as needed. ( b ) Studies and evaluations. Several studies are planned related to service delivery, CDC institution strengthening, social inclusion, social accountability, and technical quality audits. The project will also explore the possibility of an evaluation to", + "ner_text": [ + [ + 561, + 578, + "named" + ] + ], + "validated": false, + "empirical_context": "Furthermore, taking advantage of technology and high mobile access coverage in the country, CCAP will explore mobile applications for reporting and grievance redress. Second, the project will innovate and use the satellite imagery of the existing ARTF third party monitoring activity to validate infrastructure gaps and service delivery outputs. For example, the presence of schools and irrigation canals in a sample number of areas will be validated through satellite imagery against community monitoring reports.", + "type": "non-dataset", + "explanation": "'Satellite imagery' is not a dataset itself but rather a method or tool used for validation in the project context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'satellite imagery' is a dataset because it involves data collection through technology.", + "contextual_reason_agent": "'Satellite imagery' is not a dataset itself but rather a method or tool used for validation in the project context.", + "contextual_signal": "mentioned only as a method for validation, not as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 22, + "text": "Inadequate data protection due to lack of user centricity, user control of personal data, and transparency. DLI4 on digital transformation of health service delivery 63. 00 18. 34 81. 34 Partial coverage of the national EMR system developed and deployed to only some of the MOH facilities over the last 10 years. For the remaining MOH facilities, the MOH intends to complete the installation and operationalization within the next five years. This is in accordance with its strategic plans, which will ensure full coverage and operationalization, thus promoting patient access to e-health services, including for refugees. Result Area 2 on Enhanced Government Effectiveness through Digitalization DLI5 on professionalization of the civil service 60. 00 - 60. 00 a. Prevailing seniority-based HRM in the civil service. b. Need to enhance digital skills across the civil service. c. Underrepresentation of women in leadership positions. DLI6 on establishing digital health standards across a national Health Information Exchange ( HIE ) 29. 00 18. 00 47. 00 a. Absence of a national HIE framework and multiplicity of health information systems. b. Need to establish standards ( including foundational registries and coding ) to enhance the interoperability of health information systems. DLI7 on digital student assessment 50. 00 - 50. 00 a.", + "ner_text": [ + [ + 220, + 230, + "named" + ] + ], + "validated": false, + "empirical_context": "34 81. 34 Partial coverage of the national EMR system developed and deployed to only some of the MOH facilities over the last 10 years. For the remaining MOH facilities, the MOH intends to complete the installation and operationalization within the next five years.", + "type": "system", + "explanation": "However, the context indicates it is a system for managing records, not a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMR system' suggests a structured collection of electronic medical records.", + "contextual_reason_agent": "However, the context indicates it is a system for managing records, not a dataset itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 31, + "text": "Recent ID4D research on women \u2019 s ID ownership in Ethiopia found that women do not see the Kebele ID as salient to their daily lives and therefore do not pursue applying for one even if it is accessible. 53 By streamlining and, in some cases, automating the updating of information, the project will be making it easier for women to assert their associated rights and entitlements. The new Fayda system can also embed use cases and value propositions that are tailored to women, to increase demand for identification. Furthermore, a digital identification system will create more opportunities for Ethiopia \u2019 s many women - owned small and medium enterprises to do business online, and more generally access employment, and enroll in male - dominated education sectors such as science, technology, engineering, and mathematics education. 68. The authentication component of a digital ID system can also increase the security of funds transfers for both in - person and remote environments, particularly as Ethiopian legislation enables and helps increase payment interoperability between financial service providers.", + "ner_text": [ + [ + 390, + 402, + "named" + ] + ], + "validated": false, + "empirical_context": "53 By streamlining and, in some cases, automating the updating of information, the project will be making it easier for women to assert their associated rights and entitlements. The new Fayda system can also embed use cases and value propositions that are tailored to women, to increase demand for identification. Furthermore, a digital identification system will create more opportunities for Ethiopia \u2019 s many women - owned small and medium enterprises to do business online, and more generally access employment, and enroll in male - dominated education sectors such as science, technology, engineering, and mathematics education.", + "type": "system", + "explanation": "However, the context describes the Fayda system as a digital identification system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a system that could potentially manage data.", + "contextual_reason_agent": "However, the context describes the Fayda system as a digital identification system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 44, + "text": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 40 of 94 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection People benefitting from safely managed drinking water services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed drinking water services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed drinking water services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed drinking water services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s connection records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed drinking water services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed drinking water services in the selected municipalities that have been provided through this Project.", + "ner_text": [ + [ + 617, + 625, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 40 of 94 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection People benefitting from safely managed drinking water services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed drinking water services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed drinking water services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed drinking water services in the selected municipalities that have been provided through this Project.", + "type": "program", + "explanation": "'PMU Data' is not a dataset but rather refers to data compiled by the Project Management Unit, which is part of a program.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PMU Data' refers to a dataset because it includes the term 'data'.", + "contextual_reason_agent": "'PMU Data' is not a dataset but rather refers to data compiled by the Project Management Unit, which is part of a program.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 41, + "text": "Rural This indicator measures the cumulative number of Annual Regular WASH MIS Regular WASH MIS reports, HH surveys, National and regional WASH coordination", + "ner_text": [ + [ + 105, + 115, + "named" + ], + [ + 0, + 5, + "HH surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "Rural This indicator measures the cumulative number of Annual Regular WASH MIS Regular WASH MIS reports, HH surveys, National and regional WASH coordination", + "type": "survey", + "explanation": "'HH surveys' is indeed a dataset as it is mentioned in the context of measuring cumulative data related to WASH indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'HH surveys' is a dataset because it refers to a specific type of survey that collects data.", + "contextual_reason_agent": "'HH surveys' is indeed a dataset as it is mentioned in the context of measuring cumulative data related to WASH indicators.", + "contextual_signal": "'enumerated alongside known datasets'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "172_multi0page", + "page": 45, + "text": "( c ) 100 junior secondary reports. school complete grants by EOP. ( d ) Average of 95 JSS partial grants per year. ( e ) 90 % of unqualified teachers ( on a total of estimated 10, 000 unqualified teachers Grade I to 9 ) receive in-service basic training. Annual basic education sector The MEST is implementing a performance report by the transparent selection criteria of MEST, and by the PSC. site selection and construction and rehabilitation activities. Annual school facilities SPs are fully committed and surveys and EMIS reports involved to deliver education starting year 2003. services under the BOL standards premise. Quarterly consolidated Other stakeholders mamtain progress report prepared by their commitment to support the PCU and Planning the education sector. Directorate. Supervision reports. Capacity to implement the BOL standards is adequate at Field supervision and the national, regional, district evaluation reports and school levels. Component H: Enhanced institutional The PCU is able to undertake PSC quarterly reports on grant The MEST is committed to capacity of the MEST. at least quarterly technical proposal vetted by the work in partnershlp with the reviews of grant proposals in PCU / technical team. other stakeholders ( including Capacity is developed at the order to present them to the NGOs ). MEST central and district PSC for decision.", + "ner_text": [ + [ + 523, + 527, + "named" + ], + [ + 580, + 584, + "EMIS <> reference year" + ] + ], + "validated": true, + "empirical_context": "site selection and construction and rehabilitation activities. Annual school facilities SPs are fully committed and surveys and EMIS reports involved to deliver education starting year 2003. services under the BOL standards premise.", + "type": "system", + "explanation": "In this context, 'EMIS' is confirmed as a dataset since it is involved in delivering education and is linked to reports.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is associated with reports and surveys used for educational data.", + "contextual_reason_agent": "In this context, 'EMIS' is confirmed as a dataset since it is involved in delivering education and is linked to reports.", + "contextual_signal": "mentioned as a source of reports and surveys", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 60, + "text": "48 second cycle of activities. The Mid Term Review will take into account the results of the evaluations and readjust the project activities accordingly. 38. The process evaluation will use several sources of data. The MIS and regular monitoring reports will produce information on whether the key elements of the safety nets system ( targeting mechanism, payment, MIS ) have been adequately developed. The process evaluation will include interviews with local implementing partners, such as payment agencies and NGOs in charge of the accompanying measures, to identify bottlenecks and recommend solutions. A small qualitative beneficiary assessment might be carried out to complete the process evaluation to assess the satisfaction of direct beneficiaries with payment procedures, accompanying measures and with the benefits they will have received. 39. Targeting assessment. The project targeting approach will be refined as the project starts implementation and on the basis of specific studies supported by the ASP MDTF. The result of the targeting assessment will support the preparation of the manuals. The proposed approach is as follows: Beneficiary households will be selected combining community, geographical, and poverty approaches; and will include a census of all village households in select areas, categorical targeting ( households with children under the age of 12 or with pregnant women ) and a simple PMT exercise.", + "ner_text": [ + [ + 219, + 222, + "named" + ] + ], + "validated": false, + "empirical_context": "The process evaluation will use several sources of data. The MIS and regular monitoring reports will produce information on whether the key elements of the safety nets system ( targeting mechanism, payment, MIS ) have been adequately developed. The process evaluation will include interviews with local implementing partners, such as payment agencies and NGOs in charge of the accompanying measures, to identify bottlenecks and recommend solutions.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system but not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured collection of data.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system but not as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 35, + "text": "This can also provide a platform for remote supervision and real-time risk monitoring of project activities. 89. Data verification will be built into the M & E arrangements. Data will be verified on a sample basis by a third - party monitoring ( TPM ) agent. The TPM will be recruited to monitor progress of project activities and verify project data on a sample basis throughout implementation. The TPM will report directly to the PSC. The information collected by the TPM will help triangulate the data submitted by the various implementing arms including NTTIs and universities. Spending reported by participants will be verified through an independent audit of MoGEI and MoHEST accounts. Indicators that are reported will be cross-referenced with EMIS data. Effective verification will depend on the quality of EMIS data available, which will be strengthened with support from the Project. 90. The Project will invest in an integrated system for the management and analysis of data collected through various channels for effective monitoring. The integrated monitoring system will be designed to capture the data needed to report across all the indicators outlined in the Results Framework. This system will incorporate capacities for data importing, either manually or directly via online survey and / or SMS survey platforms and EMIS data. Data collected on mobile devices by project monitoring officers or from the communities will also be uploaded to the system.", + "ner_text": [ + [ + 1335, + 1344, + "named" + ] + ], + "validated": true, + "empirical_context": "The integrated monitoring system will be designed to capture the data needed to report across all the indicators outlined in the Results Framework. This system will incorporate capacities for data importing, either manually or directly via online survey and / or SMS survey platforms and EMIS data. Data collected on mobile devices by project monitoring officers or from the communities will also be uploaded to the system.", + "type": "data", + "explanation": "In the context, 'EMIS data' is explicitly referenced as part of the data collection process for reporting indicators, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS data' is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "In the context, 'EMIS data' is explicitly referenced as part of the data collection process for reporting indicators, indicating it functions as a data source.", + "contextual_signal": "mentioned as part of the data collection process", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 45, + "text": "36 Table A3. 2. DLRs Verification Protocol Table Disbursement \u2010 Linked Result ( DLR ) Definition of DLI and proof of accomplishment Protocol to evaluate achievement of the DLRs and data / results verification Data Source Verification Entity Verification Procedure DLR # 1. 1 Number of Syrian refugee children enrolled in target schools at the basic and secondary education levels DLR # 1. 2 Number of Syrian refugee children enrolled in target schools at KG2 level Number of Syrian refugee children that are enrolled in the Program target schools, disaggregated by gender. The Program target schools are schools that are participating in the Program interventions, including: ECE enrolment, teacher training, socio \u2010 emotional learning program, school maintenance, student assessment, etc. MOE ( OpenEMIS ) Third Party The verification agency will check the number of Syrian refugee children enrolled in target schools and will conduct site visits and spot checks in a sample of randomly selected schools to verify enrollment numbers. DLR # 2 Number of additional children enrolled in public and private KG2 Number of students enrolled in public or freely provided private KG2. Data should be reported disaggregated by type of school, directorate, gender, and nationality. MOE ( OpenEMIS ) Third Party Enrollment data and disaggregation is provided to the verification agency.", + "ner_text": [ + [ + 1302, + 1317, + "named" + ], + [ + 285, + 308, + "Enrollment data <> reference population" + ], + [ + 790, + 793, + "Enrollment data <> publisher" + ], + [ + 1273, + 1276, + "Enrollment data <> publisher" + ], + [ + 1392, + 1410, + "Enrollment data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Data should be reported disaggregated by type of school, directorate, gender, and nationality. MOE ( OpenEMIS ) Third Party Enrollment data and disaggregation is provided to the verification agency.", + "type": "data", + "explanation": "In this context, 'Enrollment data' is explicitly mentioned as being provided to a verification agency, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Enrollment data' is a dataset because it refers to a specific type of data that is reported and disaggregated.", + "contextual_reason_agent": "In this context, 'Enrollment data' is explicitly mentioned as being provided to a verification agency, indicating it is used as a data source.", + "contextual_signal": "follows 'disaggregation is provided to the verification agency'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "This subcomponent will support improved planning, management, and evaluation of education reforms in the following areas: ( a ) improvement of the existing EMIS and making better use of data to support management decisions at all levels, ( b ) national and international student assessments, and ( c ) TA to support implementation of Components 1 and 2 of the project. 41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "ner_text": [ + [ + 838, + 842, + "named" + ] + ], + "validated": false, + "empirical_context": "41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data management and generation.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 62, + "text": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 50 of 76 ANNEX 2: Gender Assessment 1. This assessment seeks to elaborate on gender considerations for the UDAP-GovNet. The assessment, ( a ) identifies the gaps between the experiences and status of women and men relevant to this project, ( b ) identifies activities to address these gaps, and ( c ) suggests suitable indicators to track progress. 2. This assessment is based on information from: ( a ) desk-based document review including gender literature, national-level gender analytics, regional policy and guidance notes, and international good practice guidelines; and ( b ) stakeholder / client consultations to identify the most relevant and targeted ways to address these gaps and design measurable indicators to track progress. The assessment accounted for data collection constraints in refugee and host communities. A survey will be conducted to inform the baseline on device ownership / digital skills during project implementation, with the aim to improve sex-disaggregated data collection that is currently unavailable, and help measure outcomes on digital literacy targeted through digital skills and online safety training. 3. The table below identifies how the project will contribute to closing gender gaps through project interventions. It highlights the biggest gaps that are being addressed by project actions and assigns a relevant and measurable indicator to each.", + "ner_text": [ + [ + 909, + 915, + "named" + ], + [ + 15, + 21, + "survey <> data geography" + ], + [ + 877, + 905, + "survey <> reference population" + ], + [ + 948, + 993, + "survey <> data description" + ], + [ + 1049, + 1082, + "survey <> data description" + ] + ], + "validated": true, + "empirical_context": "The assessment accounted for data collection constraints in refugee and host communities. A survey will be conducted to inform the baseline on device ownership / digital skills during project implementation, with the aim to improve sex-disaggregated data collection that is currently unavailable, and help measure outcomes on digital literacy targeted through digital skills and online safety training. 3.", + "type": "survey", + "explanation": "In this context, the survey is explicitly mentioned as a method to collect baseline data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because surveys are often structured collections of data used for empirical analysis.", + "contextual_reason_agent": "In this context, the survey is explicitly mentioned as a method to collect baseline data, indicating it functions as a data source.", + "contextual_signal": "follows 'will be conducted to inform the baseline on device ownership / digital skills'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 80, + "text": "70 59. The analysis of the Batwa \u2019 s socio-economic situation highlight the following issues: ( i ) marginalization and discrimination of the Batwa by other ethnic communities, ( ii ) lack of access to health especially for children under 5 and pregnant and lactating women, ( iii ) lack of access to education, ( iv ) weak knowledge about nutrition, ( v ) food insecurity, ( vi ) precarious hygiene practices, ( vi ) access to justice issues, ( vii ) low Batwa participation in the instances of decision making resulting in their low representation in the various sectoral committees set up at Community level, ( viii ) lack of access to land and ( ix ) low access to housing. 60. Based on the experiences in addressing the specific vulnerabilities and discrimination facing the Batwa, the Project will ensure that they are included in the first listing ( by conducting a separate listing of Batwa households in the participating collines based on the provincial census ) and included in the beneficiary registry per the results of the targeting survey. The project will also ensure that beneficiary Batwa households can participate in the accompanying measures by partnering with NGOs that are experienced in taking into account their specificity.", + "ner_text": [ + [ + 953, + 970, + "named" + ], + [ + 27, + 32, + "provincial census <> reference population" + ], + [ + 780, + 785, + "provincial census <> reference population" + ], + [ + 893, + 909, + "provincial census <> reference population" + ], + [ + 1101, + 1117, + "provincial census <> reference population" + ] + ], + "validated": true, + "empirical_context": "60. Based on the experiences in addressing the specific vulnerabilities and discrimination facing the Batwa, the Project will ensure that they are included in the first listing ( by conducting a separate listing of Batwa households in the participating collines based on the provincial census ) and included in the beneficiary registry per the results of the targeting survey. The project will also ensure that beneficiary Batwa households can participate in the accompanying measures by partnering with NGOs that are experienced in taking into account their specificity.", + "type": "census", + "explanation": "In this context, it is confirmed as a dataset since it is used for conducting a separate listing of households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'provincial census' implies a structured collection of demographic data.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is used for conducting a separate listing of households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 36, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 31 of 85 school leaders ( as well as CPs, inspectors, and ministerial staff ). The CPD would be evaluated not through simple participation in training but by the impact it is having on practice. For teachers, this would be through in-class observations and for school leaders, through evaluations. Other evaluations would be needed for other staff. d. Modernize the EMIS to become a sector-wide platform that manages all education data. For the moment, various sorts of education data exist but are not readily available or useable because of the format in which they are found. Schools use different ICT solutions for their school level data and the Ministry has several different incompatible platforms holding different sorts of data. A unified comprehensive system needs to be developed that allows all data to be managed in one platform. 77. In terms of project implementation arrangements, it would include the following: a. The Ministry has adopted a results-based approach to sector management that deserves to be sustained. All ministerial units now develop annual work plans ( AWP ) that reflect the results outlined in the Education Action Plan 2017-2020. In so doing, activities financed by the project are mainstreamed into their regular duties.", + "ner_text": [ + [ + 443, + 447, + "named" + ] + ], + "validated": false, + "empirical_context": "d. Modernize the EMIS to become a sector-wide platform that manages all education data. For the moment, various sorts of education data exist but are not readily available or useable because of the format in which they are found.", + "type": "system", + "explanation": "However, EMIS is described as a platform and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to managing education data.", + "contextual_reason_agent": "However, EMIS is described as a platform and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "180_multi0page", + "page": 7, + "text": "Progress has also been achieved in some of the other dimensions of human deprivation that affected postwar BH; e. g., improved conditions for return of refugees and the displaced, notably physical security, which has led to increasing numbers of minority returns. But most Bosnians remain far worse off than before the war. And despite high postwar growth rates, BH remains the second-poorest country in the SEE region on a per capita basis. While there are many estimates, there is no reliable recent data on poverty incidence based on a comprehensive household survey [ a Living Standards Measurement Survey ( LSMS ) is underway ]. The best currently available survey data suggest that 27 percent of BH ' s population of about 4. 2 million fall below a relative poverty line, while 11 percent fall below an extreme poverty line. This data also suggest that regional disparities are high: about half the population of Republika Srpska fell below the poverty line, and substantial differences exist among the Federation ' s cantons, with Sarajevo and West Herzegovina having the lowest poverty incidence. The project would complement the LDP and focus on strengthening the institutional and financial capacity of municipalities not eligible under the LDP through the provision of performance-based grant financing for infrastructure and service investments in poorer municipalities.", + "ner_text": [ + [ + 574, + 609, + "named" + ], + [ + 273, + 281, + "Living Standards Measurement Survey <> reference population" + ], + [ + 553, + 569, + "Living Standards Measurement Survey <> data type" + ], + [ + 663, + 674, + "Living Standards Measurement Survey <> data type" + ] + ], + "validated": true, + "empirical_context": "And despite high postwar growth rates, BH remains the second-poorest country in the SEE region on a per capita basis. While there are many estimates, there is no reliable recent data on poverty incidence based on a comprehensive household survey [ a Living Standards Measurement Survey ( LSMS ) is underway ]. The best currently available survey data suggest that 27 percent of BH ' s population of about 4.", + "type": "survey", + "explanation": "However, it is mentioned as 'underway' and not yet providing data, indicating it is not currently a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey aimed at measuring living standards.", + "contextual_reason_agent": "However, it is mentioned as 'underway' and not yet providing data, indicating it is not currently a data source.", + "contextual_signal": "mentioned as a survey that is underway, not yet a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 24, + "text": "However, considering the conflict damage and needs estimates at appraisal and the project \u2019 s financial envelope, it will initially prioritize support to the, Afar, Amhara, Benishangul-Gumuz, Oromia, and Tigray regions. 53 These regions have been highly impacted by the recent conflict, and are currently being assessed by the government and the World Bank via a Conflict Impact Assessment and Recovery and Reconstruction Planning exercise. These regions also host large numbers of IDPs, and are highly vulnerable to the impacts of climate change. During implementation, other regions will be considered based on resource availability and needs. The methodology to select Woredas within the regions will be articulated in the Project Operations Manual ( POM, expected by effectiveness ), taking into account: ( i ) the results of the conflict damage 51 The term \u201c basic services \u201d refers to education, health, WASH, agriculture, and other public services, mobile and / or permanent. 52 Multi-sectoral services are defined as at least two of the following: medical, psychosocial, police / security, and legal support. This indicator is calculated as the number of GBV cases that receive at least two services ( including referrals ) out of the total number of GBV cases that access services. This will be further disaggregated by point of entry for services. At the service provider level and as part of a process evaluation, analysis data from available information management tools will also give a deeper understanding of services available, unavailable, or declined by survivors. As not all survivors need or want to access more than one service, it is not expected to see a percentage increase once about 75 percent of survivors are accessing at least two services services. 53 In alphabetical order. Support will be provided as equitable share of the resources based on the assessment of damages.", + "ner_text": [ + [ + 1454, + 1482, + "named" + ] + ], + "validated": false, + "empirical_context": "This will be further disaggregated by point of entry for services. At the service provider level and as part of a process evaluation, analysis data from available information management tools will also give a deeper understanding of services available, unavailable, or declined by survivors. As not all survivors need or want to access more than one service, it is not expected to see a percentage increase once about 75 percent of survivors are accessing at least two services services.", + "type": "tool", + "explanation": "However, it is not a dataset as it refers to tools rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'information management tools' could imply a structured collection of data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to tools rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 42, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 36 of 102 ( smartphones and tablets ) to gather geo-referenced project data that feeds directly into the central database. This capability will facilitate planning, remote supervision, third-party monitoring, and geo - mapping project beneficiary and implementation progress. 72. The project will support the development of data collection and management mechanisms and capabilities at frontline implementing agencies. With regard to Subcomponent 1. 1. on enhancing labor market responsiveness of formal training providers, data and information will be submitted from the five hubs to the PIU on a quarterly basis. With regard to Subcomponent 1. 2 on modernizing and expanding informal apprenticeship, data and information will be submitted from the 80 CEMs to the national industry apprenticeship unit on a monthly basis and then from the national industry apprenticeship unit to the PIU every quarter. With regard to Component 2 on fostering entrepreneurship, data and information will be submitted from the private sector implementing agencies to the PIU every quarter. Furthermore, in the second and fourth years of the Project, surveys will be conducted with beneficiaries to assess their satisfaction with project interventions. C. Sustainability 73.", + "ner_text": [ + [ + 129, + 156, + "named" + ], + [ + 4, + 14, + "geo-referenced project data <> publisher" + ], + [ + 15, + 22, + "geo-referenced project data <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 36 of 102 ( smartphones and tablets ) to gather geo-referenced project data that feeds directly into the central database. This capability will facilitate planning, remote supervision, third-party monitoring, and geo - mapping project beneficiary and implementation progress.", + "type": "data", + "explanation": "This is indeed a dataset as it is described as project data that is collected and utilized for planning and monitoring purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data that is collected and used for project monitoring.", + "contextual_reason_agent": "This is indeed a dataset as it is described as project data that is collected and utilized for planning and monitoring purposes.", + "contextual_signal": "mentioned as data that feeds directly into the central database", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 82, + "text": "Data source / Agency MoE \u2019 s annual school census using EMIS, Verification Entity ESS Procedure Data collected by MOE and verified by ESS. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 7 Pregnant women and caregivers of children 0-23 months participating in community conversations sessions in 29 selected SPG woredas. Description These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Data source / Agency DHIS2, MoH Verification Entity ESS", + "ner_text": [ + [ + 619, + 624, + "named" + ] + ], + "validated": false, + "empirical_context": "DLI_TBL_VERIFICATION DLI 7 Pregnant women and caregivers of children 0-23 months participating in community conversations sessions in 29 selected SPG woredas. Description These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Data source / Agency DHIS2, MoH Verification Entity ESS", + "type": "system", + "explanation": "However, DHIS2 is a health information system and not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "However, DHIS2 is a health information system and not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "145_689710PAD0P1300Official0Use0Only090", + "page": 183, + "text": "In addition, the census of the population and their assets for the second wave will be completed as soon as possible and preferably before effectiveness. This will be immediately followed by the publication of a cut-off date to discourage people settling within the project site who are hoping to take advantage of compensation. 86. The RAPs and RPFs provide a detailed analysis on the gaps in compensation and resettlement under the national laws in comparison with the Bank \u2019 s OP4. 12. The main areas of difference are the following: ( i ) squatters are not eligible for compensation in the national law; ( ii ) OP4. 12 recommends that, where possible, compensation should be paid in kind rather than in cash; ( iii ) OP4. 12 is more specific on restoring livelihoods, and ( iv ) OP4. 12 includes a monitoring and evaluation plan for the RAP. All the RAPs and RPFs specify that in case of divergence between national laws and Bank \u2019 s OP4. 12, the Bank \u2019 s OP4. 12 applies. The Bank \u2019 s development partners have also agreed to adhere to the OP4. 12 standard; thus, the safeguard documents comply with OP4. 12, the requirements of the other development partners, and with the national law. 87.", + "ner_text": [ + [ + 17, + 58, + "named" + ] + ], + "validated": true, + "empirical_context": "In addition, the census of the population and their assets for the second wave will be completed as soon as possible and preferably before effectiveness. This will be immediately followed by the publication of a cut-off date to discourage people settling within the project site who are hoping to take advantage of compensation.", + "type": "census", + "explanation": "This is indeed a dataset as it involves a systematic collection of data regarding the population and their assets for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of data about the population and their assets.", + "contextual_reason_agent": "This is indeed a dataset as it involves a systematic collection of data regarding the population and their assets for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 16, + "text": "The PforR will push forward the results orientation of the sector plan by rewarding the achievement of results with disbursements. \u2022 Upfront momentum: The PforR instrument will be used to generate momentum around key activities that are potential bottlenecks in the system. Disbursement \u2010 linked indicators ( DLI ) will be a critical tool for shifting the policy dialogue toward results, especially in the initial years. \u2022 Stakeholder harmonization: The PforR will not only enhance the partnership between the government and the World Bank by using the government \u2019 s own systems, but also the harmonization of donor interventions in the sector targeting a common results framework. The NESP lays out a comprehensive results framework that has been developed in coordination with education partners. \u2022 Institutionalization of measurement: The use of the PforR instrument is an opportunity to leverage MOE \u2019 s investments in data systems, such as the OpenEMIS, and to strengthen and institutionalize a culture of measurement of results.", + "ner_text": [ + [ + 950, + 958, + "named" + ], + [ + 901, + 904, + "OpenEMIS <> author" + ], + [ + 924, + 936, + "OpenEMIS <> data type" + ] + ], + "validated": true, + "empirical_context": "The NESP lays out a comprehensive results framework that has been developed in coordination with education partners. \u2022 Institutionalization of measurement: The use of the PforR instrument is an opportunity to leverage MOE \u2019 s investments in data systems, such as the OpenEMIS, and to strengthen and institutionalize a culture of measurement of results.", + "type": "data system", + "explanation": "OpenEMIS is indeed a data system that supports the measurement of results, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of leveraging investments in data systems.", + "contextual_reason_agent": "OpenEMIS is indeed a data system that supports the measurement of results, confirming its role as a data source.", + "contextual_signal": "mentioned as a data system that supports measurement", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 77, + "text": "Chad has one of the highest fertility rates in the world ( 5. 8 births per women ), which severely affects women \u2019 s capacity to participate in the labor market. 44 Women also lack agency for personal decisions; only 23 percent of women were responsible for deciding whether or not to work. 45 In addition, women have lower access to factors of production, such as land, farming inputs, and livestock. 5. Poverty. In both Chad \u2019 s Household Consumption and Informal Sector Surveys from 2003 and 2011, the monetary poverty rate among female-headed households was lower than the rate for male - headed households. In the most recent one ( 2011 ), the poverty rate for female-headed households was 42. 6 percent, compared with 47. 4 percent for households headed by men. 46 However, despite the lower monetary poverty rate, female-headed households experienced slightly higher multidimensional poverty than male-headed households. Multidimensional poverty is measured as an index that includes information about education, health, housing, employment, empowerment, dignity, and personal security, among many others. 47 Similarly, 39. 2 percent of female-headed households are in the bottom wealth quintile, compared to 21. 2 percent of those headed by males. 6.", + "ner_text": [ + [ + 431, + 480, + "named" + ], + [ + 0, + 4, + "Household Consumption and Informal Sector Surveys <> data geography" + ], + [ + 486, + 490, + "Household Consumption and Informal Sector Surveys <> reference year" + ], + [ + 495, + 499, + "Household Consumption and Informal Sector Surveys <> publication year" + ], + [ + 505, + 526, + "Household Consumption and Informal Sector Surveys <> data description" + ], + [ + 533, + 557, + "Household Consumption and Informal Sector Surveys <> reference population" + ], + [ + 637, + 641, + "Household Consumption and Informal Sector Surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "Poverty. In both Chad \u2019 s Household Consumption and Informal Sector Surveys from 2003 and 2011, the monetary poverty rate among female-headed households was lower than the rate for male - headed households. In the most recent one ( 2011 ), the poverty rate for female-headed households was 42.", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as sources of data used for empirical analysis of poverty rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific surveys that collect data on household consumption and poverty rates.", + "contextual_reason_agent": "These surveys are explicitly mentioned as sources of data used for empirical analysis of poverty rates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 51, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 40 Indicator Name People within 200m ( 0. 2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural Indicator Name of which women Definition / Description Frequency Bi-annually. Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural.", + "ner_text": [ + [ + 943, + 948, + "named" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural Indicator Name of which women Definition / Description Frequency Bi-annually. Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report.", + "type": "system", + "explanation": "UPMiS is mentioned as a primary source of data collection, indicating it functions as a data source in this context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UPMiS is a dataset because it is listed among primary data sources.", + "contextual_reason_agent": "UPMiS is mentioned as a primary source of data collection, indicating it functions as a data source in this context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 18, + "text": "Sub-component 1. 1: Institutional and operational reforms to enhance efficiency and transparency of KEMSA ( US $ 30 million ): This sub-component will support: ( a ) building up buffer stocks in KEMSA to ensure timely availability of HPTs at primary care level, thus increasing the order-fill rate, reducing the order turn-around time, and promoting efficiency. Funds will be earmarked for the procurement and distribution of HPTs for primary care services ( levels 1-3 ) in all 47 counties during the life of the project. Counties will draw down HPTs from an agreed list, based on their resource allocation as described in sub-component 2. 1. To ensure transparency and accountability in the procurement process, an HPT governance committee incorporating key stakeholders will be established. Climate sensitive planning for HPTs distribution will be included; ( b ) automation of the procurement processes, through rolling out a new ERP system with extended supply chain modules to ensure end-to-end visibility; and ( c ) strengthening governance and accountability, including development and implementation of an accountability dashboard that provides visibility of the procurement process and distribution of HPTs to various stakeholders. The project will use seasonal data to inform pharmaceutical planning for climate sensitive conditions ( e. g., malaria, cholera, anti-diarrheal medicines, etc. ).", + "ner_text": [ + [ + 1115, + 1139, + "named" + ] + ], + "validated": false, + "empirical_context": "To ensure transparency and accountability in the procurement process, an HPT governance committee incorporating key stakeholders will be established. Climate sensitive planning for HPTs distribution will be included; ( b ) automation of the procurement processes, through rolling out a new ERP system with extended supply chain modules to ensure end-to-end visibility; and ( c ) strengthening governance and accountability, including development and implementation of an accountability dashboard that provides visibility of the procurement process and distribution of HPTs to various stakeholders. The project will use seasonal data to inform pharmaceutical planning for climate sensitive conditions ( e.", + "type": "tool", + "explanation": "However, the term 'accountability dashboard' refers to a tool for monitoring rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'dashboard' which often implies data visualization.", + "contextual_reason_agent": "However, the term 'accountability dashboard' refers to a tool for monitoring rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "012_BOSIB-9a6accb6-73d1-4bd1-8307-d41a339a51ab", + "page": 34, + "text": "For the PCE indicator, at the Implementation Completion and Results Report stage, the number of WSPs achieving OCCR targets will be reviewed if additional commercial loans are accessed, for example, US $ 10 million, which is the balance of the US $ 18 million, net of US $ 8 million PCM, projected by the Water Fund, through a market study, as potential commercial loans that can be leveraged by the participating WSPs. 62. Gender Equality. A baseline survey of WSPs identified key gender gaps in women \u2019 s employment in leadership positions for most of the Program \u2019 s WSPs. Numerous factors contribute to this gender gap including entrenched gender norms that deter women from applying to leadership positions and entering technical fields, lower rates of promotion, and weak or opposing incentives for hiring and promoting women. 45 The baseline for women in leadership positions in the Program \u2019 s WSPs range from 0 to 55 percent. 46 National policies require that no more than two-thirds of either gender occupy leadership positions in WSPs and DLI 3 includes this criteria to achieve a qualifiable \u201c Number of households with access to a sustainably functioning water point. \u201d DLI 6 will incentivize hiring and promoting women in WSPs through implementation of the PIAP.", + "ner_text": [ + [ + 443, + 466, + "named" + ], + [ + 482, + 493, + "baseline survey of WSPs <> data description" + ], + [ + 497, + 502, + "baseline survey of WSPs <> reference population" + ] + ], + "validated": true, + "empirical_context": "Gender Equality. A baseline survey of WSPs identified key gender gaps in women \u2019 s employment in leadership positions for most of the Program \u2019 s WSPs. Numerous factors contribute to this gender gap including entrenched gender norms that deter women from applying to leadership positions and entering technical fields, lower rates of promotion, and weak or opposing incentives for hiring and promoting women.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data specifically aimed at identifying gender gaps in employment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on gender gaps.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data specifically aimed at identifying gender gaps in employment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 39, + "text": "of whom registered in remote and hard to reach areas ( Number ) Description Number of people registered in areas classified by NIDP as \" remote and difficult to reach \" in their registration strategy. These areas are characterized by a combination of limited infrastructure, geographical location, population density, weather conditions, and restrictions due to conflicts or social conditions. Frequency Bi-annual Data source Fayda registration data and reporting from registration partners Methodology for Data Collection Fayda data analytics platform Responsibility for Data Collection NIDP Service delivery for registered persons in Ethiopia is improved. Number of successful digital ID authentications by Fayda ID holders to access public and private sector services ( Number ) Description Number of successful authentications by individuals using their Fayda ID to access either public or private services. Frequency Biannual Data source Fayda services usage data - number of authentication requests received by the system Methodology for Data Collection Fayda data analytics platform Responsibility for Data Collection NIDP", + "ner_text": [ + [ + 426, + 449, + "named" + ], + [ + 127, + 131, + "Fayda registration data <> publisher" + ], + [ + 588, + 592, + "Fayda registration data <> publisher" + ], + [ + 614, + 632, + "Fayda registration data <> reference population" + ], + [ + 636, + 644, + "Fayda registration data <> data geography" + ], + [ + 658, + 705, + "Fayda registration data <> data description" + ] + ], + "validated": true, + "empirical_context": "These areas are characterized by a combination of limited infrastructure, geographical location, population density, weather conditions, and restrictions due to conflicts or social conditions. Frequency Bi-annual Data source Fayda registration data and reporting from registration partners Methodology for Data Collection Fayda data analytics platform Responsibility for Data Collection NIDP Service delivery for registered persons in Ethiopia is improved. Number of successful digital ID authentications by Fayda ID holders to access public and private sector services ( Number ) Description Number of successful authentications by individuals using their Fayda ID to access either public or private services.", + "type": "registration data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data for reporting and analytics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'data source' in the context.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data for reporting and analytics.", + "contextual_signal": "described as a data source for reporting", + "tags": [] + }, + { + "filename": "158_40156", + "page": 53, + "text": "Once changes have been captured, various people sit down together, read the stories aloud and have regular and often in-depth discussions about the value of these reported changes. 31. In addition to surveys and surveillance, routine program monitoring data will be collected for components 1a and 1b, focusing on all the target populations to whom services will be provided, in order to monitor the progress with the implementation and coverage of HIV services to which the project has contributed. As the ACGF is complementary and supplementary funding and in line with the Three Ones principle on one M & E system, routine program monitoring data about medical HIV services generated from health facilities in refugee sites, returnee sites, IDP sites, surrounding populations and in \u2018 hot spot \u2019 areas will not measure services provided to target populations specifically, but it will record increases in service delivery. HIV services delivered in the community however, are based on specific target populations as such data can be collected from UNHCR ( for component 1a ) and the NGOs ( for component 1b ) that implement such projects. All routine data collected, will be based on the country \u2019 s data collection protocols, and will be sent to both the PFO and the NAC M & E unit to ensure that the NACs have ownership and are informed of developments. 49", + "ner_text": [ + [ + 226, + 257, + "named" + ], + [ + 322, + 340, + "routine program monitoring data <> reference population" + ], + [ + 713, + 726, + "routine program monitoring data <> reference population" + ], + [ + 728, + 742, + "routine program monitoring data <> reference population" + ], + [ + 744, + 753, + "routine program monitoring data <> reference population" + ] + ], + "validated": true, + "empirical_context": "31. In addition to surveys and surveillance, routine program monitoring data will be collected for components 1a and 1b, focusing on all the target populations to whom services will be provided, in order to monitor the progress with the implementation and coverage of HIV services to which the project has contributed. As the ACGF is complementary and supplementary funding and in line with the Three Ones principle on one M & E system, routine program monitoring data about medical HIV services generated from health facilities in refugee sites, returnee sites, IDP sites, surrounding populations and in \u2018 hot spot \u2019 areas will not measure services provided to target populations specifically, but it will record increases in service delivery.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected data used for monitoring the implementation and coverage of HIV services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected for monitoring purposes.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected data used for monitoring the implementation and coverage of HIV services.", + "contextual_signal": "described as data collected for monitoring purposes", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 89, + "text": "To assess avoided costs in the without investment scenario, the following information was collected from existing literature: a. Health expenses data were sourced from healthcare centers and from a national health survey28. b. Incidence of major water-borne diseases were also sourced from TBS329 c. Opportunity cost of lost working day was calculated from data of a national employment survey30. ( CFAF 2, 500 per work day ). It was assumed that, for the treatment duration, each patient will be taken care of by one adult. Time spent by this adult translates into an opportunity cost as the adult will forgo revenues he / she could otherwise earn. The daily revenue of the average adult was estimated at CFAF 2, 500 per day ( legal minimum salary ). Therefore, given the population in the different sub - projects, avoided costs included ( i ) the direct costs incurred for different water-borne diseases and ( ii ) indirect costs related to the opportunity costs of adults \u2019 time spent on care. 31 Costs were estimated at CFAF 45, 000 per household per year. ( ii ) Flood avoidance related benefits. To assess avoided costs related to avoiding floods, the following information was collected: a. Frequency and costs of major flood events in the project influence area32.", + "ner_text": [ + [ + 129, + 149, + "named" + ], + [ + 168, + 186, + "Health expenses data <> data geography" + ], + [ + 227, + 266, + "Health expenses data <> data description" + ], + [ + 1289, + 1307, + "Health expenses data <> usage context" + ] + ], + "validated": true, + "empirical_context": "To assess avoided costs in the without investment scenario, the following information was collected from existing literature: a. Health expenses data were sourced from healthcare centers and from a national health survey28. b.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned that the data was sourced from healthcare centers and a national health survey, indicating its use in empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'data' collected from healthcare centers and a national health survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that the data was sourced from healthcare centers and a national health survey, indicating its use in empirical analysis.", + "contextual_signal": "sourced from healthcare centers and from a national health survey", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 17, + "text": "The key drivers of stunting27 are poor nutritional status of women during pregnancy, suboptimal Infant and Young Child Feeding ( IYCF ) practices, poor hygiene practices ( leading to childhood diarrhea ), 28 and inadequate access to quality health services ( such as treatment of childhood diarrhea and Growth Monitoring and Promotion ( GMP ) ), to diverse, nutrient-dense foods, and to Water, Sanitation, and Hygiene ( WASH ) ( Annex 3 ). 29 While the latest national data shows stunting rates at over 40 percent in four regions, the figure drops to 15 percent in Addis Ababa. 30 For learning, basic education services are constrained by a multitude of factors, including insufficient learning materials, ineffective pre-service and in - service teacher training, outdated teaching and learning practices, and poor learning environments. They also demonstrate high regional and socioeconomic inequality, with grade 4 reading scores in the National Learning 21 Simane, Belay et al. \u201c Review of Climate Change and Health in Ethiopia: Status and Gap Analysis. \u201d The Ethiopian journal of health development = Ya ' Ityopya tena lemat mashet vol. 30, 1 Spec Iss ( 2016 ): 28-41. https: / / www. ncbi. nlm. nih. gov / pmc / articles / PMC5578710 / pdf / nihms899377. pdf 22https: / / addisstandard. com / news-more-than-7800-students-dropout-of-school-as-impact-of-prolonged-drought-takes-toll-in-borana-oromia-region", + "ner_text": [ + [ + 460, + 473, + "named" + ], + [ + 565, + 576, + "national data <> data geography" + ], + [ + 961, + 981, + "national data <> author" + ], + [ + 1159, + 1163, + "national data <> publication year" + ] + ], + "validated": true, + "empirical_context": "The key drivers of stunting27 are poor nutritional status of women during pregnancy, suboptimal Infant and Young Child Feeding ( IYCF ) practices, poor hygiene practices ( leading to childhood diarrhea ), 28 and inadequate access to quality health services ( such as treatment of childhood diarrhea and Growth Monitoring and Promotion ( GMP ) ), to diverse, nutrient-dense foods, and to Water, Sanitation, and Hygiene ( WASH ) ( Annex 3 ). 29 While the latest national data shows stunting rates at over 40 percent in four regions, the figure drops to 15 percent in Addis Ababa. 30 For learning, basic education services are constrained by a multitude of factors, including insufficient learning materials, ineffective pre-service and in - service teacher training, outdated teaching and learning practices, and poor learning environments.", + "type": "data", + "explanation": "In this context, 'national data' is used to refer to specific statistical information regarding stunting rates, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'national data' refers to a dataset because it implies a collection of statistical information about stunting rates.", + "contextual_reason_agent": "In this context, 'national data' is used to refer to specific statistical information regarding stunting rates, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 11, + "text": "The direct contribution of the ICT sector to GDP is 4 percent in 20186 and employment in the sector increased from approximately 18, 000 employees in 2016 to 21, 811 in 2018. ICT sector revenues increased from JD677 million in 2017 to JD750 million in 2018 ( a growth rate of 10. 7 percent ), 7 which was fivefold higher than Jordan \u2019 s GDP growth in the same year ( 1. 94 percent ). 8 Women accounted for approximately 33 percent of employment in the sector in 2018 ( 4, 505 female employees ), which is above the national average. 9 ICT services accounted for 5. 7 percent of exports10 and 21. 6 percent of value added in 2017. 11 7. The IFC / World Bank Country Private Sector Diagnostic ( CPSD ) for Jordan confirmed the importance and the potential of the ICT sector for growth. The CPSD highlighted IT outsourcing ( ITO ) / Business Process Outsourcing ( BPO ), and digital entrepreneurship as key potential activities. In addition to recommended reforms in the telecom 5 Excluding public administration. 6 ICT Association of Jordan ( Intaj ) sector profile, 2018 7 ibid 8 World Bank National Accounts Data, 2018 9 Labor Force Survey, 2018 ( DoS ) 10 The Atlas of Economic Complexity, 2017 11 ibid", + "ner_text": [ + [ + 1121, + 1139, + "named" + ], + [ + 169, + 173, + "Labor Force Survey <> publication year" + ], + [ + 252, + 256, + "Labor Force Survey <> publication year" + ], + [ + 326, + 332, + "Labor Force Survey <> data geography" + ], + [ + 704, + 710, + "Labor Force Survey <> data geography" + ], + [ + 1065, + 1069, + "Labor Force Survey <> publication year" + ], + [ + 1114, + 1118, + "Labor Force Survey <> publication year" + ], + [ + 1141, + 1145, + "Labor Force Survey <> publication year" + ], + [ + 1219, + 1237, + "Labor Force Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "In addition to recommended reforms in the telecom 5 Excluding public administration. 6 ICT Association of Jordan ( Intaj ) sector profile, 2018 7 ibid 8 World Bank National Accounts Data, 2018 9 Labor Force Survey, 2018 ( DoS ) 10 The Atlas of Economic Complexity, 2017 11 ibid", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is labeled as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 19, + "text": "The project will deliver benefits to 1. 6 million residents across the GBML, located within the BMLWE areas of service provision that are geographically divided into four zones and 21 municipalities. Beneficiaries will benefit from increased volume and quality of public water provided to the household and a subsequent decrease in the cost of alternative sources of water. 13 39. The decrease in total cost of water will directly and positively impact the poor. Of the 506, 000 people across the GBML that live below US $ 4 per day, 460, 000 are located in the project area, as determined by a project specific survey of 1, 200 project households, Lebanon \u2019 s 2005 Poverty Assessment, and available census data. 40. A household survey of 1, 200 beneficiary households across the GBML was conducted as part of project preparation. Half the project survey respondents reported per capita incomes of less than LBP 600, 000 LBP ( US $ 400 ) per month, equivalent to less than the US $ 4 per day national poverty line. A map of the percent of surveyed households within each municipality whose monthly income is within the bottom third relative to the sample is presented in Figure 1: 13 Households currently buy tanker water, bottled water and / or construct private wells to supplement the low volumes of public water. Water supplied by the Bisri dam will substitute these alternative sources of water supply. 8", + "ner_text": [ + [ + 719, + 735, + "named" + ], + [ + 71, + 75, + "household survey <> data geography" + ], + [ + 497, + 501, + "household survey <> data geography" + ], + [ + 746, + 768, + "household survey <> reference population" + ], + [ + 780, + 784, + "household survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "40. A household survey of 1, 200 beneficiary households across the GBML was conducted as part of project preparation. Half the project survey respondents reported per capita incomes of less than LBP 600, 000 LBP ( US $ 400 ) per month, equivalent to less than the US $ 4 per day national poverty line.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of data gathered from beneficiary households for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data from respondents.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data gathered from beneficiary households for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "012_BOSIB-9a6accb6-73d1-4bd1-8307-d41a339a51ab", + "page": 47, + "text": "Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Rural water supply schemes constructed under the program that adopt a WASH plus approach ( provide water for multiple productive uses beyond doemstic portable water supply ) ( Number ) Description This indicator measures the number of rural water schemes constructed by each county that provide water for productive uses such as irrigation and so on beyond domestic portable water supply. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Climate-vulnerable households provided with access to improved water services ( Number ) DLI Description This indicator measures the cumulative number of households that have access to an improved water source constructed through the program. The definition of a household as applied in the 2022 Kenya Demographic and Health Survey ( KDHS ) applies. The KDHS indicates that the mean household size in rural areas is four people. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Sustainably functioning rural water supply schemes ( Number ) Description This indicator primarily measures the number of water schemes in the participating counties that are operated under an approved professional service provider model as per WASREB regulations. This is to ensure functionality of the schemes with functionality defined in the POM. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Qualitative inspections and quantitative data collection using M & E protocols defined in the POM", + "ner_text": [ + [ + 1265, + 1300, + "named" + ], + [ + 1260, + 1264, + "Kenya Demographic and Health Survey <> publication year" + ], + [ + 1303, + 1307, + "Kenya Demographic and Health Survey <> acronym" + ], + [ + 1323, + 1327, + "Kenya Demographic and Health Survey <> acronym" + ], + [ + 1347, + 1396, + "Kenya Demographic and Health Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Climate-vulnerable households provided with access to improved water services ( Number ) DLI Description This indicator measures the cumulative number of households that have access to an improved water source constructed through the program. The definition of a household as applied in the 2022 Kenya Demographic and Health Survey ( KDHS ) applies. The KDHS indicates that the mean household size in rural areas is four people.", + "type": "survey", + "explanation": "It is indeed a dataset as it provides structured data used for empirical analysis regarding household definitions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source for defining household size.", + "contextual_reason_agent": "It is indeed a dataset as it provides structured data used for empirical analysis regarding household definitions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 35, + "text": "The data protection policy of the government will guide management of beneficiary data and data protection. The Project will build government capacity related to hardware, software, regulatory framework, data management protocols, data protection guidelines, and organizational capacity to ensure protection of beneficiary data. Putting in place an integrated monitoring system will provide a strong basis for ongoing learning based on emerging findings and will also streamline processes of independent data verification by third parties, such as external evaluators. C. Sustainability 91. The proposed Project promotes sustainability of its outcomes in several ways. First, the Project objectives and activities are fully aligned with South Sudan \u2019 s education sector strategy and directly support key pillars of the strategy, with a focus on building a pipeline of qualified teachers and strengthening capacity of in-service teachers. Second, the Project will support the training of local instructors and equipping of existing local institutions of higher education to deliver digital skills in agriculture training, building the country \u2019 s capacity to deliver cutting-edge skills training in a priority sector. Third, the Project puts emphasis on building systems and institutions in all aspects of project implementation. The Project will support development and / or finalization of government policies followed by support for implementation of these policies. Specifically, the Project will support development of a", + "ner_text": [ + [ + 349, + 377, + "named" + ] + ], + "validated": false, + "empirical_context": "The Project will build government capacity related to hardware, software, regulatory framework, data management protocols, data protection guidelines, and organizational capacity to ensure protection of beneficiary data. Putting in place an integrated monitoring system will provide a strong basis for ongoing learning based on emerging findings and will also streamline processes of independent data verification by third parties, such as external evaluators. C.", + "type": "system", + "explanation": "However, it is described as a system rather than a data source, indicating it does not function as a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'integrated monitoring system' suggests a structured approach to data management.", + "contextual_reason_agent": "However, it is described as a system rather than a data source, indicating it does not function as a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 11, + "text": "Under a baseline scenario, real GDP is expected to grow by around 3. 5-4. 0 percent in fiscal 2022 and about 5. 5 percent in fiscal 2023. Considering large global and domestic uncertainties, such as oil and food price shocks exacerbated by the war in Ukraine, the recovery could be slower. 2. The COVID-19 shock has been accompanied by increases in poverty and unemployment. According to the latest Uganda National Household Survey ( UNHS ), although overall poverty in 2019 / 20 ( 20. 3 percent ) was slightly lower than in 2016 / 17 ( 21. 4 percent ), poverty in the COVID-19 period was significantly higher than in the pre-COVID-19 period. 1 It increased to 21. 9 percent during the first COVID-19 wave. Rising unemployment and work stoppages have pushed many Ugandans, especially women, back into subsistence agriculture, setting back achievement of the country \u2019 s development goal of reducing the share of the population dependent on subsistence agriculture as a main source of livelihood from 69 to 55 percent between 2020 / 21 and 2024 / 25. 2 3. Uganda is experiencing accelerating impacts from climate change that affect livelihoods in key sectors. Rising temperatures and variability in rainfall is producing more intense and longer lasting droughts as well as more frequent, heavy precipitation events leading to flooding and landslides.", + "ner_text": [ + [ + 399, + 431, + "named" + ], + [ + 399, + 405, + "Uganda National Household Survey <> data geography" + ], + [ + 470, + 479, + "Uganda National Household Survey <> publication year" + ], + [ + 525, + 534, + "Uganda National Household Survey <> reference year" + ], + [ + 763, + 771, + "Uganda National Household Survey <> reference population" + ], + [ + 1055, + 1061, + "Uganda National Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The COVID-19 shock has been accompanied by increases in poverty and unemployment. According to the latest Uganda National Household Survey ( UNHS ), although overall poverty in 2019 / 20 ( 20. 3 percent ) was slightly lower than in 2016 / 17 ( 21.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data used to analyze poverty levels in Uganda.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on poverty.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used to analyze poverty levels in Uganda.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "121_PAD1190-PAD-P152848-PUBLIC-Box391435B-LB-EESSP-Final-PAD-for-printing", + "page": 20, + "text": "12 sensitive and politicized issues. In recognition of this, significant resources have been allocated under Component 3 to the recruitment of an experienced and qualified M & E Specialist as a key member of the PMU staff. The relevant data sources, on which results monitoring rely, are readily available and many data relevant to this operation are routinely collected. C. Sustainability 38. The imperative of this Project is for a rapid response to the influx of Syrian students in order to maintain a functioning public education system. Nonetheless, the investments in rehabilitation, school finance and textbooks will likely enhance the quality of the learning environment both in the immediate and medium term. Keeping children in school that might otherwise drop out of the system or not have access to educational opportunity at all will increase human capital and improve earning potential ( see Section VI for details ). If the public system were to decline significantly in quality, an exodus of both Lebanese and Syrian children would be expected in response to declining returns. 39. Predictable support to the national education system presents an opportunity to positively influence the lives of a generation of Lebanese and Syrian children, an investment in future jobs and economic prospects, and potentially could reduce vulnerability to radicalization amongst young people.", + "ner_text": [ + [ + 236, + 248, + "named" + ] + ], + "validated": false, + "empirical_context": "In recognition of this, significant resources have been allocated under Component 3 to the recruitment of an experienced and qualified M & E Specialist as a key member of the PMU staff. The relevant data sources, on which results monitoring rely, are readily available and many data relevant to this operation are routinely collected. C.", + "type": "data source", + "explanation": "However, 'data sources' in this context refers to sources of information rather than a specific dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'data sources' refers to structured collections of data used for analysis.", + "contextual_reason_agent": "However, 'data sources' in this context refers to sources of information rather than a specific dataset.", + "contextual_signal": "mentioned only as a source of information, not as a dataset", + "tags": [] + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 133, + "text": "During the first year of the project, the accreditation program will be developed by the DESG, supported by a consulting firm. The trainings under the accreditation program will also be managed and carried out by the DESG. Each head teacher will have online and face-to-face training. The online training will be self-paced, while each head teacher will receive two sessions of face-to-face training. However, the accreditation process should be completed by a head teacher within a year of staring the accreditation process or as explained in the accreditation program. 17. After validation of the program by MINESEC, the PCU will request an approval from the World Bank. DESG will use modern technology tools for implementation and monitoring of secondary school management performance of head teachers through a standardized indicator system. The standardized indicator system will be defined in the accreditation program. The first assessment on the competency of head teachers compared to the standards will be recorded by the DESG, which will provide trainings and mentoring for head teachers to improve their performance. After a year of implementation of the training ( that is, during the third year of implementation ), the accreditation program for head teachers will be evaluated by the IVA, and DESG will update", + "ner_text": [ + [ + 815, + 844, + "named" + ] + ], + "validated": false, + "empirical_context": "After validation of the program by MINESEC, the PCU will request an approval from the World Bank. DESG will use modern technology tools for implementation and monitoring of secondary school management performance of head teachers through a standardized indicator system. The standardized indicator system will be defined in the accreditation program.", + "type": "system", + "explanation": "However, it is described as a system for monitoring performance, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply structured data.", + "contextual_reason_agent": "However, it is described as a system for monitoring performance, not as a data source.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 42, + "text": "The World Bank Senegal, Casamance Economic Development Project ( P175325 ) Page 38 of 72 engagement processes supported under Component 1. Strengthening local governance and enhancing access to local services for improved inclusion and resilience, the socio-economic empowerment will be supported through local capacity building, inclusive targeting and linking women and youth to tailored and relevant socio-economic resources under Components 1 and 3. Finally, through consulting women and girls on their local transportation needs and the movement restrictions they may face, the project will seek to design the connectivity component in a way that accounts for these needs and barriers, thus increasing women \u2019 s access both to services and markets under Component 2. Finally, gaps in women \u2019 s agency and voice will be addressed through leadership development and the promotion of GBV prevention and mitigation measures and response mechanisms. 101. Gender tag results chain. Although this project has many activities to address gender, for the gender tag, the project \u2019 s theory of change addresses the issues of women being constrained from using health services due to a lack of time and distance. To illustrate, a 2011 Senegal Demographic and Health and Multiple Indicator Cluster Survey found that survey only 45 percent of births in Casamance are attended by trained personnel, leaving women more exposed to maternal mortality or birth complications.", + "ner_text": [ + [ + 1228, + 1296, + "named" + ], + [ + 24, + 33, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 362, + 367, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 1119, + 1124, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 1223, + 1227, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> publication year" + ], + [ + 1228, + 1235, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 1397, + 1402, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Although this project has many activities to address gender, for the gender tag, the project \u2019 s theory of change addresses the issues of women being constrained from using health services due to a lack of time and distance. To illustrate, a 2011 Senegal Demographic and Health and Multiple Indicator Cluster Survey found that survey only 45 percent of births in Casamance are attended by trained personnel, leaving women more exposed to maternal mortality or birth complications.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context as a source of empirical data regarding health service usage.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context as a source of empirical data regarding health service usage.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "ner_text": [ + [ + 347, + 353, + "named" + ], + [ + 17, + 21, + "Survey <> reference year" + ], + [ + 92, + 96, + "Survey <> publication year" + ], + [ + 241, + 246, + "Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase.", + "type": "survey", + "explanation": "In this context, 'Survey' refers to the Kenya Demographic and Health Survey (KDHS), which is explicitly mentioned as a source of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Survey' is often associated with structured data collection.", + "contextual_reason_agent": "In this context, 'Survey' refers to the Kenya Demographic and Health Survey (KDHS), which is explicitly mentioned as a source of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 19, + "text": "Subcomponent 1. 2: Strengthening health sector stewardship ( US $ 3. 39 million equivalent ) will support strengthening health sector stewardship in selected refugee hosting districts through: ( a ) improving availability, quality, and use of routine health data via, inter alia, ( i ) developing and implementing a digital human resources database, ( ii ) digitizing DHIS and integrating selected parallel reporting systems, ( iii ) providing training, equipment, and operational support to health services providers for implementation and operationalization of HMIS, ( iv ) creating a user \u2010 friendly dashboard for decision making; and ( v ) supporting data review meetings and data quality checks; and ( b ) providing training to, and building capacity of, key managerial and technical staff on selected health system strengthening subjects. The project will support a real \u2010 time system monitoring of staff presence at HFs. Training of managerial and technical staff at provincial and district levels in various health system strengthening areas directly linked to improving effectiveness, efficiency, and sustainability of service delivery will include contract management of private \u2010 public partnerships, public financial management, monitoring and supervision, human resources for health ( HRH ) management, and supply chain management.", + "ner_text": [ + [ + 563, + 567, + "named" + ] + ], + "validated": false, + "empirical_context": "2: Strengthening health sector stewardship ( US $ 3. 39 million equivalent ) will support strengthening health sector stewardship in selected refugee hosting districts through: ( a ) improving availability, quality, and use of routine health data via, inter alia, ( i ) developing and implementing a digital human resources database, ( ii ) digitizing DHIS and integrating selected parallel reporting systems, ( iii ) providing training, equipment, and operational support to health services providers for implementation and operationalization of HMIS, ( iv ) creating a user \u2010 friendly dashboard for decision making; and ( v ) supporting data review meetings and data quality checks; and ( b ) providing training to, and building capacity of, key managerial and technical staff on selected health system strengthening subjects. The project will support a real \u2010 time system monitoring of staff presence at HFs.", + "type": "system", + "explanation": "However, HMIS is mentioned as a system for managing health information, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it relates to health data management.", + "contextual_reason_agent": "However, HMIS is mentioned as a system for managing health information, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 107, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 102 of 130 percent ( which is the average share of female asset ownership in Uganda ) to 30 percent, corresponding to about 1, 200 loans for female recipients under the project. Finally, focus will be placed in ensuring female-led commercial enterprises also have access to finance for modern energy technologies for use in their enterprises. An increase in access to finance of female-led commercial enterprises is expected from the current 24 percent to 35 percent, which corresponds to about 1, 750 recipient enterprises, to bridge the gap in female-led enterprises and female access to resources. B. Enhancing Data and Knowledge on Male and Female Consumers 6. UBOS / MTF Survey update. Lessons learned from international experiences and best practices indicate the key role played by up-to-date data and analytics for the efficient and effective delivery of services. To provide more insight on the access outlook for the country, the Electrification Survey Report-2018 ( UBOS, 2020 ) will be updated to include more granular information on grid and off-grid gender disaggregated access rate. The information collected would feed into the national geospatial planning platform for access targeted to socioeconomic development. The activity will be implemented by the MEMD and UBOS, in close collaboration with the SPs.", + "ner_text": [ + [ + 749, + 759, + "named" + ], + [ + 154, + 160, + "MTF Survey <> data geography" + ], + [ + 1017, + 1051, + "MTF Survey <> reference year" + ], + [ + 1060, + 1064, + "MTF Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Enhancing Data and Knowledge on Male and Female Consumers 6. UBOS / MTF Survey update. Lessons learned from international experiences and best practices indicate the key role played by up-to-date data and analytics for the efficient and effective delivery of services.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is mentioned in relation to enhancing data and knowledge, indicating its use in empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MTF Survey' suggests a structured collection of data from a survey.", + "contextual_reason_agent": "The context confirms it is a dataset as it is mentioned in relation to enhancing data and knowledge, indicating its use in empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 39, + "text": "To help increase access to fodder, especially during the dry season, the project will support the development of commercial fodder production. Cash-for-work programs under this project will emphasize the rehabilitation or construction of new water management systems that are critical to community and livelihoods recovery. D. Role of Partners 80. This Project has been designed following an inclusive multi-stakeholder and multi-partner process. The project team worked closely with FAO and ICRC to inform and design the project components, to ensure complementarity, geographical and programmatic harmonization. Whereas the ICRC will deliver immediate food, water and cash response to the drought affected population, FAO will provide livelihood 29 FAO \u2019 s impact assessment report shall present a detailed evaluation, which includes key data such as a Food Consumption Index, the dietary diversity score and the increase in Tropical Livestock Unit ( TLU ). Data collection will use the RIMA-based questionnaire ( RIMA \u2013 Resilience Index Measurement and Analysis )", + "ner_text": [ + [ + 883, + 906, + "named" + ] + ], + "validated": false, + "empirical_context": "The project team worked closely with FAO and ICRC to inform and design the project components, to ensure complementarity, geographical and programmatic harmonization. Whereas the ICRC will deliver immediate food, water and cash response to the drought affected population, FAO will provide livelihood 29 FAO \u2019 s impact assessment report shall present a detailed evaluation, which includes key data such as a Food Consumption Index, the dietary diversity score and the increase in Tropical Livestock Unit ( TLU ). Data collection will use the RIMA-based questionnaire ( RIMA \u2013 Resilience Index Measurement and Analysis )", + "type": "data", + "explanation": "However, the dietary diversity score is a specific measure rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes a specific metric related to food consumption.", + "contextual_reason_agent": "However, the dietary diversity score is a specific measure rather than a structured collection of data.", + "contextual_signal": "mentioned only as a metric, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 53, + "text": "However its function is to ultimately serve as the foundation for a registry of extreme poor households, which would be used for all targeted key programs as identified in the NSPS, and for the complementary targeted interventions of CNPS member ministries. A registry is a public good and its use will lower the overall cost of targeting. The fact that the targeting will use clear and transparent rules ( explicit criteria, community involvement, and public validation ) in the registration of extreme poor households will contribute to strengthening the governance of the social assistance sector. This is especially important in the Burundian context of recurrent fragility and persistent post-conflict divisions. 24. The development of the database will take place in phases, according to the availability of poverty information and the implementation of the cash transfer program. Initially, the program will start in selected provinces22 ( the provinces were selected as the ones with the highest combination of monetary poverty from ECVMB 2013-14 and chronic malnutrition from DHS 2010 and include Gitega, Karuzi, Kirundo and Ruyigi ) for the purpose of identifying potential beneficiaries of the cash transfer program. Given the implementation of a US $ 50 million Food-for-Peace project in the province of Muyinga23, targeting all mothers of children less than two years of age, the project will not work in that province.", + "ner_text": [ + [ + 68, + 103, + "named" + ], + [ + 637, + 646, + "registry of extreme poor households <> data geography" + ], + [ + 1106, + 1112, + "registry of extreme poor households <> data geography" + ], + [ + 1114, + 1120, + "registry of extreme poor households <> data geography" + ], + [ + 1122, + 1129, + "registry of extreme poor households <> data geography" + ], + [ + 1134, + 1140, + "registry of extreme poor households <> data geography" + ] + ], + "validated": true, + "empirical_context": "However its function is to ultimately serve as the foundation for a registry of extreme poor households, which would be used for all targeted key programs as identified in the NSPS, and for the complementary targeted interventions of CNPS member ministries. A registry is a public good and its use will lower the overall cost of targeting.", + "type": "registry", + "explanation": "This is indeed a dataset as it serves as a foundation for targeted programs and interventions, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of information about extreme poor households.", + "contextual_reason_agent": "This is indeed a dataset as it serves as a foundation for targeted programs and interventions, indicating its use as a data source.", + "contextual_signal": "described as a registry that serves as a foundation for targeted programs", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 40, + "text": "The Internal Efficiency of the system is weak, particularly due to very high dropout rates and low learning achievements of pupils. The IEC at the primary level is particularly low ( 39 percent ), which implies that more than half of public resources are wasted in paying for repeated grades or schooling for students who dropout before cycle completion. Economic Rationale for Public Investment in Sustaining Basic Education Enrollment in Sudan 8. The rationale for public sector financing of basic education is well established. Investments under the Project would strengthen efficiency and equity at the basic level overall, likely contributing to improved learning outcomes at the school level. The pressing needs and challenges for both improved efficiency and equity warrant public sector support consistent with Sudan \u2019 s commitment to providing Universal Primary Education of reasonable quality to all children. 9. Investment in basic education in Sudan is justified by the low NER ( 69 percent ) and completion rate ( 55 percent ) and weak learning levels among enrolled students. National Learning Assessment conducted in all 18 states of Sudan found that Grade 3 students performed very poorly. On average, 40 percent of pupil are not able to read a single word. This suggests that there is not only a large proportion of school-age children out of school but even when in school many students are not learning. The Project \u2019 s Development Impact 10. The project is expected to contribute positively to Sudan \u2019 s education system and national economic development. It aims to sustain enrollment in public schools during the economic crises and pandemic. To that end, it is expected that the proposed interventions will affect the probability of a child completing primary education and transitioning to the secondary level. This, in turn, will yield gains in labor earnings measured 3 Authors \u2019 estimation based on 2018 School Census data and reported USD / SDG exchange rate ( Economist ).", + "ner_text": [ + [ + 1926, + 1949, + "named" + ] + ], + "validated": true, + "empirical_context": "To that end, it is expected that the proposed interventions will affect the probability of a child completing primary education and transitioning to the secondary level. This, in turn, will yield gains in labor earnings measured 3 Authors \u2019 estimation based on 2018 School Census data and reported USD / SDG exchange rate ( Economist ).", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as '2018 School Census data' which serves as a data source for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific census data used for empirical analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as '2018 School Census data' which serves as a data source for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 14, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 11 of 89 Listening to Tajikistan Survey21 indicated a strong seasonal correlation of share of households reporting water disruptions, even for households with no connection to municipal water systems, with no significant difference between the bottom 40 and top 60 percent of population, suggesting heavy reliance on open water sources \u2014 largely irrigation canals during the off-season. However, the number of reported days with water disruptions varies from two to six days between urban and rural populations accordingly. 22 12. The quality of water collected from open water sources is usually poor, correlating with incidences of waterborne illnesses, such as diarrhea, and increasingly worrisome in areas with high density of population, heavy reliance on untreated surface water for drinking, and experiencing rapid increase in temperatures. The Household WASH Survey confirmed this finding at the national level, with 16 percent of respondents reporting experiencing gastrointestinal disorders due to poor water quality. While official statistics on waterborne diseases and diseases associated with inadequate WSS are largely underestimated, representatives of local government, schools, and health clinics, as well as local leaders also identified the poor quality of drinking water as the main cause of diarrhea outbreaks, along with poor sanitation and hygiene conditions.", + "ner_text": [ + [ + 942, + 963, + "named" + ], + [ + 4, + 14, + "Household WASH Survey <> publisher" + ], + [ + 1350, + 1380, + "Household WASH Survey <> data description" + ], + [ + 1488, + 1506, + "Household WASH Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "The quality of water collected from open water sources is usually poor, correlating with incidences of waterborne illnesses, such as diarrhea, and increasingly worrisome in areas with high density of population, heavy reliance on untreated surface water for drinking, and experiencing rapid increase in temperatures. The Household WASH Survey confirmed this finding at the national level, with 16 percent of respondents reporting experiencing gastrointestinal disorders due to poor water quality. While official statistics on waterborne diseases and diseases associated with inadequate WSS are largely underestimated, representatives of local government, schools, and health clinics, as well as local leaders also identified the poor quality of drinking water as the main cause of diarrhea outbreaks, along with poor sanitation and hygiene conditions.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to confirm findings at the national level regarding water quality and health issues.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical findings.", + "contextual_reason_agent": "This is indeed a dataset as it is used to confirm findings at the national level regarding water quality and health issues.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 31, + "text": "Page | 22 secretary general ), which should have a significant impact on the quality of jobs for women, since most employed women work for the government. 2 ) Potential gender inequalities in the access to and use of DPI. Although a slightly higher percentage of women ( 97. 9 percent ) own a physical ID than men ( 95. 8 percent ), 17 as of January 2024, only 35 percent of the over 800, 000 people who have activated their digital ID on Sanad are women. Given the overall low activation rate of digital ID as well as the existing gender gap in access to mobile phones and internet ( 21 and 10 percentage points, respectively ), 18 the operation will focus on gender-equitable expansion of digital ID, ensuring that gender gaps are mitigated as the system grows. Similarly, of the approximately 90, 000 total visitors at the two existing GSCs since their inauguration, fewer than 20 percent have been women. 19 With the GOJ \u2019 s planned expansion to 15 GSCs nationwide, the operation will focus on avoiding the creation of a gender gap in the utilization of GSCs and access to the services they offer. 59. Accordingly, the Program will promote gender equity as follows: 1 ) It will support the increased representation of women in leadership positions in the civil service.", + "ner_text": [ + [ + 439, + 444, + "named" + ] + ], + "validated": false, + "empirical_context": "9 percent ) own a physical ID than men ( 95. 8 percent ), 17 as of January 2024, only 35 percent of the over 800, 000 people who have activated their digital ID on Sanad are women. Given the overall low activation rate of digital ID as well as the existing gender gap in access to mobile phones and internet ( 21 and 10 percentage points, respectively ), 18 the operation will focus on gender-equitable expansion of digital ID, ensuring that gender gaps are mitigated as the system grows.", + "type": "program", + "explanation": "'Sanad' is mentioned as a system for digital ID but not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is mentioned in the context of digital ID activation.", + "contextual_reason_agent": "'Sanad' is mentioned as a system for digital ID but not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [] + }, + { + "filename": "158_40156", + "page": 52, + "text": "29. Advocacy and communications about this new HIV ME system is essential. For this reason, communications plan will be included as part of the annual costed M & E work plan and will involve the PFO and M & E champions in each IGAD country. 30. Surveys and surveillance data will be used to track PDO-level project results. There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations. The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations.", + "ner_text": [ + [ + 686, + 689, + "named" + ], + [ + 702, + 733, + "BSS <> data type" + ], + [ + 770, + 778, + "BSS <> reference population" + ], + [ + 780, + 789, + "BSS <> reference population" + ], + [ + 791, + 795, + "BSS <> reference population" + ] + ], + "validated": true, + "empirical_context": "There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations. The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations.", + "type": "survey", + "explanation": "BSS is indeed a dataset as it is described as a customized behavioral surveillance survey that will collect data from specific populations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed BSS is a dataset because it refers to a specific type of survey that collects data.", + "contextual_reason_agent": "BSS is indeed a dataset as it is described as a customized behavioral surveillance survey that will collect data from specific populations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 36, + "text": "CCAP will also take advantage of the third-party independent monitoring arrangements under the ARTF. The third party monitors will provide critical data and a level of additional evidence from the field to complement the government monitoring systems and Bank missions. CCAP will make use of their reviews of infrastructure quality as well as their satellite imagery data in order to verify infrastructure assets and gaps based upon the initial needs assessment. The third party monitors will also review the achievement of the service standards, social inclusion dimensions, and CDC organizational maturity. 55. Evaluation and Studies. The Citizens \u2019 Charter provides a rich environment for testing various hypotheses important for development effectiveness in Afghanistan as well as other fragile and conflict situations. The Government together with the World Bank have discussed eight studies, some to be funded through CCAP and others through additional government resources. Due to the purposive selection methodology by which locations were chosen for CCAP, a randomized impact evaluation may not be possible. However, several unique evaluations are planned and there is room to potentially undertake some nested experimental studies on citizens \u2019 monitoring and reporting.", + "ner_text": [ + [ + 349, + 371, + "named" + ], + [ + 762, + 773, + "satellite imagery data <> data geography" + ], + [ + 1296, + 1314, + "satellite imagery data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The third party monitors will provide critical data and a level of additional evidence from the field to complement the government monitoring systems and Bank missions. CCAP will make use of their reviews of infrastructure quality as well as their satellite imagery data in order to verify infrastructure assets and gaps based upon the initial needs assessment. The third party monitors will also review the achievement of the service standards, social inclusion dimensions, and CDC organizational maturity.", + "type": "data", + "explanation": "In this context, it is used to verify infrastructure assets and gaps, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satellite imagery data' suggests a collection of data derived from satellite images.", + "contextual_reason_agent": "In this context, it is used to verify infrastructure assets and gaps, indicating it functions as a data source.", + "contextual_signal": "mentioned as a source of data for verification", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 27, + "text": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 538, + 541, + "named" + ] + ], + "validated": false, + "empirical_context": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "type": "program", + "explanation": "However, LFS is referred to as a system for labor statistics, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed LFS is a dataset because it is mentioned in the context of data collection and analysis.", + "contextual_reason_agent": "However, LFS is referred to as a system for labor statistics, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 45, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 41 of 47 Education Component CBA 13. The project is estimated to generate a total of 22, 550 additional years of schooling by increasing the number of children enrolled in project schools by 10 percent annual growth rate and the school completion rate, that is, the share of students enrolled in grade 1 of a school level who graduate from it, by 40 percent. In the absence of longitudinal data to estimate current completion rates, the CBA is limited to the benefits of additional years of schooling due to the project \u2019 s impact on school enrollment. The sex and grade distribution of students in each project district from Balochistan \u2019 s EMIS is used to determine the share of girls and boys in each grade among the 18, 000 students currently enrolled. Assuming no impact in the first project year, the number of additional boys and girls enrolled in each grade for each year in FY21 \u2013 FY24 is then estimated using the targeted annual growth rate of enrollment ( table 1. 5 ). Table 1. 5.", + "ner_text": [ + [ + 454, + 471, + "named" + ] + ], + "validated": false, + "empirical_context": "The project is estimated to generate a total of 22, 550 additional years of schooling by increasing the number of children enrolled in project schools by 10 percent annual growth rate and the school completion rate, that is, the share of students enrolled in grade 1 of a school level who graduate from it, by 40 percent. In the absence of longitudinal data to estimate current completion rates, the CBA is limited to the benefits of additional years of schooling due to the project \u2019 s impact on school enrollment. The sex and grade distribution of students in each project district from Balochistan \u2019 s EMIS is used to determine the share of girls and boys in each grade among the 18, 000 students currently enrolled.", + "type": "data", + "explanation": "'Longitudinal data' is mentioned in the context of a limitation rather than as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'longitudinal data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Longitudinal data' is mentioned in the context of a limitation rather than as a data source itself.", + "contextual_signal": "mentioned only as a limitation, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 39, + "text": "The World Bank Public Administration Modernization Project ( P162904 ) Page 36 of 69 Intermediate Results Indicators Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Name: Number of e-ID cards issued Number ( Thousand ) 0. 00 200. 00 Annual DGP in the Ministry of Interior will be in charge of providing e-ID and will keep track of data pertaining to this indicator. ANSIE will a be another source of data since each e-ID card will reference to the aggregated and integrated e-ID database that will be created and managed by ANSIE. DGF / Ministry of Interior and ANSIE / PCU Number of e-ID cards issued to women Number ( Thousand ) 0. 00 100. 00 Description: Physical e-ID cards that will be requested and issued to adult citizens. This intermediate indicator is linked to the percent of population with unique ID outcome indicator. A total of 200, 000 e-ID cards are expected to by issued at the end of the project. As the new e-ID system would take two years to be built, issuance of e-ID cards would only occur starting year 3. Name: Percentage increase of transactions intiated per Citizen Service Center ( CSC ) per year Percentage 0. 00 15. 00 Annual CSC pilot will provide statistic data on transactions initiated and / or completed.", + "ner_text": [ + [ + 1258, + 1272, + "named" + ] + ], + "validated": false, + "empirical_context": "00 15. 00 Annual CSC pilot will provide statistic data on transactions initiated and / or completed.", + "type": "data", + "explanation": "'statistic data' is not a structured collection of data but rather a general term for numerical information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'statistic data' refers to a dataset due to the term 'data' being present.", + "contextual_reason_agent": "'statistic data' is not a structured collection of data but rather a general term for numerical information.", + "contextual_signal": "mentioned only as a type of information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 57, + "text": "Data on payment of the livelihood grant will be collected through the SNSOP MIS that will be linked with SNSOP payment data The Implementing Partner responsible for Component 2 will be responsible for data collection Eligible beneficiary households with functional income-generating investments The total number of households with functional This indicator will be SNSOP Management Data will be collected through routine M & E Implementing Partner", + "ner_text": [ + [ + 70, + 79, + "named" + ] + ], + "validated": false, + "empirical_context": "Data on payment of the livelihood grant will be collected through the SNSOP MIS that will be linked with SNSOP payment data The Implementing Partner responsible for Component 2 will be responsible for data collection Eligible beneficiary households with functional income-generating investments The total number of households with functional This indicator will be SNSOP Management Data will be collected through routine M & E Implementing Partner", + "type": "system", + "explanation": "However, the context indicates that SNSOP MIS is described as a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often refers to Management Information Systems that handle data.", + "contextual_reason_agent": "However, the context indicates that SNSOP MIS is described as a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 45, + "text": "In more than 87 percent of households, women are responsible for fetching water in almost all age categories, except for the age category of 6 \u2013 17 years, where the share of boys and girls who fetch water is almost equal. In addition, due to the poor healthcare services and as many households defer to home-based treatment, the time and emotional burden on mothers as main caregivers increases ( details on the baseline assessment across the identified gender gaps are provided in annex 1 ). Women spend time on boiling and settling water to reduce the risk of getting sick, but many are unaware of appropriate water storage and on-site purification practices. Women and girls face heightened physical and health risks associated with collecting water or using open toilets in schools and lack of menstrual hygiene facilities in social institutions. There are also wide gender gaps in employment in the water sector institutions. Global data show that, on average, women account for only 18 percent of total staff in water institutions and 23 percent of staff in engineering and managerial positions. Evidence from Europe and Central Asian countries show similar patterns. In Tajikistan, some of the reasons for", + "ner_text": [ + [ + 931, + 942, + "named" + ], + [ + 39, + 44, + "Global data <> reference population" + ], + [ + 493, + 498, + "Global data <> reference population" + ], + [ + 662, + 667, + "Global data <> reference population" + ], + [ + 966, + 971, + "Global data <> reference population" + ], + [ + 1116, + 1122, + "Global data <> data geography" + ], + [ + 1127, + 1150, + "Global data <> data geography" + ], + [ + 1228, + 1246, + "Global data <> usage context" + ] + ], + "validated": true, + "empirical_context": "There are also wide gender gaps in employment in the water sector institutions. Global data show that, on average, women account for only 18 percent of total staff in water institutions and 23 percent of staff in engineering and managerial positions. Evidence from Europe and Central Asian countries show similar patterns.", + "type": "data", + "explanation": "In this context, 'Global data' is used to refer to empirical statistics about employment in water institutions, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Global data' refers to a dataset because it implies a collection of statistics regarding gender representation in the water sector.", + "contextual_reason_agent": "In this context, 'Global data' is used to refer to empirical statistics about employment in water institutions, indicating it functions as a data source.", + "contextual_signal": "follows 'show that', indicating it is used as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 47, + "text": "Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA. Households benefiting from graduation programming ( Number ) Description Quantitative indicator counting number of households that benefit from graduation programming. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA. Businesses that benefit from partial credit guarantee scheme ( Number ) Description Quantitative indicator counting number of businesses that benefit from the BDF partial credit guarantee scheme. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BDF data fed to MINEMA. Responsibility for Data Collection BDF and MINEMA Volume of additional credit facilitated by the partial credit guarantee scheme ( Amount ( USD ) ) Description Quantitative indicator counting private capital enabled through PFIs under the BDF partial credit guarantee scheme. This is calculated based on an average loan size of US $ 700 x 3, 000 beneficiairies, for an approxiate target of US $ 2 million. Frequency Quarterly", + "ner_text": [ + [ + 196, + 202, + "named" + ] + ], + "validated": false, + "empirical_context": "Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA.", + "type": "organization", + "explanation": "However, MINEMA is mentioned as an organization responsible for data collection, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MINEMA is a dataset due to its involvement in data collection.", + "contextual_reason_agent": "However, MINEMA is mentioned as an organization responsible for data collection, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 54, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 43 Indicator Name Volume of BOD pollution loads removed by treatment plants as result of project interventions ( Tons per year ) Definition / Description This indicator measures the cumulative volume ( mass ) of Biological Oxygen Demand ( BOD ) pollution loads removed by the treatment plant supported under the project. Project support may include construction, expansion or rehabilitation of the treatment plant. Frequency Bi-annually Data Source NWSC performance report and NWSC annual report Methodology for Data Collection Responsibility for Data Collection NWSC Indicator Name Catchment and source protection plans developed Definition / Description Plans developed to enhance catchment management and source protection measures ( e. g., soil and water conservation, river banks protection and restoration, etc. ) in the selected water management zones. Frequency Bi-annually Data Source MWE / DWRM progress reports, annual water and environment sector performance reports, and water supply, utility and \\ refugee databases.", + "ner_text": [ + [ + 1002, + 1057, + "named" + ], + [ + 662, + 699, + "annual water and environment sector performance reports <> data description" + ], + [ + 1125, + 1143, + "annual water and environment sector performance reports <> usage context" + ] + ], + "validated": true, + "empirical_context": ", soil and water conservation, river banks protection and restoration, etc. ) in the selected water management zones. Frequency Bi-annually Data Source MWE / DWRM progress reports, annual water and environment sector performance reports, and water supply, utility and \\ refugee databases.", + "type": "report", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a data source for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a data source in the context.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a data source for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 884, + 889, + "named" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "100_Lebanon-Health-PAD-PAD2358-06152017", + "page": 14, + "text": "The World Bank Lebanon Health Resilience Project ( P163476 ) Page 12 of 54 degree of autonomy. Around 47 percent of the Lebanese population have health insurance coverage; and 53 percent who lack any formal coverage are covered by the MoPH, which serves as an \u201c insurer of last resort. \u201d This means a strong role for the ministry, not only in preventive care, public health leadership, and regulation, but also in curative care. To provide hospital coverage to about 250, 000 cases per year, the MoPH contracts 26 public and 105 private hospitals. Individual patient copayment to the hospital constitutes 5 percent ( public hospital ) or 15 percent ( private hospital ) of the hospitalization costs, and the MoPH directly reimburses the hospital for the 85 \u2013 95 percent difference. 10. Despite the considerable resilience of Lebanon \u2019 s health system, the health sector indicators are regressing since the start of the Syrian crisis. The gains that Lebanon made in meeting the Millennium Development Goals ( MDGs ) before the Syrian crisis are rapidly declining. The latest MoPH hospital data show significant setbacks in neonatal and maternal mortality indicators ( this excludes deliveries outside the hospitals ).", + "ner_text": [ + [ + 1074, + 1092, + "named" + ], + [ + 15, + 22, + "MoPH hospital data <> data geography" + ], + [ + 120, + 139, + "MoPH hospital data <> reference population" + ], + [ + 235, + 239, + "MoPH hospital data <> publisher" + ], + [ + 708, + 712, + "MoPH hospital data <> publisher" + ], + [ + 825, + 832, + "MoPH hospital data <> data geography" + ], + [ + 949, + 956, + "MoPH hospital data <> data geography" + ], + [ + 1122, + 1164, + "MoPH hospital data <> data description" + ] + ], + "validated": true, + "empirical_context": "The gains that Lebanon made in meeting the Millennium Development Goals ( MDGs ) before the Syrian crisis are rapidly declining. The latest MoPH hospital data show significant setbacks in neonatal and maternal mortality indicators ( this excludes deliveries outside the hospitals ).", + "type": "data", + "explanation": "This is indeed a dataset as it provides structured information on hospital data relevant to health indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected by the Ministry of Public Health (MoPH) regarding hospital statistics.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured information on hospital data relevant to health indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "ner_text": [ + [ + 356, + 360, + "named" + ], + [ + 17, + 21, + "KDHS <> reference year" + ], + [ + 92, + 96, + "KDHS <> publication year" + ], + [ + 241, + 246, + "KDHS <> data geography" + ], + [ + 412, + 416, + "KDHS <> reference year" + ] + ], + "validated": true, + "empirical_context": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase.", + "type": "survey", + "explanation": "KDHS is indeed a dataset as it is explicitly mentioned as a survey providing data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed KDHS is a dataset because it is referenced alongside specific data points and outcomes.", + "contextual_reason_agent": "KDHS is indeed a dataset as it is explicitly mentioned as a survey providing data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 36, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 32 of 47 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline End Target ( Percentage ) PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target 1. Improving utilization of quality health services Targeted HFs having majority of essential medicines for RMNCHN services ( Percentage ) 20. 00 75. 00 Absenteeism among key staff to provide RMNCHN services ( Percentage ) 50. 00 25. 00 Targeted health care providers with minimum knowledge and competencies in RMNCHN services ( Percentage ) 0. 00 80. 00 Targeted HMIS digitally integrated into DHIS ( 2 ) ( Number ) 0. 00 3. 00 2. Improving utilization of quality education services Targeted school clusters with improved cluster \u2010 based governance ( Percentage ) 0. 00 50. 00 Student learning assessment reforms strategy implemented ( Text ) No strategy exists Dissemination of assessment results from 10 % target primary schools Grade 5 and 8 students scoring at least 50 % in concept \u2010 based learning assessment in project schools ( Percentage ) 0. 00 20. 00 Targeted female teachers trained ( Percentage ) 0. 00 80. 00 Grievances registered related to delivery of project benefits that are addressed ( Percentage ) 0. 00 75. 00 IO Table SPACE", + "ner_text": [ + [ + 687, + 691, + "named" + ] + ], + "validated": false, + "empirical_context": "00 80. 00 Targeted HMIS digitally integrated into DHIS ( 2 ) ( Number ) 0. 00 3.", + "type": "system", + "explanation": "However, DHIS is referred to as a system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS is a dataset because it is mentioned in the context of a digital integration.", + "contextual_reason_agent": "However, DHIS is referred to as a system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 52, + "text": "The reports should include ( i ) data on piped water supply connections from the service contracts database ( billing and customer databases of operators ); ( ii ) data from operators and state sanitary service department on compliance of tested water Annual reports of targeted water utilities, M & E reports prepared by the PMU, baseline and endline surveys. Methodology for this indicator will be detailed in the POM. MEWR, SUE KMK ( target utilities ) and PMU.", + "ner_text": [ + [ + 81, + 107, + "named" + ], + [ + 33, + 71, + "service contracts database <> data description" + ] + ], + "validated": true, + "empirical_context": "The reports should include ( i ) data on piped water supply connections from the service contracts database ( billing and customer databases of operators ); ( ii ) data from operators and state sanitary service department on compliance of tested water Annual reports of targeted water utilities, M & E reports prepared by the PMU, baseline and endline surveys. Methodology for this indicator will be detailed in the POM.", + "type": "database", + "explanation": "In this context, it is indeed a dataset as it is explicitly mentioned as a source of data for piped water supply connections.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a 'database' which typically implies a structured collection of data.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is explicitly mentioned as a source of data for piped water supply connections.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 13, + "text": "Two recent studies provide a detailed general analysis of policy options. 18 For STEAM courses, the causes relate to: ( i ) enjoyment of reading and preference for mathematics; ( ii ) self-efficacy or belief in own capabilities, often reinforced by teachers; ( iii ) parental support or lack thereof due to low socioeconomic status; ( iv ) teacher preparation to deal effectively with diversity. Figure 1a: PISA learning assessment scores: gender comparison Source: PISA; Differences greater than the grey bands ( + or - two standard errors ) are statistically significant. 16 Source: Government Budget for 2024 and MEP School Census data from 2023. 17 Program-Based budgeting at a government-wide level is being implemented under the Fiscal Management Improvement Project ( P172352, Loan 9075-CR ), known as Hacienda Digital. Investing in readiness to implement program-based budgeting at MEP, the biggest Ministry ( in terms of budget and staff ) is a priority for the Government of Costa Rica. 18 Desigualdades por g\u00e9nero en Primaria y Secundaria, Chapter 4 in Noveno Estado de la Educaci\u00f3n, 2023; and Villlobos and Azofeifa, La paradoja en educaci\u00f3n, alta inversi\u00f3n del PIB y alta brechas de g\u00e9nero, Logos ( II ) 1, 2021.", + "ner_text": [ + [ + 616, + 638, + "named" + ], + [ + 616, + 619, + "MEP School Census data <> publisher" + ], + [ + 644, + 648, + "MEP School Census data <> publication year" + ], + [ + 890, + 893, + "MEP School Census data <> publisher" + ], + [ + 985, + 995, + "MEP School Census data <> data geography" + ], + [ + 1095, + 1099, + "MEP School Census data <> reference year" + ], + [ + 1105, + 1127, + "MEP School Census data <> author" + ] + ], + "validated": true, + "empirical_context": "Figure 1a: PISA learning assessment scores: gender comparison Source: PISA; Differences greater than the grey bands ( + or - two standard errors ) are statistically significant. 16 Source: Government Budget for 2024 and MEP School Census data from 2023. 17 Program-Based budgeting at a government-wide level is being implemented under the Fiscal Management Improvement Project ( P172352, Loan 9075-CR ), known as Hacienda Digital.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of information used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as 'data' in the context of a census.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of information used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "100_Lebanon-Health-PAD-PAD2358-06152017", + "page": 15, + "text": "Since MCH services account for at least half of all preventive and curative health - seeking among Syrians, this model is expected to reduce service costs substantially, while supporting retention and quality. 13. To meet the increased demand and strengthen primary care services, the MoPH launched the Emergency Primary Health Care Restoration Project ( EPHRP ) in 2015. This project is the building block of the MoPH \u2019 s long-term strategy for UHC, which aims to \u201d provide a specified package of benefits to all members of a society with the end goal of providing financial risk protection, improving access to health services and health outcomes. \u201d 9 Financed from the Lebanon Syria Multi-Donor Trust Fund, the project aims to strengthen and improve access to PHC services, especially for the low-income host communities crowded out by the increased demand for PHC services from refugees. The project strengthens the capacity of 75 MoPH network centers, expands the package of services provided, and subsidizes the cost of care to 150, 000 poor Lebanese enrolled in the NPTP ( see Box 2 ). However, strengthening the capacity of the network clinics also extends benefits to low-income non-subsidized Lebanese and displaced Syrians covered by the international community. The latest MoPH data show that improving the capacity of the network centers through the EPHRP is having a positive impact on access to services for host communities and displaced Syrians alike. While before the project access to PHC services was relatively low, especially for host communities in areas with high concentration of displaced Syrians, it increased steadily after the start of the project for both poor Lebanese ( 28 percent ) and displaced Syrians ( 47 percent ). 10 The project demonstrates that strengthening the integrated PHC model benefits both communities. 9 WHO, SDGs, 2016. 10 Ministry of Public Health data, 2017.", + "ner_text": [ + [ + 1285, + 1294, + "named" + ], + [ + 285, + 289, + "MoPH data <> publisher" + ], + [ + 414, + 418, + "MoPH data <> publisher" + ], + [ + 935, + 939, + "MoPH data <> publisher" + ], + [ + 1043, + 1056, + "MoPH data <> reference population" + ], + [ + 1216, + 1233, + "MoPH data <> reference population" + ], + [ + 1285, + 1289, + "MoPH data <> publisher" + ], + [ + 1874, + 1899, + "MoPH data <> publisher" + ], + [ + 1906, + 1910, + "MoPH data <> publication year" + ] + ], + "validated": true, + "empirical_context": "However, strengthening the capacity of the network clinics also extends benefits to low-income non-subsidized Lebanese and displaced Syrians covered by the international community. The latest MoPH data show that improving the capacity of the network centers through the EPHRP is having a positive impact on access to services for host communities and displaced Syrians alike. While before the project access to PHC services was relatively low, especially for host communities in areas with high concentration of displaced Syrians, it increased steadily after the start of the project for both poor Lebanese ( 28 percent ) and displaced Syrians ( 47 percent ).", + "type": "data", + "explanation": "In this context, 'MoPH data' is indeed used as a source of information regarding health service access, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MoPH data' is a dataset because it refers to specific data collected by the Ministry of Public Health.", + "contextual_reason_agent": "In this context, 'MoPH data' is indeed used as a source of information regarding health service access, indicating it functions as a dataset.", + "contextual_signal": "'latest MoPH data' suggests it is a structured collection of data used for analysis.", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 82, + "text": "Also, necessary woreda-level information and a drought and flood prevalence summary, including a correlation result between water supply and the defined impact-level indicators, are captured under the assessment. The findings will be further refined during the planned National WASH Inventory and will serve as a base for facilitating mid - and end-term as well as impact evaluations. 98. Support to the WASH M & E and MIS: Under the program management and institutional strengthening component, the CWA will provide support to strengthen the operationalization of the broader WASH M & E MIS system developed by joint financing of the DFID and AfDB. The support will focus on: a. Institutionalization of the MIS to ensure regular updating of the National WASH Inventory as well as use of the data for decision making. This will include: ( i ) capacity-building support to the newly established MIS directorate under the WDC; ( ii ) building woreda-level capacity for regular reporting of indicators ( for instance, through expanding the ToRs for CFTs ); and ( iii ) building data analysis, interpretation, and reporting capacity at all levels through targeted training. b. Introducing a community-based monitoring tool ( focusing on a few WASH indicators ) to inform decision makers regarding sustainable service delivery. The current data collection tool ( Cosmos ) can only reach up to the woreda level. Given the number and dispersed nature of schemes in a woreda and", + "ner_text": [ + [ + 16, + 40, + "named" + ] + ], + "validated": false, + "empirical_context": "Also, necessary woreda-level information and a drought and flood prevalence summary, including a correlation result between water supply and the defined impact-level indicators, are captured under the assessment. The findings will be further refined during the planned National WASH Inventory and will serve as a base for facilitating mid - and end-term as well as impact evaluations.", + "type": "information", + "explanation": "However, it is described as 'information' rather than a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific information that could be structured.", + "contextual_reason_agent": "However, it is described as 'information' rather than a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "mentioned only as information, not as a data source", + "tags": [] + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 37, + "text": "There will be two main M & E tasks that MENFOP must ensure: ( a ) data collection and analysis, and reporting related to the RF; and ( b ) management of the third-party entity that is responsible to verify select DLRs. To fulfil this, the director responsible for M & E within MENFOP will need to develop the M & E system and protocols that will be outlined in the operations manual. The M & E system will detail the roles and responsibilities for data collection and data access to ensure timely and regular reporting, including any clearances or approvals required to conduct surveys or field visits. The project will systematically use data to assess whether project implementation is on track and whether modifications are needed. Under component 4, two sub-components will help this: the modernization of the EMIS and the development of a student-identification number for all children on Djiboutian soil ( which will improve tracking of student enrollment ). 83. The PAE was revised from 2017-2019 to 2020, so that the first targets in the project align with those of the sector. As the project begins, the MENFOP will also be developing the new Education Sector Plan for 2020 - 2035 which will further extend those results. C. Sustainability 84. The proposed project aims to transform the sector by focusing on the key elements for education quality.", + "ner_text": [ + [ + 814, + 818, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will systematically use data to assess whether project implementation is on track and whether modifications are needed. Under component 4, two sub-components will help this: the modernization of the EMIS and the development of a student-identification number for all children on Djiboutian soil ( which will improve tracking of student enrollment ). 83.", + "type": "system", + "explanation": "However, EMIS is described as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data management in education.", + "contextual_reason_agent": "However, EMIS is described as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 15, + "text": "Burundi has made good progress in the health and education sectors over the last fifteen years, but gaps remain with respect to access to basic services and public infrastructure. Just nine percent of the population has access to 16 Victoria Rames, Cl\u00e9mence Bununagi, and Caritas Niyonzima ( 2017 ) USAID / Burundi Gender Analysis Report. Prepared by Banyan Global. 17 56 percent of children under 5 years of age in Burundi are stunted, with 61 percent in Ngozi, 66 percent in Muyinga, 59 percent in Cankuzo, and 52 percent in Ruyigi: DHS 2016-17 18 Data as of 2017. See https: / / data. worldbank. org / indicator / sp. dyn. tfrt. in 77. 5 56. 6 42. 4 31. 4 3. 8 0 10 20 30 40 50 60 70 80 90 Fetal growth restriction and preterm birth Water, sanitation and biomass fuel use Child nutrition and infection Maternal nutrition and infection Teenage motherhood and short birth intervals Attributable stunting cases ( thousands )", + "ner_text": [ + [ + 535, + 538, + "named" + ], + [ + 0, + 7, + "DHS <> data geography" + ], + [ + 233, + 247, + "DHS <> author" + ], + [ + 249, + 266, + "DHS <> author" + ], + [ + 272, + 289, + "DHS <> author" + ], + [ + 292, + 296, + "DHS <> publication year" + ], + [ + 383, + 412, + "DHS <> reference population" + ], + [ + 416, + 423, + "DHS <> data geography" + ], + [ + 456, + 461, + "DHS <> data geography" + ], + [ + 500, + 507, + "DHS <> data geography" + ], + [ + 527, + 533, + "DHS <> data geography" + ], + [ + 539, + 546, + "DHS <> publication year" + ], + [ + 561, + 565, + "DHS <> publication year" + ] + ], + "validated": true, + "empirical_context": "Prepared by Banyan Global. 17 56 percent of children under 5 years of age in Burundi are stunted, with 61 percent in Ngozi, 66 percent in Muyinga, 59 percent in Cankuzo, and 52 percent in Ruyigi: DHS 2016-17 18 Data as of 2017. See https: / / data.", + "type": "survey", + "explanation": "In this context, 'DHS' refers to the Demographic and Health Survey, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHS' is a dataset because it is associated with specific statistical data on child stunting rates.", + "contextual_reason_agent": "In this context, 'DHS' refers to the Demographic and Health Survey, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 118, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 113 of 117 Action Description DLI Respon sible Timing Completion measurement 2. Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender. 3. Complete the ongoing process to institutionalize the process and procedures for administration and management of scholarships by Jomo Kenyatta Foundation ( JKF ) Integrated in DLI # 4 MoE Year 2 Scholarship administration and management processes and procedures established at JKF. 4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A. MoE By Year 2, 2023 Complaints and grievances lodged through the NEMIS. Number of grievances satisfactorily addressed 5. Assess the outcomes of biogas pilots and analyze existing practices on use of biogas in schools N / A MoE Year 2 Costed action plan developed to scale up biogas projects.", + "ner_text": [ + [ + 424, + 429, + "named" + ], + [ + 251, + 293, + "NEMIS <> reference population" + ], + [ + 452, + 474, + "NEMIS <> reference population" + ], + [ + 956, + 960, + "NEMIS <> publication year" + ] + ], + "validated": true, + "empirical_context": "Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender. 3.", + "type": "system", + "explanation": "NEMIS is confirmed as a dataset since it is described as a system that generates reports with granular data on special needs children.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of collecting and reporting data.", + "contextual_reason_agent": "NEMIS is confirmed as a dataset since it is described as a system that generates reports with granular data on special needs children.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 67, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 55. Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 promotion rate from grade 11 to 12, nationwide The indicator is measured by the proportion of girls enrolled in grade 11 at a given school year who study in the grade 12 in the following school year. The indicator will be calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year. Annual EMIS Annual school census system MOE Percentage of secondary schools that have received IEC materials and conducted at least one school community workshop on climate change awareness using the materials in 29 selected SPG woredas The indicator measures the share of secondary schools that received prepared IEC materials for climate awareness and held at leas onw school community workshop using the materials. The IDD supports the design of the materials needed to raise awareness of the CCE and indicators follow the schools receiving the materials and conducting Year 3 and 4 Questionnaire s will be developed to keep track on number of secondary schools received the IEC materials and workshop In each SPG, the woreda education officer will keep a record of details of IEC materials distributed to secondary schools.", + "ner_text": [ + [ + 658, + 662, + "named" + ] + ], + "validated": false, + "empirical_context": "The indicator will be calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year. Annual EMIS Annual school census system MOE Percentage of secondary schools that have received IEC materials and conducted at least one school community workshop on climate change awareness using the materials in 29 selected SPG woredas The indicator measures the share of secondary schools that received prepared IEC materials for climate awareness and held at leas onw school community workshop using the materials. The IDD supports the design of the materials needed to raise awareness of the CCE and indicators follow the schools receiving the materials and conducting Year 3 and 4 Questionnaire s will be developed to keep track on number of secondary schools received the IEC materials and workshop In each SPG, the woreda education officer will keep a record of details of IEC materials distributed to secondary schools.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system and not explicitly as a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to educational data collection.", + "contextual_reason_agent": "However, EMIS is mentioned as a system and not explicitly as a data source in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 44, + "text": "CBA for Component 1: Improving Health Services Benefits, Present Value US $, current Costs, Present Value US $, current BCR NPV US $ Years 2 % 4 % 2 % 4 % 2 % 4 % 2 % 4 % 2020 0 0 2, 478, 360 2, 430, 581 0. 00 0. 00 \u2212 2, 478, 360 \u2212 2, 430, 581 2021 4, 078, 067 3, 922, 345 3, 996, 990 3, 844, 363 1. 02 1. 02 81, 077 77, 981 2022 4, 945, 713 4, 665, 155 4, 445, 516 4, 193, 332 1. 11 1. 11 500, 197 471, 822 2023 6, 115, 487 5, 657, 361 3, 622, 024 3, 350, 689 1. 69 1. 69 2, 493, 463 2, 306, 672 2024 7, 702, 585 6, 988, 195 3, 326, 210 3, 017, 714 2. 32 2. 32 4, 376, 375 3, 970, 480 Total 22, 841, 852 21, 233, 055 17, 869, 100 16, 836, 680 1. 28 1. 26 4, 972, 753 4, 396, 375 74 Data on the relationship of measles and full vaccination come from PDHS 2017 \u2013 18 75 Ahmed, S., Q. Li, L. Liu, and A. O. Tsui. 2012. \u201c: Maternal Deaths Averted by Contraceptive Use: An Analysis of 172 Countries. \u201d The Lancet 380 ( 9837 ): 111 \u2013 125. 76 Rafiq, M., and M. K. Shah. 2010. \u201c The Value of Reduced Risk of Injury and Deaths in Pakistan \u2014 Using Actual and Perceived Risk Estimates. \u201d The Pakistan Development Review 49 ( 4 ): 823 \u2013 837.", + "ner_text": [ + [ + 750, + 754, + "named" + ], + [ + 755, + 764, + "PDHS <> publication year" + ], + [ + 779, + 784, + "PDHS <> author" + ], + [ + 786, + 792, + "PDHS <> author" + ], + [ + 798, + 808, + "PDHS <> author" + ], + [ + 819, + 863, + "PDHS <> data description" + ], + [ + 951, + 961, + "PDHS <> author" + ], + [ + 1021, + 1029, + "PDHS <> data geography" + ] + ], + "validated": true, + "empirical_context": "28 1. 26 4, 972, 753 4, 396, 375 74 Data on the relationship of measles and full vaccination come from PDHS 2017 \u2013 18 75 Ahmed, S. , Q.", + "type": "survey", + "explanation": "PDHS is explicitly mentioned as the source of data regarding the relationship of measles and full vaccination.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PDHS is referenced in relation to data on vaccination.", + "contextual_reason_agent": "PDHS is explicitly mentioned as the source of data regarding the relationship of measles and full vaccination.", + "contextual_signal": "mentioned as a source of data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 14, + "text": "The World Bank Social Entrepreneurship, Empowerment and Cohesion in Refugee and Host Communities in Turkey Project ( P171456 ) 5 15. With regard to women, there are also significant gaps in access to employment and self-employment opportunities for both refugee women and women from Turkish hosting communities ( see also Annex 3 ) that warrant a particular focus on supporting women \u2019 s entrepreneurship and women-owned social enterprises. Only 32 percent of Turkish women of working age are employed, compared to 72 percent of Turkish men. 16 Further, 7 percent of Turkish men are employers compared to 1 percent of Turkish women. 17 Barriers to Turkish women \u2019 s entry into entrepreneurship include lack of access to finance, lack of skills, and social norms related to marriage and childcare. Syrian refugee women in Turkey face significant challenges in accessing formal employment. According to results from a UN Women survey, 18 85 percent of Syrian women were unemployed in Turkey. Only 10 percent of female respondents stated that they work in a regular income-generating activity, while 5 percent make their living from irregular or seasonal work.", + "ner_text": [ + [ + 916, + 931, + "named" + ], + [ + 100, + 106, + "UN Women survey <> data geography" + ], + [ + 797, + 817, + "UN Women survey <> reference population" + ], + [ + 821, + 827, + "UN Women survey <> data geography" + ], + [ + 950, + 962, + "UN Women survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Syrian refugee women in Turkey face significant challenges in accessing formal employment. According to results from a UN Women survey, 18 85 percent of Syrian women were unemployed in Turkey. Only 10 percent of female respondents stated that they work in a regular income-generating activity, while 5 percent make their living from irregular or seasonal work.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it presents specific results derived from the UN Women survey regarding employment among Syrian women.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical results.", + "contextual_reason_agent": "The context confirms it is a dataset as it presents specific results derived from the UN Women survey regarding employment among Syrian women.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "157_417980PAD0P1041ly10IDAR200081006011", + "page": 32, + "text": "5. Continuing conflict i s perpetuating an internal cycle o f violence, fragmenting social cohesion and affecting psychosocial well-being. A 2006 survey showed that nearly three - quarters o f the Palestinian population suffers from severe depression resulting from the current sociopolitical situation. Research has also shown that victims o f violence and torture become prey to mental illness and often direct acts o f aggression against members o f their own household and communities. Palestinians have been victims o f violence on a mass scale. There are indications that this could contribute to a form o f violence that i s directed inward, further fragmenting the social fabric. 6. There i s an urgent need to foster youth inclusion. Since September 2000, youth have been caught literally and figuratively in the crossfire o f the Israeli-Palestinian conflict. Nearly 75 percent o f total injured during the second Intifada were between 10 and 29 years o f age. Youth have also been active participants in the Intifada and have therefore suffered both violence and imprisonment with its consequent impact on mental health and physical disability. They also suffer from high rates o f unemployment ( 35 percent ) and are excluded from formal mechanisms o f decision making. 7. Locally rooted NGOs remain critical providers o f social services.", + "ner_text": [ + [ + 141, + 152, + "named" + ] + ], + "validated": true, + "empirical_context": "Continuing conflict i s perpetuating an internal cycle o f violence, fragmenting social cohesion and affecting psychosocial well-being. A 2006 survey showed that nearly three - quarters o f the Palestinian population suffers from severe depression resulting from the current sociopolitical situation. Research has also shown that victims o f violence and torture become prey to mental illness and often direct acts o f aggression against members o f their own household and communities.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context as a source of empirical data regarding the mental health impact of the sociopolitical situation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey conducted in 2006 that provides empirical data on the mental health of the Palestinian population.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context as a source of empirical data regarding the mental health impact of the sociopolitical situation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 65, + "text": "At the same time, this approach allows for further expansion through private connections in a subsequent phase, once the necessary hydraulic capacity is present in the network. Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities. In addition, the project team and REGIDESO organized a participatory workshop, in which experiences to date with standpost management in Burundi, Kenya, Senegal and Rwanda were presented and discussed. The workshop, which included community representatives, members of government, REGIDESO staff, World Bank representatives, and international invited speakers with direct experience in standpost management, also served to produce recommendations on the type of standpost management that would be most appropriate in the context of Bujumbura. 14. As seen in the previous section, the household survey generated baseline information regarding water supply, sanitation, electricity provision, and general socio-economic and demographic data about the 26 neighborhoods under study. It also asked respondents to express their preferences about the type of service they would like. Not surprisingly, a majority of respondents ( 63. 9 % ) would prefer to pay to have a private connection to the network.", + "ner_text": [ + [ + 307, + 341, + "named" + ] + ], + "validated": false, + "empirical_context": "Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities. In addition, the project team and REGIDESO organized a participatory workshop, in which experiences to date with standpost management in Burundi, Kenya, Senegal and Rwanda were presented and discussed.", + "type": "study", + "explanation": "However, it is not a dataset as it refers to a study rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'study' which often relates to data collection.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a study rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 46, + "text": "The targeted stakeholders include Direction des Transports Routiers ( Road Transport Directorate, DTR ), the DGA, the DGAI, DGECMR, the DTF / MF, the DGC, the General Directorate of Customs, and other stakeholders such as commercial financial institutions, private sector representatives and logistics actors. Frequency At baseline, midterm, and after completion Data source Project progress reports, feasibility studies, and strategic documents prepared for the development of value chains and their implementation by the implementing agency Methodology for Data Collection Revision of project reports Responsibility for Data Collection Ministry in charge of agriculture and livestock ( DGA, DGDPPIA ) Monitoring & Evaluation Plan: Intermediate Results Indicators by Component Climate-resilient Improvement of Niger \u2019 s Transport Backbone People benefitting from climate-resilient infrastructure ( Number of people, gender-disaggregated, youth-disaggregated ) Description This indicator measures the total number of people benefiting from climate-resilient infrastructure based on physical proximity. The distance-based catchment area is predefined as a 2km radius around an all-season rural road. This is in line with the approach of the Rural Access Index, which measures the proportion of people who have access to an all-season road within an approximate walking distance of 2 km. There is a common understanding that the 2km threshold is reasonable for people \u2019 s normal economic and social purposes. The indicator includes access to climate-resilient roads.", + "ner_text": [ + [ + 375, + 399, + "named" + ] + ], + "validated": false, + "empirical_context": "The targeted stakeholders include Direction des Transports Routiers ( Road Transport Directorate, DTR ), the DGA, the DGAI, DGECMR, the DTF / MF, the DGC, the General Directorate of Customs, and other stakeholders such as commercial financial institutions, private sector representatives and logistics actors. Frequency At baseline, midterm, and after completion Data source Project progress reports, feasibility studies, and strategic documents prepared for the development of value chains and their implementation by the implementing agency Methodology for Data Collection Revision of project reports Responsibility for Data Collection Ministry in charge of agriculture and livestock ( DGA, DGDPPIA ) Monitoring & Evaluation Plan: Intermediate Results Indicators by Component Climate-resilient Improvement of Niger \u2019 s Transport Backbone People benefitting from climate-resilient infrastructure ( Number of people, gender-disaggregated, youth-disaggregated ) Description This indicator measures the total number of people benefiting from climate-resilient infrastructure based on physical proximity. The distance-based catchment area is predefined as a 2km radius around an all-season rural road.", + "type": "document", + "explanation": "However, 'Project progress reports' are described as documents rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured information.", + "contextual_reason_agent": "However, 'Project progress reports' are described as documents rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project document, not as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 63, + "text": "As such, it would help to transform health sector service delivery and produce more quality data for timely decision-making. The Program aims to address critical gaps by supporting the expansion, interoperability, and effective use of digital health information systems in Jordan. The Program includes Results Areas that foster a foundational environment, thus enabling digital transformation at scale via the HIE and appropriate oversight, governance, and management mechanisms. Building on the strong interoperability layer, the national EMR system will be expanded to all MOH facilities at the primary, secondary, and tertiary levels. Currently, the Electronic Medical Records do not track refugee status, making it difficult to determine whether a registered non-Jordanian \u2013 including Syrian refugee patient is eligible for subsidized healthcare services. This contributes to the low uptake of health-services by refugees. Collected data from the national EMR will then be used to strengthen institutional capacity in data use through institutionalized data quality audits. 7. Result Area 2 on government effectiveness supports a cross-cutting objective of enhancing the professionalization of the civil service, including its digital literacy, as well as two sector specific strategic objectives, namely: improving student assessments through digitalization and enhancing the quality of health data. - Enhancing the professionalization of the civil service in Jordan is one of the main strategic thrusts of the Public Sector Modernization Roadmap.", + "ner_text": [ + [ + 531, + 550, + "named" + ] + ], + "validated": false, + "empirical_context": "The Program includes Results Areas that foster a foundational environment, thus enabling digital transformation at scale via the HIE and appropriate oversight, governance, and management mechanisms. Building on the strong interoperability layer, the national EMR system will be expanded to all MOH facilities at the primary, secondary, and tertiary levels. Currently, the Electronic Medical Records do not track refugee status, making it difficult to determine whether a registered non-Jordanian \u2013 including Syrian refugee patient is eligible for subsidized healthcare services.", + "type": "system", + "explanation": "However, the context indicates it is a system for managing records, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to an electronic system that manages medical records.", + "contextual_reason_agent": "However, the context indicates it is a system for managing records, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "The MIS will collect information from applicants during registration, such as ( a ) key contact information, gender, age, marital status, education status, refugee status, employment; ( b ) business level outcomes such as business partners, business age, monthly profits, number of employees; and ( c ) access to other programs and loans. The MIS database will be updated as program applicants make use of specific components. The GROW_ID can be entered to update the database with any additional information on services received to ensure the applicant is only registered once. For example, dates of business trainings, service provider, or amount of funds will all be entered into the MIS. This database will help support operations through feedback loops as it can track who is accessing which services in real time. The MIS will also be important in being able to establish a sample of study participants to draw on for an impact evaluation or other learning activities. 61. The project will also support MGLSD \u2019 s capacity to lead and oversight social risk management. At the national level, this will support capacity building of unit staff, public fora, SRM experts, etc.", + "ner_text": [ + [ + 824, + 827, + "named" + ] + ], + "validated": false, + "empirical_context": "This database will help support operations through feedback loops as it can track who is accessing which services in real time. The MIS will also be important in being able to establish a sample of study participants to draw on for an impact evaluation or other learning activities. 61.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, which does not function as a structured collection of data in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is mentioned in the context of tracking services and supporting operations.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, which does not function as a structured collection of data in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "094_PAD-final-02262018", + "page": 47, + "text": "The World Bank Greater Beirut Public Transport Project ( P160224 ) Page 36 of 59 Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection percentage of female passengers and percentage of vulnerable population. Name: Percentage of population residing in GBA with access to Beirut city center ( \u201c La place des martyrs \u201d ) within 60 minutes commuting period using public transport Percentage 50. 00 61. 00 Annual A global information system ( GIS ) - based spatial analysis will be conducted using the open source accessibility tool developed by the World Bank called Open Trip Planner Analyst ( OTPA ). CDR The RPTA / BRT operators ( for the GPS data ) Description: This indicator will measure the increase in percentage of population with access to jobs and services located at the CBD using public transport services. This indicator captures the improved accessibility objective of the project for public transport passengers. Name: Average travel time by public transport from Tabarja station to Charles Helou terminal at morning peak hours Minutes 75. 00 45. 00 Biannual Data to be obtained from the ITS. CDR / the RPTA BRT operators Description: Average rush hour in-vehicle travel time by the PT services from Tabarja station to Beirut ( Charles Helou terminal ) at morning peak hours between 7: 00am and 9: 00am.", + "ner_text": [ + [ + 711, + 719, + "named" + ] + ], + "validated": true, + "empirical_context": "00 Annual A global information system ( GIS ) - based spatial analysis will be conducted using the open source accessibility tool developed by the World Bank called Open Trip Planner Analyst ( OTPA ). CDR The RPTA / BRT operators ( for the GPS data ) Description: This indicator will measure the increase in percentage of population with access to jobs and services located at the CBD using public transport services. This indicator captures the improved accessibility objective of the project for public transport passengers.", + "type": "data", + "explanation": "In this context, 'GPS data' is used as a source of information for measuring accessibility, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'GPS data' is a dataset because it refers to a specific type of data used for analysis.", + "contextual_reason_agent": "In this context, 'GPS data' is used as a source of information for measuring accessibility, confirming its role as a dataset.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 35, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 31 of 66 implementation reports from various implementing units, which would include the progress toward PDOs through reporting of key performance indicators; ( c ) carrying out assessment and evaluation studies; and ( d ) conducting citizen engagement surveys to ensure beneficiaries \u2019 and communities \u2019 feedback in improving project performance. TA support will be available to the PIU for carrying out these M & E activities through component 4 of the Project. 88. Simple and low-cost ICT tools will be used for M & E and remote supervision of the Project given the highly fragile and conflict-affected operating environment, which may lead to constraints on access to project sites on the ground due to security-related and / or logistical reasons. The Project will focus on building government capacity to use these tools for their day-to-day monitoring and supervision of the Project. The Geo-Enabled Initiative for Monitoring and Supervision ( GEMS ) will be utilized to build capacity of government counterparts to use open-source tools for in-field collection of structured digital data that automatically feeds into a centralized M & E system. The integrated data collected can include photos, audio, videos; time and date stamps; and Global Positioning System ( GPS ) coordinates that allow for automated geo-mapping of the information.", + "ner_text": [ + [ + 327, + 353, + "named" + ], + [ + 64, + 75, + "citizen engagement surveys <> data geography" + ], + [ + 364, + 377, + "citizen engagement surveys <> reference population" + ], + [ + 1312, + 1332, + "citizen engagement surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 31 of 66 implementation reports from various implementing units, which would include the progress toward PDOs through reporting of key performance indicators; ( c ) carrying out assessment and evaluation studies; and ( d ) conducting citizen engagement surveys to ensure beneficiaries \u2019 and communities \u2019 feedback in improving project performance. TA support will be available to the PIU for carrying out these M & E activities through component 4 of the Project.", + "type": "survey", + "explanation": "In this context, it is indeed a dataset as it is used to gather feedback from beneficiaries and communities, which is essential for project performance evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'citizen engagement surveys' imply a structured collection of data gathered from citizens.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is used to gather feedback from beneficiaries and communities, which is essential for project performance evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "182_multi0page", + "page": 31, + "text": "It also calls for increased public awareness about social problems, and suggests that the SSDP focus on certain key g 7oups of vulnerable people, including women at risk of violence, youths at risk, street children, and the abandloned elderly. For the project preparation stage, the VNICA has set up baseline data on the number of people in vulnerable situations and the type and number of supplied services in four Albanian districts ( Tirana, Durres, Skodra and Vlora ). In order to monitor and evaluate progress of projects in the four districts; list of indicators has been selected for periodic follow-up. Monitoring and evaluation will be done at the district level and carried out once a year. In one year, the SSDP project will develop a national monitoring strategy. Preliminary suggestions for the preparation of a national strategy include conducting research throughout Albania using the research instruments used in the VNICA. These include a rapid quantitative assessment of vulnerable groups in the districts, a \" vulnerability census \" ( data collection and survey ), and relative ranking of vulnerability. In addition, the project will support a permnanent household survey that will assist the Government in monitoring trends in living standards and poverty correlates over time.", + "ner_text": [ + [ + 933, + 938, + "named" + ] + ], + "validated": false, + "empirical_context": "In one year, the SSDP project will develop a national monitoring strategy. Preliminary suggestions for the preparation of a national strategy include conducting research throughout Albania using the research instruments used in the VNICA. These include a rapid quantitative assessment of vulnerable groups in the districts, a \" vulnerability census \" ( data collection and survey ), and relative ranking of vulnerability.", + "type": "project", + "explanation": "VNICA is mentioned only as a project, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed VNICA is a dataset because it is associated with research instruments and data collection.", + "contextual_reason_agent": "VNICA is mentioned only as a project, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 17, + "text": "- refutes-drought-related-death / 23 UNHCR. Ethiopia Operational Update. January 2023. https: / / reporting. unhcr. org / document / 4393 24 UNHCR Ethiopia Education Factsheet. 2022. file: / / / C: / Users / wb374705 / Downloads / Factsheet_Layout_Education_Final. pdf 25 ESAA, 2018 / 19 26 ESAA different year 27 H. Tasic et. al. ( 2020 ), \u201c Drivers of Stunting Reduction in Ethiopia: A Country Case Study, \u201d The American Journal of Clinical Nutrition 112 ( 2 ): 875S \u2013 893S. https: / / doi. org / 10. 1093 / ajcn / nqaa163. 28 J. Golan, D. Headey, K. Hirvonen, and J. Hoddinott ( 2019 ), Changes in Child Undernutrition Rates in Ethiopia, 2000-2016 ( Oxford: Oxford University Press ). 29 E. Skoufias, K. Vinha, and R. Sata ( 2019 ), All Hands-on Deck: Reducing Stunting through a Multi-Sectoral Approach in Sub-Saharan Africa and Ethiopia ( Washington, DC: World Bank ). 30 2016 Demographic and Health Survey ( DHS ).", + "ner_text": [ + [ + 882, + 911, + "named" + ], + [ + 44, + 52, + "Demographic and Health Survey <> data geography" + ], + [ + 376, + 384, + "Demographic and Health Survey <> data geography" + ], + [ + 590, + 627, + "Demographic and Health Survey <> data description" + ], + [ + 631, + 639, + "Demographic and Health Survey <> data geography" + ], + [ + 641, + 650, + "Demographic and Health Survey <> reference year" + ], + [ + 833, + 841, + "Demographic and Health Survey <> data geography" + ], + [ + 877, + 881, + "Demographic and Health Survey <> publication year" + ], + [ + 914, + 917, + "Demographic and Health Survey <> acronym" + ], + [ + 973, + 991, + "Demographic and Health Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Sata ( 2019 ), All Hands-on Deck: Reducing Stunting through a Multi-Sectoral Approach in Sub-Saharan Africa and Ethiopia ( Washington, DC: World Bank ). 30 2016 Demographic and Health Survey ( DHS ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as the '2016 Demographic and Health Survey' which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is a recognized survey that collects demographic and health data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as the '2016 Demographic and Health Survey' which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 51, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda ( P176747 ) Page 47 of 77 Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Women participating in a networking platform supported by the project, disaggregated by age ( Number ) The number of women that register with a networking platform. Continuous. Platform registered users. Data are collected automatically as people register. Platform managers to report the information to the MGLSD. The MGLSD will collect the information from the primary source each month. Women in RHDs Refugee women Women entrepreneurs who complete the core women entrepreneur course ( Number ) The satisfaction of participants of the training courses with the training. Continuous. Questionnair e. At the completion of the training, participants will complete a satisfaction questionnaire. The MGLSD will collect the questionnaires from the service providers and PSFU each month. Women RHDs Refugee women RHC women who report the core women entrepreneur course is accessible and meets their needs ( Percentage ) The satisfaction of refugee and host community women with the core course on entrepreneurship. Continuous Questionnair e At the completion of the training, participants will complete a satisfaction questionnaire. The MGLSD will collect the questionnaires from the service providers and PSFU each month.", + "ner_text": [ + [ + 628, + 633, + "named" + ] + ], + "validated": false, + "empirical_context": "Platform managers to report the information to the MGLSD. The MGLSD will collect the information from the primary source each month. Women in RHDs Refugee women Women entrepreneurs who complete the core women entrepreneur course ( Number ) The satisfaction of participants of the training courses with the training.", + "type": "organization", + "explanation": "MGLSD is an organization mentioned in the context, not a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MGLSD is a dataset because it is involved in collecting information.", + "contextual_reason_agent": "MGLSD is an organization mentioned in the context, not a data source or dataset.", + "contextual_signal": "mentioned only as an organization, not as a data source", + "tags": [] + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 18, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 14 of 64 malaria, a vector-borne disease also exacerbated by climate change, is rapidly increasing ( Figure 3 ). Based on findings from the 2017-18 household survey, the top two causes for households seeking healthcare for children under five years of age and under one in the past 30 days is malaria / fever and diarrhea ( Figure 4 ). Figure 3: Top causes of deaths in Djibouti Source: Institute for Health Metrics and Evaluation ( IHME )", + "ner_text": [ + [ + 209, + 233, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 14 of 64 malaria, a vector-borne disease also exacerbated by climate change, is rapidly increasing ( Figure 3 ). Based on findings from the 2017-18 household survey, the top two causes for households seeking healthcare for children under five years of age and under one in the past 30 days is malaria / fever and diarrhea ( Figure 4 ). Figure 3: Top causes of deaths in Djibouti Source: Institute for Health Metrics and Evaluation ( IHME )", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data used to analyze healthcare-seeking behavior in households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on healthcare-seeking behavior.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data used to analyze healthcare-seeking behavior in households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 35, + "text": "The focus of TA will be on activities critical for the attainment of DLI targets under each results area. TA for EMIS will include support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, and report generation, among others ). Support will also include EMIS monitoring at the central and deconcentrated levels. Subcomponent 3. 2: Project management ( US $ 3 million ) 62. The objective of this subcomponent is to ensure adequate capacity for project implementation and coordination through the establishment of a Project Coordination and Management Unit ( PCMU ) within MINEDUB. This subcomponent will also finance costs associated with training, the recruitment of short - and long-term consultants, studies, surveys, M & E activities ( including independent verification of", + "ner_text": [ + [ + 434, + 438, + "named" + ] + ], + "validated": false, + "empirical_context": "TA for EMIS will include support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, and report generation, among others ). Support will also include EMIS monitoring at the central and deconcentrated levels. Subcomponent 3.", + "type": "program", + "explanation": "However, EMIS is mentioned as a program for training and support, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data management and statistics.", + "contextual_reason_agent": "However, EMIS is mentioned as a program for training and support, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 19, + "text": "Support for Foundational Learning ( US $ 30 million ). This component includes the following: ( i ) Diagnostic tools for the classroom that will provide a teacher with an accurate understanding of the level of literacy and numeracy of each student and of the classroom as a whole - the diagnostic will help the teacher understand objectively the need for learning recovery from school disruptions and other causes as well as a preventive early-warning protocol to help prevent students falling behind in the future; ( ii ) Development of the didactic resources in digital and analog formats to support early reading, writing, and early numeracy, to respond to the learning diagnostic ( iii ) Deployment of these didactic resources to classrooms at the Preschool, Primary, and Secondary levels ( within each level and articulation between levels ), and through MEP \u2019 s digital platform; ( iv ) Digital platform of a professional development plan for teachers, school principals, and pedagogical advisors for the deployment of foundational learning; ( v ) Development and implementation of a national foundational learning campaign emphasizing the joy of reading, writing, and numerical competency to involve parents and the larger education community; and ( vi ) Public provision of six monthly reports regarding the school and grade level accomplishment of literacy and numeracy.", + "ner_text": [ + [ + 100, + 134, + "named" + ] + ], + "validated": false, + "empirical_context": "Support for Foundational Learning ( US $ 30 million ). This component includes the following: ( i ) Diagnostic tools for the classroom that will provide a teacher with an accurate understanding of the level of literacy and numeracy of each student and of the classroom as a whole - the diagnostic will help the teacher understand objectively the need for learning recovery from school disruptions and other causes as well as a preventive early-warning protocol to help prevent students falling behind in the future; ( ii ) Development of the didactic resources in digital and analog formats to support early reading, writing, and early numeracy, to respond to the learning diagnostic ( iii ) Deployment of these didactic resources to classrooms at the Preschool, Primary, and Secondary levels ( within each level and articulation between levels ), and through MEP \u2019 s digital platform; ( iv ) Digital platform of a professional development plan for teachers, school principals, and pedagogical advisors for the deployment of foundational learning; ( v ) Development and implementation of a national foundational learning campaign emphasizing the joy of reading, writing, and numerical competency to involve parents and the larger education community; and ( vi ) Public provision of six monthly reports regarding the school and grade level accomplishment of literacy and numeracy.", + "type": "tool", + "explanation": "However, the context indicates that these are tools designed to assess literacy and numeracy, not a structured collection of data themselves.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'diagnostic tools' could imply a structured method of collecting data on student performance.", + "contextual_reason_agent": "However, the context indicates that these are tools designed to assess literacy and numeracy, not a structured collection of data themselves.", + "contextual_signal": "mentioned only as a tool for assessment, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 35, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 26 of 68 project manager, FM specialist, PS, M & E specialist, environmental specialist and social development specialist with experience in GBV prevention and response, civil works engineers, and other technical specialists to ensure timely, quality, transparent, and effective implementation of the civil works activities. Further details will be provided in the POM. The fiduciary assessment of the PMT of the MoER and NORLD has been completed and is reflected in the fiduciary sections. B. Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "ner_text": [ + [ + 949, + 1063, + "named" + ], + [ + 874, + 900, + "nationally representative national and international assessments of student performance and classroom observations <> reference population" + ], + [ + 1329, + 1347, + "nationally representative national and international assessments of student performance and classroom observations <> usage context" + ] + ], + "validated": true, + "empirical_context": "Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "type": "assessment", + "explanation": "This is indeed a dataset as it is used to monitor educational outcomes and is mentioned as a source of data for evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to assessments that collect data on student performance.", + "contextual_reason_agent": "This is indeed a dataset as it is used to monitor educational outcomes and is mentioned as a source of data for evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "146_537660PAD0ARAB0PAD000Arabic00final0", + "page": 97, + "text": "98 \u0627\u0644\u0645\u0631\u0641\u0642 \u0627\u0644\u062b\u0627\u0646 \u064b \u0639\u0634\u0631: \u062b\u0628\u062a \u0627\u0644\u0645\u0631\u0627\u062c\u0639 Chandler, Rudolph. ( 2009 ), Yemen Immunization Tracking Study. Health Management Associates. ( 2007 ) Midwifery Licensure and Discipline Program in Washington State-Economic Costs and Benefits. Thuriau, M. C. ( 1971 ), Notes on the Epidemiology of Malaria in the Yemen Republic, Ann. Soc. Belg. Med. Trop., 51 ( 2 ), 229-238. Suleman, M. ( 1999 ), Entomological Situation Related to Epidemiology of Malaria in Yemen. Assignment Report, 17 February - 15March, Who / EMRO. Ministry of Public Health and Population in Yemen. Public Health Expenditure Review, 2004-2007. Ministry of Public Health and Population in Yemen. PAPFAM Summary Report of the Yemen Family Health Survey 2003. The World Bank. ( 2007 ), Yemen Poverty Assessment vol. II: Annexes. The World Bank. ( 2009 ), Yemen Health Sector Review: Conceptual Framework and Strategy Options The World Bank ( 2009 ), Health Financing Modalities in Yemen.", + "ner_text": [ + [ + 683, + 709, + "named" + ], + [ + 64, + 69, + "Yemen Family Health Survey <> data geography" + ], + [ + 299, + 313, + "Yemen Family Health Survey <> data geography" + ], + [ + 362, + 373, + "Yemen Family Health Survey <> author" + ], + [ + 654, + 660, + "Yemen Family Health Survey <> publisher" + ], + [ + 683, + 688, + "Yemen Family Health Survey <> data geography" + ], + [ + 710, + 714, + "Yemen Family Health Survey <> publication year" + ], + [ + 720, + 730, + "Yemen Family Health Survey <> publisher" + ], + [ + 742, + 747, + "Yemen Family Health Survey <> data geography" + ], + [ + 789, + 799, + "Yemen Family Health Survey <> publisher" + ], + [ + 885, + 895, + "Yemen Family Health Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Ministry of Public Health and Population in Yemen. PAPFAM Summary Report of the Yemen Family Health Survey 2003. The World Bank.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific survey conducted to collect health-related data in Yemen.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific survey conducted to collect health-related data in Yemen.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "182_multi0page", + "page": 20, + "text": "Sub-project level monitoring will be carried during the sub-project cycle and be more closely defined once the operations manual has been developed. To monitor the indicators that have been developed in the project LogFrame ( see Annex 1 ), each of the indicators will be associated with baseline and target values, frequency of collection, responsibility and methods of measurement. The indicators in the LogFrame will be further developed to include input, output, and outcome. In some cases those would be associated with relevant risk and sustainability indicators. The monitoring system through the use of pre-formatted reports will require the recipient of the report to provide comments and feedback on the information submitted. This will insure the presence of a feedback mechanism. In addition, the annual work plan will be required to have monitoring activities as part of component activities to insure that such activities are not set aside and that they become an integral part of project implementation. Using the established baselines from the VNICA study and other studies to be carried out during the project lifetime the monitoring will be carried out starting with project effectiveness. Monitoring reports will be required on a periodic basis in addition to various reports from special studies and research activities. As part of the Bank monitoring, semi-annual supervision missions and a mid-term review will be undertaken. D. Project Rationale 1. Project alternatives considered and reasons for rejection: I. Project alternatives considered and reasons for rejection: * The project initially considered up-front financing of a large package of community services. However, it was determined that systemic reform would best be accomplished through a two-stage process, with the first stage of the project supporting a smaller package of services and intensive policy and institutional strengthening, and the second stage financing a larger package of services once a set of policy triggers had been achieved. * The possibility of adding a component for social services delivery in the Albania Development Fund - 17 -", + "ner_text": [ + [ + 1060, + 1071, + "named" + ], + [ + 1185, + 1206, + "VNICA study <> data description" + ], + [ + 2109, + 2116, + "VNICA study <> data geography" + ], + [ + 2156, + 2174, + "VNICA study <> usage context" + ] + ], + "validated": true, + "empirical_context": "In addition, the annual work plan will be required to have monitoring activities as part of component activities to insure that such activities are not set aside and that they become an integral part of project implementation. Using the established baselines from the VNICA study and other studies to be carried out during the project lifetime the monitoring will be carried out starting with project effectiveness. Monitoring reports will be required on a periodic basis in addition to various reports from special studies and research activities.", + "type": "study", + "explanation": "The VNICA study is used as a baseline for monitoring activities, indicating it serves as a data source in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a study that likely contains collected data for analysis.", + "contextual_reason_agent": "The VNICA study is used as a baseline for monitoring activities, indicating it serves as a data source in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 54, + "text": "Frequency Quarterly Data source UNICEF Methodology for Data Collection UNICEF to provide data / TPM to verify Responsibility for Data Collection UNICEF; PMU Percentage of completeness of reporting by facilities Description Percentage of facilities that submit complete reports within the required deadline. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / PMU Percentage of states that conducted quarterly coordination meetings with a review of data and documented with minutes including action items and follow-up Description Percentage of State \u2019 s quarterly health service delivery coordination meetings for the health sector held with a review of data included in the meeting and documented with minutes which include action items and follow-up on action items. Meetings are to be held quarterly in each state. Four meetings are expected each year per state. CHDs and implementing partners will be participated in the review Frequency Quarterly Data source MoH / WHO Methodology for Data Collection WHO to provide data / TPM to verify", + "ner_text": [ + [ + 377, + 382, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Quarterly Data source UNICEF Methodology for Data Collection UNICEF to provide data / TPM to verify Responsibility for Data Collection UNICEF; PMU Percentage of completeness of reporting by facilities Description Percentage of facilities that submit complete reports within the required deadline. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH / PMU Percentage of states that conducted quarterly coordination meetings with a review of data and documented with minutes including action items and follow-up Description Percentage of State \u2019 s quarterly health service delivery coordination meetings for the health sector held with a review of data included in the meeting and documented with minutes which include action items and follow-up on action items. Meetings are to be held quarterly in each state.", + "type": "system", + "explanation": "However, DHIS2 is described as a system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned as a data source.", + "contextual_reason_agent": "However, DHIS2 is described as a system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "This subcomponent will support improved planning, management, and evaluation of education reforms in the following areas: ( a ) improvement of the existing EMIS and making better use of data to support management decisions at all levels, ( b ) national and international student assessments, and ( c ) TA to support implementation of Components 1 and 2 of the project. 41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "ner_text": [ + [ + 156, + 160, + "named" + ] + ], + "validated": false, + "empirical_context": "This subcomponent will support improved planning, management, and evaluation of education reforms in the following areas: ( a ) improvement of the existing EMIS and making better use of data to support management decisions at all levels, ( b ) national and international student assessments, and ( c ) TA to support implementation of Components 1 and 2 of the project. 41.", + "type": "system", + "explanation": "However, EMIS is described as a system for managing information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data management in education.", + "contextual_reason_agent": "However, EMIS is described as a system for managing information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 69, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | 60 20. Targeted results may be impacted by the exogenous risks identified in the Program \u2019 s integrated risk assessment. Increasing security concerns triggered by the ongoing conflict may distract government attention and fiscal resources, while climate change is not expected to impact the achievement of results. M & E Capacity 21. The Program \u2019 s M & E framework leverages government monitoring systems, which ensures adequate measuring and reporting of progress toward the targeted results. Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "ner_text": [ + [ + 1296, + 1316, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "type": "portal", + "explanation": "However, the context indicates that the PMDU internet portal is mentioned as a project or platform, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'internet portal' which could imply data access.", + "contextual_reason_agent": "However, the context indicates that the PMDU internet portal is mentioned as a project or platform, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 58, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLIX Against DLR 9. 2: US $ 1 million for each core micro-data set made available via an online portal to users in adherence with the access protocols for up to a total of US $ 8 million. Against DLR 9. 3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "ner_text": [ + [ + 515, + 518, + "named" + ] + ], + "validated": false, + "empirical_context": "3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate.", + "type": "program", + "explanation": "'DLI' is not a dataset but rather a program that supports the operationalization of a statistical interface.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DLI' is a dataset because it is associated with data availability and indicators.", + "contextual_reason_agent": "'DLI' is not a dataset but rather a program that supports the operationalization of a statistical interface.", + "contextual_signal": "'mentioned only as a project, not as a data source'", + "tags": [] + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 13, + "text": "3 Poverty, Food insecurity and Malnutrition 6. An estimated 60 percent of Burundians were living below the food consumption poverty line prior to the current crisis ( Vulnerability assessment, 2014, based on the PMS data from 2012-13 ) 5 and an estimated 40 percent faced extreme deprivation. Large families with a high dependency index ( especially single parent families ) are less likely to satisfy their basic food needs, particularly if the head of household works in agriculture. Regionally, the highest levels of deprivation are found in the North ( despite improvements in Ngozi and Kirundo ) and in the Center East ( especially in Mwaro and Cankuso ). In both regions, multiple deprivations affect up to 75 percent of households. On the other hand, food deprivation is lower in urban areas ( 48 percent ) and in the capital Bujumbura ( 41 percent ). 7. Burundi was the most food insecure country in the world in 2013 and food insecurity is increasing again as a result of the current political and climate crisis. The majority of the poor ( 97 percent ) reside in rural areas, depend on rain-fed low-input agriculture on very small farms ( 87 percent of poor households cultivate less than \u00bd ha ), and are highly vulnerable to climate shocks.", + "ner_text": [ + [ + 212, + 220, + "named" + ], + [ + 74, + 84, + "PMS data <> reference population" + ], + [ + 193, + 197, + "PMS data <> publication year" + ], + [ + 226, + 233, + "PMS data <> reference year" + ], + [ + 581, + 586, + "PMS data <> data geography" + ], + [ + 591, + 598, + "PMS data <> data geography" + ], + [ + 612, + 623, + "PMS data <> data geography" + ], + [ + 640, + 645, + "PMS data <> data geography" + ], + [ + 650, + 657, + "PMS data <> data geography" + ], + [ + 833, + 842, + "PMS data <> data geography" + ], + [ + 862, + 869, + "PMS data <> data geography" + ] + ], + "validated": true, + "empirical_context": "3 Poverty, Food insecurity and Malnutrition 6. An estimated 60 percent of Burundians were living below the food consumption poverty line prior to the current crisis ( Vulnerability assessment, 2014, based on the PMS data from 2012-13 ) 5 and an estimated 40 percent faced extreme deprivation. Large families with a high dependency index ( especially single parent families ) are less likely to satisfy their basic food needs, particularly if the head of household works in agriculture.", + "type": "data", + "explanation": "The context confirms it is a dataset as it is used to provide empirical data for the vulnerability assessment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PMS data' is referenced in relation to a vulnerability assessment and specific statistics.", + "contextual_reason_agent": "The context confirms it is a dataset as it is used to provide empirical data for the vulnerability assessment.", + "contextual_signal": "follows 'based on' indicating it is a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 21, + "text": "To facilitate the match between certified workers and the required skills demanded by employers, the ACC-CU will develop two operational tools: ( a ) A pool of workers. The ACC-CU will develop, implement, administer, and maintain a comprehensive registry of certified workers. The registry, which can also be accessed by eligible employers, will contain detailed information about the potential worker including, among others, a clear profile, area of expertise, experience, and background. ( b ) A database of vacancies. The ACC-CU will develop and maintain an updated system of vacancies arising from contract farming schemes with clear skills requirements by specific employers. 38. These tools will be developed and implemented as part of the information technology ( IT ) enhancement support described in Subcomponent 3. 1. These tools will provide the necessary information to develop a job matching methodology to support the allocation of skilled workers in the most suitable available vacancies. Using a matching algorithm, the preferences of workers regarding the type of agricultural work to be carried, its location, and the specific skills and experience they are equipped with will be matched to the demand for workers and preferences among farmers. Based on this process, a ranked set of workers who are the closest match to farmers \u2019 requirements expressed in their vacancy will be proposed to farmers.", + "ner_text": [ + [ + 232, + 275, + "named" + ], + [ + 101, + 107, + "comprehensive registry of certified workers <> publisher" + ], + [ + 173, + 179, + "comprehensive registry of certified workers <> publisher" + ], + [ + 526, + 532, + "comprehensive registry of certified workers <> publisher" + ] + ], + "validated": true, + "empirical_context": "To facilitate the match between certified workers and the required skills demanded by employers, the ACC-CU will develop two operational tools: ( a ) A pool of workers. The ACC-CU will develop, implement, administer, and maintain a comprehensive registry of certified workers. The registry, which can also be accessed by eligible employers, will contain detailed information about the potential worker including, among others, a clear profile, area of expertise, experience, and background.", + "type": "registry", + "explanation": "This is a dataset as it is explicitly described as a registry that contains detailed information about workers, functioning as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it describes a structured collection of information about certified workers.", + "contextual_reason_agent": "This is a dataset as it is explicitly described as a registry that contains detailed information about workers, functioning as a data source.", + "contextual_signal": "described as a registry that contains detailed information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 31, + "text": "The Lebanese education system is structured around three ( 3 ) cycles of mandatory education ( age 6 to 15 ) with three ( 3 ) grades in each cycle. There is also a kindergarten stage with 3 grades ( age 3 to 6 ) and a secondary education stage with 3 grades ( age 15 to 18 ). The private sector is highly predominant in the education sector with approximately 70 percent of the student population attending private schools ( 14 percent in private schools subsidized by the Government, and 55 percent in non - subsidized private schools ). The average student success rates in the official examinations for the 9th and 12th grades in public and private schools are 61 and 73 percent respectively, which are low given the high expenditure levels in the sector. Moreover, an analysis of international and national-level survey data reveals that between-school differences are large and associated with factors such as school size, socioeconomic status, and quality of school buildings and instructional resources. 4. The RACE 2 program aims to achieve three outcomes and nine outputs that are structured around three pillars: i ) Equitable Access, ii ) Enhanced Quality, and iii ) Strengthened Systems. Each of these are described below. The results framework for the overall RACE program was developed in a consultative fashion, under the leadership of MEHE.", + "ner_text": [ + [ + 784, + 828, + "named" + ], + [ + 4, + 12, + "international and national-level survey data <> data geography" + ], + [ + 915, + 948, + "international and national-level survey data <> data description" + ], + [ + 954, + 1009, + "international and national-level survey data <> data description" + ] + ], + "validated": true, + "empirical_context": "The average student success rates in the official examinations for the 9th and 12th grades in public and private schools are 61 and 73 percent respectively, which are low given the high expenditure levels in the sector. Moreover, an analysis of international and national-level survey data reveals that between-school differences are large and associated with factors such as school size, socioeconomic status, and quality of school buildings and instructional resources. 4.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned that it is used for analysis of student success rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'survey data' which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that it is used for analysis of student success rates.", + "contextual_signal": "follows 'analysis of' indicating it is used as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 104, + "text": "DLI_TBL_VERIFICATION DLI 4. 1 Number of Vulnerable Learners selected to receive a scholarship, school kits and mentoring support services ( Refugee Host Communities ) Description 2, 000 poor and vulnerable learners receiving scholarship, school kits and mentoring support services, with at least 55 % girl beneficiaries Data source / Agency MoE, through EGF Verification Entity IVA Procedure IVA will obtain a detailed report from MoE on the recruitment process of the beneficiaries and the awarded scholarships by gender and school of admission. The required selection criteria is detailed in the POM. IVA will conduct an online or phone based survey to at least 50 % of the beneficiaries to confirm award of the scholarship. US $ 2, 200 for every beneficiary selected to receive scholarship, school kits and mentoring support services annually, and attending school, up to $ 2, 2000, 000 for each School Calendar Year during Program implementation. DLI_TBL_VERIFICATION DLI 4. 2 Number of Vulnerable Learners selected to receive a scholarship, school kits and mentoring support services ( Refugees ) Description 8, 000 poor and vulnerable learners receiving scholarship, school kits and mentoring support services, with at least 55 % girl beneficiaries. Criteria for selection is detailed in the POM.", + "ner_text": [ + [ + 623, + 651, + "named" + ], + [ + 392, + 395, + "online or phone based survey <> author" + ], + [ + 603, + 606, + "online or phone based survey <> author" + ], + [ + 676, + 689, + "online or phone based survey <> reference population" + ], + [ + 749, + 760, + "online or phone based survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The required selection criteria is detailed in the POM. IVA will conduct an online or phone based survey to at least 50 % of the beneficiaries to confirm award of the scholarship. US $ 2, 200 for every beneficiary selected to receive scholarship, school kits and mentoring support services annually, and attending school, up to $ 2, 2000, 000 for each School Calendar Year during Program implementation.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves collecting structured data from participants to confirm scholarship awards.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it involves collecting structured data from participants to confirm scholarship awards.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 47, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXVIII Verification Protocol Table PDO-level Indicators Result Area 1 on improved service delivery through digitalization Expanding trusted and inclusive access to people-centric digitalized services Description Individuals accessing digitalized public - and private-sector services using trusted, people-centric DPI [ Number ]. Frequency Annually. Data source Annual reports on the usage of digital services from MODEE, sectoral ministries, and the private sector. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improved access to patient-centric digital service Description Number of beneficiaries who actively use patient-centric digital services offfered through an eletronic medical record ( EMR ) platfrom. Active users refer to those with active acccounts who had logged into accounts at least twice since registration ( disaggregated for Syrian refugee users ).. Frequency Annually. Data source Electronic Health Solution ( EHS ). Methodology for Data Collection Data extracted from myHakeem to measure the number of active users. Responsibility for Data Collection MOH and EHS. Result Area 2 on enhanced government effectiveness through Digitalization Increased student trust in the fairness of the Tawjihi exam. Description Increased percentage of students expressing trust in the fairness of the Tawjihi exam.", + "ner_text": [ + [ + 644, + 663, + "named" + ], + [ + 510, + 515, + "administrative data <> publisher" + ], + [ + 634, + 639, + "administrative data <> publisher" + ], + [ + 843, + 848, + "administrative data <> publisher" + ], + [ + 1183, + 1203, + "administrative data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Data source Annual reports on the usage of digital services from MODEE, sectoral ministries, and the private sector. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE.", + "type": "administrative data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data for collecting indicator values.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured data collected for administrative purposes.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data for collecting indicator values.", + "contextual_signal": "mentioned as a source of data for collecting indicator values", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 27, + "text": "The design of the project should focus on responding to the government \u2019 s short-term priority and to the urgent needs of the beneficiaries, as a key factor in the project \u2019 s successful implementation. \u2022 Project monitoring and evaluation: Establishing an effective monitoring and evaluation ( M & E ) system in the context of a crisis response operation is a challenge, but its importance should not be underestimated. 50. The project also builds on evidence from the implementation of Takaful, as it relates to monitoring. The national welfare support program \u201c Takaful \u201d has increased the use of digitization, and refinement of targeting based on government coverage targets using national databases. Takaful has automated processes for online registration, data verification, selection of households for field verification, as well as selection, enrollment, and digital payment to beneficiaries through bank accounts and e-wallets. The Takaful database already includes administratively verified and most up-to-date data for over one million households who applied earlier through the Takaful platform for social assistance. Nevertheless, for registration of applicants, the National Aid Fund ( NAF ) reaches out to poor and vulnerable households in remote areas using mobile registration centers. The Takaful program under NAF uses a targeting formula that includes 57 socioeconomic indicators, including by sex and gender of the household head, recognizing the additional vulnerabilities females ( and particularly female-headed households ) face. The targeting formula is then applied again to the verified registration data to select the poorest 100, 000 households according to the formula score.", + "ner_text": [ + [ + 1605, + 1631, + "named" + ], + [ + 1220, + 1250, + "verified registration data <> reference population" + ], + [ + 1646, + 1673, + "verified registration data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The Takaful program under NAF uses a targeting formula that includes 57 socioeconomic indicators, including by sex and gender of the household head, recognizing the additional vulnerabilities females ( and particularly female-headed households ) face. The targeting formula is then applied again to the verified registration data to select the poorest 100, 000 households according to the formula score.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as being used to apply the targeting formula for selecting households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'verified registration data' suggests a structured collection of information used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as being used to apply the targeting formula for selecting households.", + "contextual_signal": "follows 'applied again to the verified registration data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "157_417980PAD0P1041ly10IDAR200081006011", + "page": 82, + "text": "Each participating JSCPD will be required to open separate bank accounts to receive the project \u2019 s funds and maintain separate records to account for project transactions; ( d ) each JSCPD will submit to PMT on a monthly basis, all original supporting documents o f project disbursements accompanied by a copy o f the bank statement and reconciliation statement, as well as the accounting entries recorded at the JSCPD level, for project disbursements; ( e ) based on the monthly records received from the JSCPDs, MOLG PMT will review and record on its accounting system all transactions and file original supporting documents received from the JSCPDs. Consequently, the JSCPD quarterly advance will be reduced by the monthly portion settled and approved by PMT; ( 0 no quarterly new advances will be made to the JSCPDs unless the previous quarter advance has been settled and cleared by PMT; ( g ) JSCPDs will submit to PMT a summary report on a quarterly basis illustrating the advance received during the quarter, uses of such advances, percentage o f completion of planned projects versus percentage of disbursements, updated cash flow forecast based on actual disbursements of the previous quarter and budget deviation analysis; ( h ) based on the information received from the JSCPDs, PMT will prepare quarterly IFRs setting out the sources and uses of funds by project category and component. The IFR will also include a deviation analysis at the project level between the annual approved budget and actual implementation with an updated cash flow forecast. Financial Reporting and Monitoring 26. be responsible for: PMT will have overall responsibility for the F M o f the Grant. Specifically, it will ( a ) consolidating all project financial data from all the beneficiary communities in the West Bank and Gaza; 75", + "ner_text": [ + [ + 1735, + 1757, + "named" + ] + ], + "validated": false, + "empirical_context": "be responsible for: PMT will have overall responsibility for the F M o f the Grant. Specifically, it will ( a ) consolidating all project financial data from all the beneficiary communities in the West Bank and Gaza; 75", + "type": "data", + "explanation": "'Project financial data' is not a structured collection of data but rather a general reference to financial information related to the project.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'project financial data' refers to a dataset because it includes the term 'data'.", + "contextual_reason_agent": "'Project financial data' is not a structured collection of data but rather a general reference to financial information related to the project.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "103_Kenya-Hon-Africa-PAD-04072017", + "page": 28, + "text": "The project will build on existing community \u2010 level structures, and will establish new local \u2010 level institutions as needed, including Village Level Committees ( VLC ), Ward Level Committees ( WLC ), and social audit committees ( SACs ). 56. The component will support the design of the project \u2019 s management information system ( MIS ) for monitoring inputs, outputs, and processes; the evaluation of outcome and impacts; environmental and social safeguard monitoring; and participatory M & E and internal learning. M & E activities will include regular monitoring of the progress and performance of implementation; independent process monitoring of the community \u2010 level planning and effectiveness and quality of capacity \u2010 building efforts; and undertaking annual thematic studies and outcome as well as impact assessments of the project. The project \u2019 s Results Framework will be used as a basis for reporting progress against indicators, including progress toward achieving the PDO and implementation progress. The project will consider the use of mobile technologies to increase the reach and frequency of data capturing at the local level and aggregating it in a platform that could serve as a dashboard. Component 5: Support to IGAD for Expansion of the Regional Secretariat on FDMM ( US $ 3 million equivalent ) 57.", + "ner_text": [ + [ + 300, + 329, + "named" + ] + ], + "validated": false, + "empirical_context": "56. The component will support the design of the project \u2019 s management information system ( MIS ) for monitoring inputs, outputs, and processes; the evaluation of outcome and impacts; environmental and social safeguard monitoring; and participatory M & E and internal learning. M & E activities will include regular monitoring of the progress and performance of implementation; independent process monitoring of the community \u2010 level planning and effectiveness and quality of capacity \u2010 building efforts; and undertaking annual thematic studies and outcome as well as impact assessments of the project.", + "type": "system", + "explanation": "However, it is mentioned as a system for monitoring and evaluation, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'management information system' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system for monitoring and evaluation, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "167_27761", + "page": 15, + "text": "The systems at M O F and Post Banks \u201d were found to be adequate to support the project. MOSA staff resources in program management will be strengthened by the addition of five experts ( a management, a health, an education and family trainer, a statistics, a finance and an information systems expert ). MOSA will employ consultants to carry out awareness campaigns and training to build capacity in MOH health centers and in project-related services provided by social workers. It will independent monitoring and evaluation consultants. 3. Monitoring and evaluation of outcomedresults The SSNRP project will monitor two types of indicators, output and outcome. The indicators will be collected internally and recorded in the MIS from the program. The output information will be used for the quarterly implementation progress reports to be submitted to the PA, the Bank, and other donors. These reports will cover financial data @ e., disbursements, payments made, payment commitments ), the number of beneficiaries by type, and percentage of condition compliance by the beneficiaries. Outcome indicators will be based on data gathered for the three kinds o f conditions: attendance levels, payments, and compliance.", + "ner_text": [ + [ + 726, + 729, + "named" + ] + ], + "validated": false, + "empirical_context": "Monitoring and evaluation of outcomedresults The SSNRP project will monitor two types of indicators, output and outcome. The indicators will be collected internally and recorded in the MIS from the program. The output information will be used for the quarterly implementation progress reports to be submitted to the PA, the Bank, and other donors.", + "type": "system", + "explanation": "'MIS' is not a dataset but a management information system that stores records and facilitates data collection.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to data collection and monitoring.", + "contextual_reason_agent": "'MIS' is not a dataset but a management information system that stores records and facilitates data collection.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 64, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 60 of 68 of short cycle courses. IRI # 18: Modernizing Education and Skills Governance Sub-component 2. 1 Annually Annual Work Plan and Budgets DGETFP will gather data from AWPBs DGETFP IRI # 19 Share of TVET graduates in project supported training programs who report that they are satisfied with their acquisition of employability skills. ( disaggregated ) Indicator will be disaggregated by economic sector, gender, refugee status and disability status. The findings compiled through the beneficiary surveys will be used to plan and implement time and bound actions or action plans to address this feedback. The results of the beneficiary survey will inform the development and implementation of the CEPs. Annually Survey conducted by PMU Survey using technology DGETFP ME IO Table SPACE", + "ner_text": [ + [ + 821, + 831, + "named" + ], + [ + 4, + 14, + "PMU Survey <> publisher" + ], + [ + 15, + 23, + "PMU Survey <> data geography" + ], + [ + 574, + 593, + "PMU Survey <> data type" + ] + ], + "validated": true, + "empirical_context": "The results of the beneficiary survey will inform the development and implementation of the CEPs. Annually Survey conducted by PMU Survey using technology DGETFP ME IO Table SPACE", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is used to inform the development and implementation of the CEPs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is used to inform the development and implementation of the CEPs.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 14, + "text": "World Bank. 12 United Nations High Commissioner for Refugees ( UNHCR ) and Government of Uganda ( GoU ), Uganda Comprehensive Refugee Response Portal, November 2020, https: / / data2. unhcr. org / en / country / uga 13 UNHCR and GoU, Uganda - Refugee Statistics October 2020, https: / / data2. unhcr. org / en / documents / details / 82807 14 http: / / documents. worldbank. org / curated / en / 571081569598919068 / Informing-the-Refugee-Policy-Response-in-Uganda-Results-from - the-Uganda-Refugee-and-Host-Communities-2018-Household-Survey 15 http: / / documents1. worldbank. org / curated / en / 571081569598919068 / pdf / Informing-the-Refugee-Policy-Response-in-Uganda-Results - from-the-Uganda-Refugee-and-Host-Communities-2018-Household-Survey. pdf 16 http: / / documents. worldbank. org / curated / en / 571081569598919068 / Informing-the-Refugee-Policy-Response-in-Uganda-Results-from - the-Uganda-Refugee-and-Host-Communities-2018-Household-Survey 17 http: / / documents. worldbank. org / curated / en / 571081569598919068 / Informing-the-Refugee-Policy-Response-in-Uganda-Results-from - the-Uganda-Refugee-and-Host-Communities-2018-Household-Survey", + "ner_text": [ + [ + 105, + 149, + "named" + ], + [ + 0, + 10, + "Uganda Comprehensive Refugee Response Portal <> publisher" + ], + [ + 105, + 111, + "Uganda Comprehensive Refugee Response Portal <> data geography" + ], + [ + 151, + 164, + "Uganda Comprehensive Refugee Response Portal <> publication year" + ], + [ + 219, + 224, + "Uganda Comprehensive Refugee Response Portal <> publisher" + ], + [ + 234, + 240, + "Uganda Comprehensive Refugee Response Portal <> data geography" + ] + ], + "validated": true, + "empirical_context": "World Bank. 12 United Nations High Commissioner for Refugees ( UNHCR ) and Government of Uganda ( GoU ), Uganda Comprehensive Refugee Response Portal, November 2020, https: / / data2. unhcr.", + "type": "portal", + "explanation": "This is a dataset as it is referenced in the context of the UNHCR and the Government of Uganda, indicating it serves as a data source for refugee response.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Portal' in its name, suggesting a collection of data.", + "contextual_reason_agent": "This is a dataset as it is referenced in the context of the UNHCR and the Government of Uganda, indicating it serves as a data source for refugee response.", + "contextual_signal": "mentioned as a data source in collaboration with UNHCR and GoU", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 33, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 29 of 40 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Student enrollment in targeted schools The enrollment will be monitored through the annual school census Annual Annual School Census Census of schools key data collected yearly Ministry of Education and PCU Girls enrolment in targeted schools Number of girls enrolled in targeted schools Annual Annual School Census Census of school key data collected annually Ministry of Education and PCU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of schools receiving grants Number of schools receiving grants Annual Administrativ e Reports Reports compiled by Localities and States PCU Share of schools with school-based management committees trained on school grant management Share of schools receiving training on school grant management.", + "ner_text": [ + [ + 376, + 396, + "named" + ], + [ + 4, + 14, + "annual school census <> publisher" + ], + [ + 15, + 20, + "annual school census <> data geography" + ], + [ + 469, + 490, + "annual school census <> author" + ] + ], + "validated": true, + "empirical_context": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 29 of 40 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Student enrollment in targeted schools The enrollment will be monitored through the annual school census Annual Annual School Census Census of schools key data collected yearly Ministry of Education and PCU Girls enrolment in targeted schools Number of girls enrolled in targeted schools Annual Annual School Census Census of school key data collected annually Ministry of Education and PCU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of schools receiving grants Number of schools receiving grants Annual Administrativ e Reports Reports compiled by Localities and States PCU Share of schools with school-based management committees trained on school grant management Share of schools receiving training on school grant management.", + "type": "census", + "explanation": "This is a dataset as it is explicitly mentioned as a source for monitoring student enrollment and is collected annually.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a systematic collection of data on school enrollment.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a source for monitoring student enrollment and is collected annually.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 14, + "text": "Syrian refugee students are reported to leave school ( 1, 600 students left due to bullying in 2016 ), or not enter at all, to preserve their safety and self \u2010 respect. 17. Teachers and school leaders are poorly trained to handle violence and disruptive behaviors. Teachers themselves are still prone to use aggressive means for managing classrooms and disciplining students. In the 2015 \u2013 2016 school year, 18 percent of children reported experiencing verbal violence in schools and 11 percent reported experiencing corporal punishment. Serious concerns also exist about the increase in student \u2010 to \u2010 student violence and disruptive behaviors ( particularly in schools with Syrian refugees ), including vandalism, harassment, bullying, and gender \u2010 based violence. The MOE has made concerted efforts, including the introduction of the school \u2010 based program Ma \u2019 an, to promote nonviolent and positive student discipline. The MOE has also initiated monthly violence surveys that act as deterrents for teachers from using violence and help to keep all actors accountable for their actions. However, further efforts are needed to support safe school environments and to understand and tackle the different challenges faced in gender \u2010 segregated schools. 18. Jordan faces an additional major challenge in relation to its student assessment system. Jordan administers several census and sample \u2010 based student assessments that appear to have weak feedback loops and therefore fail to inform the system on its performance early and effectively. There are three major national student assessments: ( a ) census \u2010 based national tests for grades 4, 8, and 10 in four core subjects ( Arabic, mathematics, science, and English ); ( b ) the sample \u2010 based National Assessment for Knowledge Economy ( NAFKE ) for grades 5, 9, and 11 ( in Arabic, mathematics, and science; and ( c ) the general Secondary Certificate Examination ( Tawjihi ), which serves the double", + "ner_text": [ + [ + 951, + 975, + "named" + ], + [ + 0, + 23, + "monthly violence surveys <> reference population" + ], + [ + 383, + 406, + "monthly violence surveys <> reference year" + ], + [ + 771, + 774, + "monthly violence surveys <> publisher" + ], + [ + 928, + 931, + "monthly violence surveys <> publisher" + ], + [ + 1259, + 1265, + "monthly violence surveys <> data geography" + ], + [ + 1348, + 1354, + "monthly violence surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "The MOE has made concerted efforts, including the introduction of the school \u2010 based program Ma \u2019 an, to promote nonviolent and positive student discipline. The MOE has also initiated monthly violence surveys that act as deterrents for teachers from using violence and help to keep all actors accountable for their actions. However, further efforts are needed to support safe school environments and to understand and tackle the different challenges faced in gender \u2010 segregated schools.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that collects data to understand and address violence in schools.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'monthly violence surveys' which implies a structured collection of data on violence.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that collects data to understand and address violence in schools.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 93, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 82 77. An integrated M & E system will be developed and implemented as part of the Project to support implementation and reporting. The MWE, NWSC, and other agencies are currently collecting sector performance data in different databases and monitoring platforms. The MWE utilizes the Water Supply Database, Water Supply Atlas, and the UPMIS. The MWE utilizes operations and financial tracking systems at its headquarters and district levels. The OPM utilized the Refugees Response Portal. The OPM is also currently developing a new refugee database and service provider performance monitoring and tracking system. The new integrated M & E system will interface with various systems to consolidate data storage and facilitate information management. Specific Project information related to procurement, disbursements, and environmental and safeguards implementation will also be integrated in the new M & E system. The development and implementation of this M & E system is expected to be centralized at the MWE. Climate Mitigation and Adaptation Co-benefits 78. An assessment of climate co-benefits for the Project was conducted for Components 1 to 3. A quantitative analysis was conducted to estimate the Project ' s impact on GHG emissions and to value the externality using the shadow price of carbon ( Annex 4 ).", + "ner_text": [ + [ + 543, + 567, + "named" + ] + ], + "validated": false, + "empirical_context": "The MWE utilizes operations and financial tracking systems at its headquarters and district levels. The OPM utilized the Refugees Response Portal. The OPM is also currently developing a new refugee database and service provider performance monitoring and tracking system.", + "type": "system", + "explanation": "However, the context indicates that it is a system used by the OPM, not explicitly a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Portal', which can imply a data source.", + "contextual_reason_agent": "However, the context indicates that it is a system used by the OPM, not explicitly a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "203_multi-page", + "page": 32, + "text": "The volume of work may require HUDC to hire consultants to undertake some tasks, but in-house managerial capacity and experience exist to carry out HUDC ' s role in the program. 20. Part B: CVDB ' s role in the program is first as a fund management institution, which is their normal scope of work. They will also be charged with promoting, appraising and overseeing implementation, for which they will require considerable strengthening. CVDB will establish a program management unit ( CVDB / PMU ), and additional skills will be acquired through the use of consultants. In addition, Part B will make use of regional offices of the Ministry of Municipalities and Rural Affairs and environiment ( MMRAE ), where local engineering services exist. All institutions will require new capacity to analyze projects and training in beneficiary participation techniques. 21. National-level capacity to identify the poor and their needs is an important operational input. The technical assistance component of the program will address capacity strengthening in this area. Addressing Poverty 22. Both Part A and Part B target benefits to the poor. In the case of Part A, the refugee camps and squatter areas selected are well known and studied by officials. According to studies the typical income in most families in Part A target areas are below the poverty line. Part B will use data from the NAF and unemployment statistics ( available unemployment data is unreliable but surveys may provide information to fine-tune the initial targeting mechanisms based on NAF ' s data ) to prioritize communities for assistance. 23. The definition and selection of eligible projects is also part of the targeting mechanism. In the case of Part A, the standards were kept low to maximize coverage of the areas of concentrated poverty. The investment programs for each location has been discussed with camp improvement committees. For Part B, the list of eligible projects favors smaller investments in the areas with poorest service and demands a full consultative process with residents in order to receive funding priority.", + "ner_text": [ + [ + 1394, + 1417, + "named" + ], + [ + 1430, + 1447, + "unemployment statistics <> data type" + ] + ], + "validated": true, + "empirical_context": "According to studies the typical income in most families in Part A target areas are below the poverty line. Part B will use data from the NAF and unemployment statistics ( available unemployment data is unreliable but surveys may provide information to fine-tune the initial targeting mechanisms based on NAF ' s data ) to prioritize communities for assistance. 23.", + "type": "statistics", + "explanation": "In this context, it is used as a source of information to prioritize communities for assistance, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'unemployment statistics' suggests a collection of data related to unemployment figures.", + "contextual_reason_agent": "In this context, it is used as a source of information to prioritize communities for assistance, indicating it functions as a dataset.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 92, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level. People provided with new or improved electricity service - Female Semi - annually. National statistics / Proj ect data. Estimated based on the share of female in the national population as published by the national statistics institute or other credible sources. NIGELEC / ANPER / ANERS OL. Refugee and host beneficiaries provided with electricity services through grid, mini grids and electrified public institutions This relates to the number of refugees and host populations living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. NIGELEC / ANPER / ANERS OL.", + "ner_text": [ + [ + 217, + 222, + "named" + ] + ], + "validated": false, + "empirical_context": "acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level.", + "type": "organization", + "explanation": "ANPER is mentioned as an entity but not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed ANPER is a dataset because it is mentioned in the context of acquiring data for off-grid systems.", + "contextual_reason_agent": "ANPER is mentioned as an entity but not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as an organization, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 44, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 39 of 61 Households with complete information in the Social Registry - Refugee Number of refugee households registred within the Social Registry database and for which the socio - economic data has been collected and recorded. Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years. Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "ner_text": [ + [ + 1069, + 1084, + "named" + ] + ], + "validated": false, + "empirical_context": "Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "type": "system", + "explanation": "However, the context indicates that 'Social Registry' is mentioned as a management information system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'registry' which often implies a structured collection of data.", + "contextual_reason_agent": "However, the context indicates that 'Social Registry' is mentioned as a management information system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "157_417980PAD0P1041ly10IDAR200081006011", + "page": 19, + "text": "C. Monitoring and evaluation of outcomes / results 29. The monitoring and evaluation ( M & E ) system will include: ( a ) regular monitoring and reporting by PMT and MOLG District Office ( b ) participatory M & E through evaluations by target communities and; ( c ) external evaluations and audits as described in Annex 3. Coordinators; M & E at the national and regional levels 30. The PMT will be responsible for aggregating and synthesizing data on the project \u2019 s outcome indicators, using a results-based M & E framework. A Management Information System ( MIS ) i s currently being designed at the MOLG Projects Department with support from UNDP and the Government of Japan. It was agreed that the VNDP M & E system will build on this MIS to become the central storehouse that links data coming from MOLG District Offices, the JSCPDs, and the communities. The MIS will track information on indicators for achieving the PDO using simple forms such as the CDD subproject proposal forms, final evaluations of implemented CDD subprojects submitted by the Joint Project Support Group ( JPSGs ) and include characteristics of main beneficiaries ( gender, youth, economically marginalized persons, etc. ), the amount of community contributions and quarterly progress reporting by the JSCPDs3. 31.", + "ner_text": [ + [ + 529, + 558, + "named" + ] + ], + "validated": false, + "empirical_context": "The PMT will be responsible for aggregating and synthesizing data on the project \u2019 s outcome indicators, using a results-based M & E framework. A Management Information System ( MIS ) i s currently being designed at the MOLG Projects Department with support from UNDP and the Government of Japan. It was agreed that the VNDP M & E system will build on this MIS to become the central storehouse that links data coming from MOLG District Offices, the JSCPDs, and the communities.", + "type": "system", + "explanation": "However, it is described as a system designed to manage information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data aggregation and synthesis.", + "contextual_reason_agent": "However, it is described as a system designed to manage information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 36, + "text": "Frequency Annual Data source MEP Curriculum Department Methodology for Data Collection PIU memo Responsibility for Data Collection PIU IRI 2. 1. 3. Percentage of preschool, primary and secondary schools receiving educational resources for foundational learning Description This indicator measures the share of preschool, primary and secondary schools out of the total number of schools in the system receiving education resources developed or validated as part of the Project for foundational learning. Frequency Annual Data source MEP Department of Statistics Methodology for Data Collection Annual end-of-year school survey with module on foundational learning Responsibility for Data Collection MEP Department of Statistics IRI 2. 1. 4 My Foundational Learning Professional Development Plan platform for teachers, school administrators and pedagogical advisors Description The indicator measures the use of an application developed under the Project for teachers, schools administrators and pedagogical advisors at the Preschool, Primary, and Secondary levels Frequency Annual Data source MEP Curriculum Department Methodology for Data Collection The App will generate realtime statistics from which data will be downloaded by the PIU for this indicator. Responsibility for Data Collection PIU IRI 2. 1. 5 National foundational learning campaign", + "ner_text": [ + [ + 742, + 793, + "named" + ] + ], + "validated": false, + "empirical_context": "1. 4 My Foundational Learning Professional Development Plan platform for teachers, school administrators and pedagogical advisors Description The indicator measures the use of an application developed under the Project for teachers, schools administrators and pedagogical advisors at the Preschool, Primary, and Secondary levels Frequency Annual Data source MEP Curriculum Department Methodology for Data Collection The App will generate realtime statistics from which data will be downloaded by the PIU for this indicator. Responsibility for Data Collection PIU IRI 2.", + "type": "program", + "explanation": "However, it is described as a program and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' in the context of a professional development plan.", + "contextual_reason_agent": "However, it is described as a program and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 17, + "text": "The proposed project will allow for the inclusion of poor and vulnerable households in the refugee and host communities of the Hodh Chargui region within the SSN system, including the Social Registry, Tekavoul, and Elmaouna programs. Component 1: Updating and enhancing the Social Registry ( US $ 7. 3 million with US $ 4. 0 million from national IDA18, US $ 0. 9 million from IDA18 RSW, US $ 0. 9 million from ASP MDTF and US $ 1. 5 million from government ) 22. This component will be implemented by the Taazour General Delegation. It supports a full update of the Social Registry and inclusion of eligible refugee households ( Sub-component 1. 1 ) and its usage for operational and strategic purposes ( Sub-component 1. 2 ). Payment Platform POOR HOUSEHOLDS Social Transfer Program \u2022 Extreme Poor households \u2022 5-year cycle \u2022 CCT \u2022 Social promotion Social Registry \u2022 Identification \u2022 Data collection \u2022 Support to targeted social programs Shock - responsive Safety Net Program \u2022 Vulnerablehouseholds \u2022 4 months \u2022 Unconditional CT \u2022 Using SP tools Early Warning Systems DATA DATA Beneficiary list Beneficiary List", + "ner_text": [ + [ + 851, + 866, + "named" + ], + [ + 127, + 146, + "Social Registry <> data geography" + ], + [ + 506, + 532, + "Social Registry <> author" + ], + [ + 600, + 627, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "2 ). Payment Platform POOR HOUSEHOLDS Social Transfer Program \u2022 Extreme Poor households \u2022 5-year cycle \u2022 CCT \u2022 Social promotion Social Registry \u2022 Identification \u2022 Data collection \u2022 Support to targeted social programs Shock - responsive Safety Net Program \u2022 Vulnerablehouseholds \u2022 4 months \u2022 Unconditional CT \u2022 Using SP tools Early Warning Systems DATA DATA Beneficiary list Beneficiary List", + "type": "registry", + "explanation": "In the context, it is explicitly mentioned as part of data collection and support to targeted social programs, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as part of data collection and support to targeted social programs, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 65, + "text": "These are key variables in computing estimated potential impacts on poverty and consumption. Therefore, if updated, nationally representative consumption data becomes available, it should be considered. 4. The benchmark benefit scenarios considered in this analysis are the following: ( a ) CfW pilot. XAF 1, 200 per day wage for 80 days of work implemented in N ' Djamena ( b ) CT pilot. XAF 15, 000 per month per household for a period of 24 months ( paid every two months ) implemented in one Sahel region and one Sudanian region 5. Based on the ECOSIT data, these benefit packages will cover at least 40 percent of the food poverty gap in the respective regions in which the pilots will be implemented. 6. Table 5. 1 summarizes the estimated impact on poverty and consumption of the benefits provided through the CfW pilot. This shows that the benchmark scenario of 80 days at XAF 1, 200 per day could cover 40 percent of the food poverty gap. Given an expected budget of US $ 2. 0 million for the implementation of this subcomponent, it is estimated that up to 9, 000 individuals could participate in the pilot.", + "ner_text": [ + [ + 116, + 158, + "named" + ], + [ + 1132, + 1150, + "nationally representative consumption data <> usage context" + ] + ], + "validated": true, + "empirical_context": "These are key variables in computing estimated potential impacts on poverty and consumption. Therefore, if updated, nationally representative consumption data becomes available, it should be considered. 4.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a specific type of data that is used for empirical analysis regarding poverty and consumption.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'nationally representative consumption data', which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific type of data that is used for empirical analysis regarding poverty and consumption.", + "contextual_signal": "mentioned as a data source for estimating impacts", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 18, + "text": "With over 1. 1 million people ( 10 percent of the population ) already internally displaced, new displacement, particularly to urban centres, is compounding existing development deficits and strain the capacity of most communities. 13. Female-headed households and displaced women and girls are among the populations most vulnerable to the adverse impacts of the current drought. In addition, the limited security in IDP settlements, poor living conditions and lack of protection pose added risks of gender-based violence ( GBV ) and sexual exploitation to female IDPs. The separation of many women and children from community and familial support structures, as well as from traditional livelihoods activities, contributes to an increased reliance on marginal, inconsistent and hazardous livelihood strategies, which heighten poverty, food 7 OCHA: Humanitarian Bulletin Somalia, May 4 2017 8 The RDNA relies on the following sources of information: Drought Severity data was derived from ENDVI 10-year historical anomaly data based on LANDSAT 8 imagery. Displacement figures were derived from compiled IDP data from IOM, UN Habitat, UNHCR and local Ministries of Interior. Agriculture and livestock impact data heavily relied on expert judgment and data from FAO FSNAU database ( 2011-2017 ), UNFPA 2014 population data and LANDSCAN 2015 population density. Cholera / AWD forecasts and data was derived from World Health Organization ( WHO ) data, reporting and forecasting analysis. Water resources data was derived from UN SWALIM and ICRC databases 9 When these assets are priced at the prevailing depressed prices in the markets. The scenarios assume drought effect on grazing areas, disparities in household resilience to the effects of drought based on livelihood zone, wealth and coping mechanisms, as well as differences in adaptation of livestock to drought 10 http: / / reliefweb. int / report / somalia / unhcr-somalia-drought-displacement-period-1-nov-2016-31-march-2017", + "ner_text": [ + [ + 989, + 994, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition, the limited security in IDP settlements, poor living conditions and lack of protection pose added risks of gender-based violence ( GBV ) and sexual exploitation to female IDPs. The separation of many women and children from community and familial support structures, as well as from traditional livelihoods activities, contributes to an increased reliance on marginal, inconsistent and hazardous livelihood strategies, which heighten poverty, food 7 OCHA: Humanitarian Bulletin Somalia, May 4 2017 8 The RDNA relies on the following sources of information: Drought Severity data was derived from ENDVI 10-year historical anomaly data based on LANDSAT 8 imagery. Displacement figures were derived from compiled IDP data from IOM, UN Habitat, UNHCR and local Ministries of Interior.", + "type": "data", + "explanation": "ENDVI is not a dataset itself but rather a method or index used to derive data from LANDSAT imagery.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed ENDVI is a dataset because it is mentioned in relation to historical anomaly data.", + "contextual_reason_agent": "ENDVI is not a dataset itself but rather a method or index used to derive data from LANDSAT imagery.", + "contextual_signal": "mentioned as a method for deriving data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "170_multi0page", + "page": 11, + "text": "The biggest risk that Sierra Leone ' s poor face is a return to civil conflict, political instability and chaos in public administration that would prevent the government from responding to the population ' s needs for food, shelter and economically productive activity. The project is expected to respond to this risk through investments in rehabilitation, employment, and the reinforcement of basic services. As conditions improve, endogenous resistance to a resurgence of conflict is expected to increase. However there is still a need to understand the profile of risks, identify high risk groups, define the interface between vulnerability mapping and poverty mapping, coordinate public programs to reduce nsks and reinforce the coping capacity of the poor. Initially, a participatory assessment of risks and vulnerability will be commissioned using available and forthcoming data from the living standards measurement survey ( LSMS ) of 2003. Risk and vulnerability concepts have already been introduced into the PRSP preparation process by including appropriate questions in the 2003 LSMS. This should enhance the poverty diagnostic dimensions of the PRSP, and inform the development of strategies to ensure that poverty levels do not increase. Risk and vulnerability concepts would be introduced in the design of individual sub-projects selected by communities. Sub-projects would address the most common risks faced by communities, such as inadequate infrastructure, poor health, low school enrollment, and the resumption of conflict. A - 6 -", + "ner_text": [ + [ + 1091, + 1095, + "named" + ], + [ + 22, + 34, + "LSMS <> data geography" + ], + [ + 895, + 930, + "LSMS <> data type" + ], + [ + 943, + 947, + "LSMS <> publication year" + ], + [ + 1086, + 1090, + "LSMS <> publication year" + ], + [ + 1121, + 1150, + "LSMS <> data description" + ] + ], + "validated": true, + "empirical_context": "Initially, a participatory assessment of risks and vulnerability will be commissioned using available and forthcoming data from the living standards measurement survey ( LSMS ) of 2003. Risk and vulnerability concepts have already been introduced into the PRSP preparation process by including appropriate questions in the 2003 LSMS. This should enhance the poverty diagnostic dimensions of the PRSP, and inform the development of strategies to ensure that poverty levels do not increase.", + "type": "survey", + "explanation": "LSMS is indeed a dataset as it is explicitly mentioned as a source of data used for the participatory assessment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed LSMS is a dataset because it is referenced as a source of data for the assessment.", + "contextual_reason_agent": "LSMS is indeed a dataset as it is explicitly mentioned as a source of data used for the participatory assessment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 38, + "text": "Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection KEMSA, MoH People who have received essential health, nutrition, and population ( HNP ) services ( Number ) CRI Description Total number of deliveries attended by skilled health personnel and total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized ( Number ) CRI Description Total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of deliveries attended by skilled health personnel ( Number ) CRI", + "ner_text": [ + [ + 688, + 692, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of children immunized ( Number ) CRI Description Total number of children immunized. Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of deliveries attended by skilled health personnel ( Number ) CRI", + "type": "system", + "explanation": "HMIS is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is associated with data collection and health metrics.", + "contextual_reason_agent": "HMIS is mentioned as a methodology for data collection, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 14, + "text": "Note: Disaggregation by preschool highlights differences in PISA for students who attended two years or more of preschool and childcare versus those attending one year or less. 12. School consolidation and the quality of the learning environment are inextricably linked, affecting more students from disadvantaged areas. Education in Moldova is largely publicly financed, with falling or stagnant student numbers in all subsectors except for preschool. In recent years, the general education system has been optimized in response to the declining demographic trends ( largely through reduction of the number of classes and teachers - table 1 ). 16. As a result of the school network consolidation, students are transported to receiving schools when the institution in their locality is closed or downsized. However, these receiving schools offer learning environments that are generally outdated and lack quality educational inputs. While the financial savings from the school consolidation create a more efficient education system, more can be done to foster higher-quality education, particularly for affected students from these disadvantaged areas. True efficiency in the sector can only be realized when fiscal savings are complemented with investments in quality enhancing inputs for the most vulnerable ( qualified teachers, modern facilities, appropriate information technology, and laboratory equipment ). 12 PISA 2018 data.", + "ner_text": [ + [ + 1418, + 1422, + "named" + ], + [ + 6, + 33, + "PISA <> data description" + ], + [ + 334, + 341, + "PISA <> data geography" + ], + [ + 1423, + 1427, + "PISA <> publication year" + ], + [ + 1449, + 1467, + "PISA <> usage context" + ] + ], + "validated": true, + "empirical_context": "True efficiency in the sector can only be realized when fiscal savings are complemented with investments in quality enhancing inputs for the most vulnerable ( qualified teachers, modern facilities, appropriate information technology, and laboratory equipment ). 12 PISA 2018 data.", + "type": "dataset", + "explanation": "In this context, 'PISA' is indeed a dataset as it is explicitly mentioned with '2018 data', indicating its use for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is referenced alongside '2018 data', suggesting it contains empirical information.", + "contextual_reason_agent": "In this context, 'PISA' is indeed a dataset as it is explicitly mentioned with '2018 data', indicating its use for analysis.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 110, + "text": "102 EIC and MoLSA follow the Government reporting system. Both submit monthly financial statements to MoFEC in soft and hard copies within the stipulated dead line ( within 15 days after the month end ). Both entities closed the EFY 2009 accounts and submitted to MoFEC and OFAG. 16. ARRA submits quarterly IPFMR to UNHCR. The IPFMR contains both financial and physical activity report. ARRA is required to submit the quarterly report within 15 days after the end of the quarter. The quarterly report is being submitted within the deadline. For instance, the 1st quarter report for 2017 was submitted on April 10 ( 5 days before the dead line and the 2nd quarter report on July 15, 2017 ( on the deadline ). Annual financial statement is also prepared and is submitted to UNHCR and external auditors. IPDC produces consolidated financial statements of the entity for both internal and external use. The internal reports are intended for management and Board while the external reports are mainly for the tax authorities. The reports to management and Board are produced on quarterly basis and include: budget vs. expenditure reports, profit and loss statement, balance sheet, ratio analysis and narration to explain performance and budget variances mainly on revenue. The reports are usually submitted to the management and Board within ten days of quarter closing.", + "ner_text": [ + [ + 327, + 332, + "named" + ] + ], + "validated": false, + "empirical_context": "ARRA submits quarterly IPFMR to UNHCR. The IPFMR contains both financial and physical activity report. ARRA is required to submit the quarterly report within 15 days after the end of the quarter.", + "type": "report", + "explanation": "However, IPFMR is described as a report, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IPFMR is a dataset because it contains reports on financial and physical activities.", + "contextual_reason_agent": "However, IPFMR is described as a report, not a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "138_781290PAD0JO0R0t0Box377365B00OUO090", + "page": 11, + "text": "According to UNHCR data, 78 percent of the Syrian refugees are vulnerable, requiring additional assistance. This includes women ( 49 percent ), children under the age of 12 ( 40 percent ), and elderly ( 2. 1 percent ). In addition, 23 percent of Syrian refugees have chronic diseases or serious medical conditions that require medical follow up. Comparative morbidity data show a different disease profile with increased levels of morbidity for Syrians refugees than Jordanians which may affect the disease burden in the future. According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing a 14 percent increase in Jordan \u2019 s total cancer disease burden. Similarly, morbidity data from the MOH show a rise in selected communicable diseases. For example, TB case notification increased from 5 / 100, 000 in 2009 among Jordanians to 13 / 100, 000 among Syrian refugees in 2013. While no measles cases have been reported in Jordan since 2009, MOH data show that 18 Jordanians and 23 Syrians have been diagnosed with the disease in 2013. Polio, which had been eliminated since 1999 in Jordan, was also detected in two cases in 2013. Demand for services by refugees at MOH facilities has increased significantly.", + "ner_text": [ + [ + 789, + 803, + "named" + ], + [ + 43, + 58, + "morbidity data <> reference population" + ], + [ + 542, + 548, + "morbidity data <> data geography" + ], + [ + 694, + 698, + "morbidity data <> publication year" + ], + [ + 813, + 816, + "morbidity data <> publisher" + ], + [ + 877, + 897, + "morbidity data <> data description" + ], + [ + 929, + 933, + "morbidity data <> reference year" + ], + [ + 993, + 997, + "morbidity data <> publication year" + ], + [ + 1044, + 1050, + "morbidity data <> data geography" + ], + [ + 1063, + 1066, + "morbidity data <> publisher" + ], + [ + 1151, + 1155, + "morbidity data <> publication year" + ], + [ + 1204, + 1210, + "morbidity data <> data geography" + ] + ], + "validated": true, + "empirical_context": "According to Jordan \u2019 s national cancer statistics, Syrian refugees presenting with cancer at health facilities rose from 134 in 2011 to 169 in the first quarter of 2013, representing a 14 percent increase in Jordan \u2019 s total cancer disease burden. Similarly, morbidity data from the MOH show a rise in selected communicable diseases. For example, TB case notification increased from 5 / 100, 000 in 2009 among Jordanians to 13 / 100, 000 among Syrian refugees in 2013.", + "type": "data", + "explanation": "In this context, 'morbidity data' is indeed used as a source of information regarding health statistics, confirming it as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'morbidity data' refers to a dataset because it is used to present statistical information about health conditions.", + "contextual_reason_agent": "In this context, 'morbidity data' is indeed used as a source of information regarding health statistics, confirming it as a dataset.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 98, + "text": "Financial prices were transformed to economic prices to eliminate market distortions caused by taxes and subsidies among other factors. A value added tax of 18 percent was used to transform financial prices to economic prices. 4. Although the Project will support the implementation of several sanitation works, this evaluation was conducted including only benefits from water interventions and applying them to the total investment cost ( including sanitation ). 5. Financial benefits were estimated as the increase in revenues and gains from efficiency improvements. The economic and financial analysis of the Project was closely coordinated with the financial analysis at the utility level to determine the impact of the Project on the utility. 6. The evaluation was conducted using 2017 prices and assuming a 20-year lifetime, a 6 percent discount rate, and an exchange rate of UGX 3, 596 to US $ 1. Current Situation of Water Service 7. According to the 2015 National Service Delivery Survey ( NSDS ), about 25 percent of the population in the dry season and 13 percent of the population in the wet season relies on unprotected / unsafe water sources. Less than 20 percent obtains drinking water from piped systems or public taps. About 50 percent relies on boreholes, protected springs, and gravity flow schemes. Household members also have to travel a considerable distance to access safe water sources.", + "ner_text": [ + [ + 964, + 996, + "named" + ], + [ + 786, + 790, + "National Service Delivery Survey <> publication year" + ], + [ + 959, + 963, + "National Service Delivery Survey <> publication year" + ], + [ + 999, + 1003, + "National Service Delivery Survey <> acronym" + ] + ], + "validated": true, + "empirical_context": "Current Situation of Water Service 7. According to the 2015 National Service Delivery Survey ( NSDS ), about 25 percent of the population in the dry season and 13 percent of the population in the wet season relies on unprotected / unsafe water sources. Less than 20 percent obtains drinking water from piped systems or public taps.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to present empirical data regarding water source reliance in the population.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical information about water service delivery.", + "contextual_reason_agent": "This is indeed a dataset as it is used to present empirical data regarding water source reliance in the population.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 13, + "text": "Casamance remains deeply affected by FCV. For example, the number of conflict events per capita is highest Casamance ( 28 percent of all incidents have occurred in Casamance, while only 12. 5 percent of the population lives there ). The most common forms of conflict events in Casamance are protests and battles. These events are also more violent and deadly in Casamance than in the rest of the country. Seventy percent of conflict events involved violence against civilians and 78 percent of all battles have taken place in Casamance. All explosions / remotely detonated violence that occurred in Senegal took place in Casamance. Eighty-three percent of all deaths resulting from conflict in the last decade took place in Casamance. 12 7 Clark, 154. 8 Ibid. 9 United Nations High Commissioner for Refugees ( UNHCR ). 2021. URL: https: / / www. unhcr. org / climate-change-and-disasters. html. 10 \u201c Senegal \u2019 s Casamance MDFC rebels declare a ceasefire, \u201d BBC, April 30, 2014, https: / / www. bbc. com / news / world-africa-27221999. 11 \u201c Senegal pledges US $ 500 mln railway to southern Casamance region, \u201d Reuters, February 20, 2015, https: / / af. reuters. com / article / topNews / idAFKBN0LO0D020150220 12 ACLED ( Armed Conflict Location & Event Data ) conflict event mapping dashboard: https: / / acleddata. com / dashboard / # / dashboard. ( 2019 ).", + "ner_text": [ + [ + 1212, + 1217, + "named" + ], + [ + 0, + 9, + "ACLED <> data geography" + ], + [ + 164, + 173, + "ACLED <> data geography" + ], + [ + 599, + 606, + "ACLED <> data geography" + ], + [ + 900, + 907, + "ACLED <> data geography" + ], + [ + 912, + 921, + "ACLED <> data geography" + ], + [ + 1040, + 1047, + "ACLED <> data geography" + ], + [ + 1089, + 1098, + "ACLED <> data geography" + ], + [ + 1259, + 1291, + "ACLED <> data type" + ], + [ + 1350, + 1354, + "ACLED <> publication year" + ] + ], + "validated": true, + "empirical_context": "reuters. com / article / topNews / idAFKBN0LO0D020150220 12 ACLED ( Armed Conflict Location & Event Data ) conflict event mapping dashboard: https: / / acleddata. com / dashboard / # / dashboard.", + "type": "dataset", + "explanation": "ACLED is indeed a dataset as it is explicitly linked to a dashboard that provides structured data on conflict events.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed ACLED is a dataset because it is associated with a conflict event mapping dashboard that suggests data collection.", + "contextual_reason_agent": "ACLED is indeed a dataset as it is explicitly linked to a dashboard that provides structured data on conflict events.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 36, + "text": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 30 of 95 vegetal covers contributing to climate mitigation. Investments in geospatial infrastructures and the development of integrated decision support systems will enable the collection, management and dissemination of land, mining and other common territorial datasets. Access to this geospatial information will be key to enhance climate resilient planning and decision making, and will enable Government entities, NGOs and academia to effectively monitor climate change and strengthen early warning systems. In the case of disaster displacement, the creation of a digital cadaster linked to a land registry that includes information on property values will contribute to expedite recovery by enabling the Government to reconstitute parcel boundaries, verify associated rights, and assess the damages incurred. It may also be the foundation for efficient insurance products, provided they account for local specificities of tenure management. 44 Evidence in rural Burkina Faso also indicates that increased tenure security provides an incentive for landholders and occupants to plant trees and invest in higher-quality infrastructure and climate-smart agriculture techniques, 45 which would contribute to increase carbon sequestration and improve climate change resilience. The project will also support mapping on the prospectivity of Burkina Faso for minerals in demand for the green economy including rare earths, nickel, and lithium. D. Results Chain 63. Project Results Chain / Theory of Change.", + "ner_text": [ + [ + 217, + 252, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 30 of 95 vegetal covers contributing to climate mitigation. Investments in geospatial infrastructures and the development of integrated decision support systems will enable the collection, management and dissemination of land, mining and other common territorial datasets. Access to this geospatial information will be key to enhance climate resilient planning and decision making, and will enable Government entities, NGOs and academia to effectively monitor climate change and strengthen early warning systems.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to a system designed for decision support rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'systems' which can imply data handling.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a system designed for decision support rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 6, + "text": "Email Kampeta Sayinzoga CEO + 250788312112 k. sayinzoga @ brd. rw Innocent Gatete Head, Strategic Project Implementation Unit + 250788388168 i. gatete @ brd. rw Innocent Mugabe Head, Single Project Implementation Unit + 250788221479 info @ minema. gov. rw Philippe Habinshuti Permanent Secretary + 250788221479 info @ minema. gov. rw Vincent Munyeshyaka CEO + 250 788 193 200 info @ bdf. rw Imena MUNYAMPENDA Director General + 250788519022 imena. munyampenda @ rtda. gov. r w @ # & OPS ~ Doctype ~ OPS ^ dynamics @ padfinancingsummary # doctemplate PROJECT FINANCING DATA ( US $, Millions ) Maximizing Finance for Development Is this an MFD-Enabling Project ( MFD-EP )? Yes Is this project Private Capital Enabling ( PCE )? Yes", + "ner_text": [ + [ + 550, + 572, + "named" + ], + [ + 691, + 715, + "PROJECT FINANCING DATA <> data description" + ] + ], + "validated": true, + "empirical_context": "gov. r w @ # & OPS ~ Doctype ~ OPS ^ dynamics @ padfinancingsummary # doctemplate PROJECT FINANCING DATA ( US $, Millions ) Maximizing Finance for Development Is this an MFD-Enabling Project ( MFD-EP )? Yes Is this project Private Capital Enabling ( PCE )?", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a structured collection of financial data related to project financing.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'DATA' which often indicates a collection of information.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of financial data related to project financing.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 43, + "text": "The World Bank Development Response to Displacement Impacts Project in the Horn of Africa Phase II ( P178047 ) Page 38 of 80 114. Progress monitoring. The main tool for monitoring progress will be recording information / data from various levels ( kebele, woreda, Region and Federal ) relating to implementation progress through the existing robust project M & E system. The M & E manual will be updated with new reporting templates. The PCU will submit quarterly reports and an Annual Report to the Steering Committee and the World Bank. 115. Remote monitoring approaches. COVID-19 and / or insecurity are likely to continue to limit the scope of field monitoring. To address this gap, the project will utilize remote monitoring approaches, including a scale-up of the application of GEMS. IBM will also be introduced for Phase II to gather feedback through phone-based surveys of a large sample of beneficiaries on implementation bottlenecks to allow for real-time course correction. 116. Community participatory monitoring. Participatory monitoring at the kebele level will include community-friendly tools to enhance the transparency of community level institutions. The main tools to be used are display boards, benefit tracking matrices, social audit committees and monthly community meetings. 117. Evaluation of outcomes and impacts. Progress towards achievement of PDO will be evaluated through a baseline study and mid-term and end-of-project reviews.", + "ner_text": [ + [ + 859, + 878, + "named" + ], + [ + 4, + 14, + "phone-based surveys <> author" + ], + [ + 75, + 89, + "phone-based surveys <> data geography" + ], + [ + 900, + 913, + "phone-based surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "To address this gap, the project will utilize remote monitoring approaches, including a scale-up of the application of GEMS. IBM will also be introduced for Phase II to gather feedback through phone-based surveys of a large sample of beneficiaries on implementation bottlenecks to allow for real-time course correction. 116.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of feedback gathered from beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'phone-based surveys' imply a structured collection of responses from participants.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of feedback gathered from beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 34, + "text": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 22 of 76 E. Rationale for Bank Involvement and Role of Partners 50. There is a strong rationale for public financing of the project, given the public goods nature of the benefits derived from the project across social and economic sectors and the potential to increase overall inclusion. While access to digital infrastructure has been seen as important for a while, the emergence of the COVID-19 crisis has transformed it into a necessity and driven the need to accelerate network rollout in a major way and without further delays, partnering with the private sector as much as possible. The project will promote investments in digital service delivery in vital sectors such as health, education, agriculture, and trade. This will strengthen digital sectoral plans \u2019 implementation, including the ability to respond to COVID-19 within these sectors and provide enabling digital infrastructure for a post-COVID-19 recovery. In addition, the project will address inclusion concerns in terms of gender, PWDs, refugees and hosting communities, geography, and income levels, to compensate for market failures and ensure that the most vulnerable are not excluded from the social and economic benefits of the digital transformation of the country. 51.", + "ner_text": [ + [ + 53, + 59, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 22 of 76 E. Rationale for Bank Involvement and Role of Partners 50.", + "type": "project", + "explanation": "'GovNet' is mentioned as a project, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GovNet' is a dataset because it includes 'Net', which can imply a network of data.", + "contextual_reason_agent": "'GovNet' is mentioned as a project, not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "057_West-Bank-and-Gaza-Phase-1-of-the-Multiphase-Programmatic-Approach-Advancing-Sustainability-in-Performance-Infrastructure-and-Reliability-of-Energy-Sector-Project", + "page": 41, + "text": "The ESCP includes ( but not limited ) to the following commitments, ( i ) regular monitoring of the implementation of all instruments and reporting to the Bank, including any serious incidents, ( ii ) preparation, implementation, and monitoring of all necessary site-specific ESMPs, LMPs, RAPs, SEPs, ( iii ) monitoring compliance of all contractors on the provisions of all relevant instruments, ( iv ) maintaining sufficient capacity for environmental management by PENRA, and cause the beneficiary distribution companies to maintain sufficient capacity for environmental and social management, ( v ) conducting meaningful and inclusive consultations with stakeholders and affected parties, ( vi ) implementation and monitoring of GBV action plan. E. Citizen Engagement 93. To ensure the implementation of a citizen engagement mechanism with a feedback loop, PENRA will conduct a baseline beneficiary survey, followed by periodic tracer surveys and an end survey within the duration of the ASPIRE MPA to seek feedback on benefits and services provided by the program. A citizen engagement strategy will be developed to maintain continuous engagement and communication with beneficiaries and citizens overall and contribute to building trust and a social contract. In addition, ongoing citizens \u2019 feedback will be considered when implementing the activities of the MPA phases, and PENRA will publish the results of the beneficiary surveys on its website, as a key results indicator for citizen engagement.", + "ner_text": [ + [ + 882, + 909, + "named" + ], + [ + 468, + 473, + "baseline beneficiary survey <> publisher" + ], + [ + 861, + 866, + "baseline beneficiary survey <> publisher" + ], + [ + 1175, + 1188, + "baseline beneficiary survey <> reference population" + ], + [ + 1193, + 1201, + "baseline beneficiary survey <> reference population" + ], + [ + 1287, + 1295, + "baseline beneficiary survey <> reference population" + ], + [ + 1382, + 1387, + "baseline beneficiary survey <> publisher" + ], + [ + 1522, + 1540, + "baseline beneficiary survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Citizen Engagement 93. To ensure the implementation of a citizen engagement mechanism with a feedback loop, PENRA will conduct a baseline beneficiary survey, followed by periodic tracer surveys and an end survey within the duration of the ASPIRE MPA to seek feedback on benefits and services provided by the program. A citizen engagement strategy will be developed to maintain continuous engagement and communication with beneficiaries and citizens overall and contribute to building trust and a social contract.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey designed to gather feedback, which is a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured survey aimed at collecting data from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey designed to gather feedback, which is a structured collection of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "161_28046", + "page": 14, + "text": "The project \u2019 s Task Force, also under the supervision o f the Secretary General, would follow i t s technical implementation and would ensure objectives are reached. The project would finance the recurrent costs of this Task Force, its logistical support ( transport and computer system ), technical assistance, and study tours in other African countries. Institutional responsibilities for the Medical Waste Management Plan would rest ( a ) for the overall responsibility, with the MOH Directorate o f Equipment and Infrastructure ( DIEM ), and ( b ) for the decentralized levels, with the General Director o f Hospitals, the Head o f the Health Centers and o f the Health Posts. 3. Monitoring and evaluation of outcomes / results A yearly health expenditures tracking survey would be carried out, as well as a yearly client satisfaction survey focusing on quality o f care. Those two tools along with data from the health information system, would provide the data necessary to assess progress and identify bottlenecks. Guinea has a management information system quite sophisticated where a monitoring o f health centers activities and finances i s undertaken every six months, thus providing facility-based information. For evaluation purposes, a DHS was be carried out in 2004 which will serve as base-line.", + "ner_text": [ + [ + 918, + 943, + "named" + ] + ], + "validated": false, + "empirical_context": "Monitoring and evaluation of outcomes / results A yearly health expenditures tracking survey would be carried out, as well as a yearly client satisfaction survey focusing on quality o f care. Those two tools along with data from the health information system, would provide the data necessary to assess progress and identify bottlenecks. Guinea has a management information system quite sophisticated where a monitoring o f health centers activities and finances i s undertaken every six months, thus providing facility-based information.", + "type": "system", + "explanation": "However, it is described as a management information system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data collection.", + "contextual_reason_agent": "However, it is described as a management information system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 36, + "text": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 30 of 95 vegetal covers contributing to climate mitigation. Investments in geospatial infrastructures and the development of integrated decision support systems will enable the collection, management and dissemination of land, mining and other common territorial datasets. Access to this geospatial information will be key to enhance climate resilient planning and decision making, and will enable Government entities, NGOs and academia to effectively monitor climate change and strengthen early warning systems. In the case of disaster displacement, the creation of a digital cadaster linked to a land registry that includes information on property values will contribute to expedite recovery by enabling the Government to reconstitute parcel boundaries, verify associated rights, and assess the damages incurred. It may also be the foundation for efficient insurance products, provided they account for local specificities of tenure management. 44 Evidence in rural Burkina Faso also indicates that increased tenure security provides an incentive for landholders and occupants to plant trees and invest in higher-quality infrastructure and climate-smart agriculture techniques, 45 which would contribute to increase carbon sequestration and improve climate change resilience. The project will also support mapping on the prospectivity of Burkina Faso for minerals in demand for the green economy including rare earths, nickel, and lithium. D. Results Chain 63. Project Results Chain / Theory of Change.", + "ner_text": [ + [ + 661, + 677, + "named" + ] + ], + "validated": false, + "empirical_context": "Access to this geospatial information will be key to enhance climate resilient planning and decision making, and will enable Government entities, NGOs and academia to effectively monitor climate change and strengthen early warning systems. In the case of disaster displacement, the creation of a digital cadaster linked to a land registry that includes information on property values will contribute to expedite recovery by enabling the Government to reconstitute parcel boundaries, verify associated rights, and assess the damages incurred. It may also be the foundation for efficient insurance products, provided they account for local specificities of tenure management.", + "type": "system", + "explanation": "However, it is described as a system for managing land information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'digital cadaster' suggests a collection of geographic data.", + "contextual_reason_agent": "However, it is described as a system for managing land information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 358, + 365, + "named" + ], + [ + 347, + 355, + "UNISE ) <> data geography" + ], + [ + 703, + 719, + "UNISE ) <> data type" + ], + [ + 764, + 782, + "UNISE ) <> data type" + ] + ], + "validated": true, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "system", + "explanation": "UNISE is indeed a data collection system mentioned as part of the context for data management and collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UNISE is a dataset because it is mentioned alongside other data collection systems.", + "contextual_reason_agent": "UNISE is indeed a data collection system mentioned as part of the context for data management and collection.", + "contextual_signal": "mentioned as a data collection system", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 719, + 724, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5.", + "type": "system", + "explanation": "However, HRMIS is mentioned as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with recruitment records and assessments.", + "contextual_reason_agent": "However, HRMIS is mentioned as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 44, + "text": "32 on electronic devices ( such as tablets or laptops with card readers ). This information will then be transmitted confidentially to the CFS beneficiary database for the project and will be included in the registry. This database will contain all of the information related to the registration of beneficiaries and their updates as well as their payment information. It will also retain information on the other households that were surveyed during the PMT survey, but who were not eligible for the CT program. The development of unique identifier numbers for each household included in the registry is crucial for coordinating different safety net and other social programs using the database. 25. Accompanying measures will help improve awareness and behaviors at household level in support of human development, especially child growth and nutrition. These accompanying measures will be limited to demand-side measures and include information on the project \u2019 s objectives and social and behavior change communication for improved welfare and human capital of the household. The component will support training for and delivery of social and behavior change communication services, including advocacy, group education, and interpersonal communication on nutrition, basic health care and sanitation to all participating households. It is expected that most sessions will take place during payment days, every two months. A study will be undertaken during project preparation to explore and define possible themes and delivery mechanisms.", + "ner_text": [ + [ + 455, + 465, + "named" + ] + ], + "validated": false, + "empirical_context": "This database will contain all of the information related to the registration of beneficiaries and their updates as well as their payment information. It will also retain information on the other households that were surveyed during the PMT survey, but who were not eligible for the CT program. The development of unique identifier numbers for each household included in the registry is crucial for coordinating different safety net and other social programs using the database.", + "type": "survey", + "explanation": "However, it is mentioned only as a survey and not as a data source in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PMT survey' implies a structured collection of data from the survey.", + "contextual_reason_agent": "However, it is mentioned only as a survey and not as a data source in the context provided.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 11, + "text": "In addition to the analytical work undertaken, activity under the SPF grant also includes support for the Joint Recovery Needs Assessment ( JRNA ) for Fizuli, Agdam and Jabrayil, and just-in-time advice to the Office of Special Representative ( OSR ) of the President to the Karabakh Economic Region on issues including management information systems, local governance, and smart city development. Preparation of the JRNA will benefit from the knowledge gained through the proposed Improved Livelihoods for Internally Displaced Persons ( ILIDP ) Project and the OSR will benefit from the Project as well given their role in facilitating the sustainable return of IDPs. 10. The IDP survey and lessons learned paper on livelihoods described above have informed the design of the proposed project by summarizing the current living conditions of IDPs as well as lessons from implementation of previous livelihood programs. Though both studies are in the process of being finalized, they have provided valuable inputs to the design of this project. The study of lessons from other livelihoods and job training programs revealed the need for close support for training participants to ensure the sustainability of their achievements. This has resulted in the incorporation of mentors into the project design from the time of project launch through completion and an emphasis on community-based support to address the unique context of each IDP settlement. The household survey found that 22 percent of household members are unemployed and 30 percent of respondents are looking for work. There remains a reliance on state support with 90 percent of respondents receiving an IDP allowance. To address their income generation needs, respondents identified various skills they would like to acquire with males wanting to have skills in the agriculture / fishery, automotive and land transport, and construction sectors while women preferred garments / sewing, health care and community development. While the data collected through the survey on job and skills provides useful benchmarking information, more localized labor market surveys will need to be undertaken to identify targeted opportunities in the communities where IDPs are living to support livelihoods that provide greater incomes over sustained periods.", + "ner_text": [ + [ + 677, + 687, + "named" + ], + [ + 159, + 164, + "IDP survey <> data geography" + ], + [ + 169, + 177, + "IDP survey <> data geography" + ], + [ + 275, + 299, + "IDP survey <> data geography" + ], + [ + 842, + 846, + "IDP survey <> reference population" + ], + [ + 1454, + 1470, + "IDP survey <> data type" + ], + [ + 2323, + 2341, + "IDP survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "10. The IDP survey and lessons learned paper on livelihoods described above have informed the design of the proposed project by summarizing the current living conditions of IDPs as well as lessons from implementation of previous livelihood programs. Though both studies are in the process of being finalized, they have provided valuable inputs to the design of this project.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is described as informing the design of a project based on collected data about living conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is described as informing the design of a project based on collected data about living conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 343, + 347, + "named" + ] + ], + "validated": false, + "empirical_context": "Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it involves data collection related to education.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 134, + "text": "124 Action Description DLI * Legally binding Due Date Responsible Party Completion Measurement * * SME Observatory designed and established ( Implementation action plan including finalization of governance and steering committee, annual work plan, staffing plan, reporting ) N 18 months after effectiveness MOET SME Observatory strategy and action plan approved by MOET; required budget approved and staff appointed Matching grant operations manual completed Y 3 months after effectiveness MOET Operations manual completed and incorporated into POM Value Chains Committee ( VCC ) established Y 6 months after effectiveness MOET VCC established VC program designed, VC selected N 8 months after effectiveness MOET Pilot VC selected and approved by MOET SME database and the digital platform developed and training conducted N 24 months after effectiveness MOET Database developed, Platform available online and approved by MOET, training completed Results Area 3: Connecting women and youth to jobs Eligibility criteria and prioritization system defined and agreed by the Government to select beneficiaries of ALMP services Y 3 months after effectiveness NEO Criteria and mechanism defined in the POM Design of the wage subsidy financial mechanism ( agree on either reimbursement or waiver to employers ) Y 3 months after effectiveness NEO with NSSF MOU signed between NEO and NSSF New Management Information Systems ( MIS ) for registration and profiling designed and implemented, NEO staff training completed N 6 months after effectiveness NEO MIS developed and link for registration available.", + "ner_text": [ + [ + 1381, + 1415, + "named" + ] + ], + "validated": false, + "empirical_context": "124 Action Description DLI * Legally binding Due Date Responsible Party Completion Measurement * * SME Observatory designed and established ( Implementation action plan including finalization of governance and steering committee, annual work plan, staffing plan, reporting ) N 18 months after effectiveness MOET SME Observatory strategy and action plan approved by MOET; required budget approved and staff appointed Matching grant operations manual completed Y 3 months after effectiveness MOET Operations manual completed and incorporated into POM Value Chains Committee ( VCC ) established Y 6 months after effectiveness MOET VCC established VC program designed, VC selected N 8 months after effectiveness MOET Pilot VC selected and approved by MOET SME database and the digital platform developed and training conducted N 24 months after effectiveness MOET Database developed, Platform available online and approved by MOET, training completed Results Area 3: Connecting women and youth to jobs Eligibility criteria and prioritization system defined and agreed by the Government to select beneficiaries of ALMP services Y 3 months after effectiveness NEO Criteria and mechanism defined in the POM Design of the wage subsidy financial mechanism ( agree on either reimbursement or waiver to employers ) Y 3 months after effectiveness NEO with NSSF MOU signed between NEO and NSSF New Management Information Systems ( MIS ) for registration and profiling designed and implemented, NEO staff training completed N 6 months after effectiveness NEO MIS developed and link for registration available.", + "type": "system", + "explanation": "However, it is described as a system for registration and profiling, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Management Information Systems' which suggests data handling.", + "contextual_reason_agent": "However, it is described as a system for registration and profiling, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 58, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 53 of 117 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 refugee / host communities ( Number ) Strengthen Capacity for Implementing Initiated Reforms Capacity building of target Teacher Training Colleges for implementation of competency-based teacher education curriculum. ( Yes / No ) No Yes Yes Establishment of standards and tools for quality assurance mechanisms for Pre-primary education, including teacher appraisal tools, assessment and classroom observation tools. ( Yes / No ) No Yes Yes New classrooms constructed in existing schools as per the needs-based school infrastructure investment plan ( Number ) 0. 00 6, 500. 00 8, 000. 00 New classrooms constructed in refugee host communities existing schools as per the needs - based school infrastructure investment plan ( Number ) 0. 00 50. 00 50. 00 New classrooms constructed in existing non - refugee / host communities primary schools as per the needs-based school infrastructure investment plan ( Number ) 0. 00 6, 000. 00 7, 950. 00 Evidence of utilization of NEMIS data for better management of primary education ( Yes / No ) No Yes Yes Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS ( Yes / No ) No Yes Yes Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of", + "ner_text": [ + [ + 1137, + 1142, + "named" + ], + [ + 1215, + 1231, + "NEMIS <> reference population" + ], + [ + 1302, + 1318, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 7, 950. 00 Evidence of utilization of NEMIS data for better management of primary education ( Yes / No ) No Yes Yes Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS ( Yes / No ) No Yes Yes Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is explicitly mentioned in relation to the registration of refugee learners and management of primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization and management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is explicitly mentioned in relation to the registration of refugee learners and management of primary education.", + "contextual_signal": "mentioned as a data source for registration of refugee learners", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 20, + "text": "The prime importance of data will be emphasized in the project design by: ( i ) making collection of verifiable baseline data a selection criterion for CSD proposals ( in both windows ); and ( ii ) investing project resources in the collection of household survey data to evaluate NPTP performance, including a comprehensive monitoring and evaluation system. ( c ) Wide-ranging outreach and awareness campaigns are needed to achieve the intended impact for both programs implemented under components 2 and 3. One common risk associated with community-driven projects is \" elite capture \", whereby elites reap most of the benefits by manipulating the decision-making process. As the CSD component would invite submissions from different entities ( e. g., SDCs, NGOs, and CSOs ), the necessary but not sufficient condition to stave off elite capture is to ensure that all the potential stakeholders are aware of the call for proposals, the application process, and the selection criteria. To achieve this, the 21", + "ner_text": [ + [ + 247, + 268, + "named" + ], + [ + 101, + 125, + "household survey data <> data description" + ] + ], + "validated": true, + "empirical_context": "The prime importance of data will be emphasized in the project design by: ( i ) making collection of verifiable baseline data a selection criterion for CSD proposals ( in both windows ); and ( ii ) investing project resources in the collection of household survey data to evaluate NPTP performance, including a comprehensive monitoring and evaluation system. ( c ) Wide-ranging outreach and awareness campaigns are needed to achieve the intended impact for both programs implemented under components 2 and 3.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as data collected for evaluating project performance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' implies a structured collection of data gathered from households.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as data collected for evaluating project performance.", + "contextual_signal": "mentioned as data to evaluate NPTP performance", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 39 of 47 feeding practices that decrease the prevalence of child diarrhea ), this CBA identifies only a lower bound of project benefits. Table 1. 2. Discounted Project Costs of Health Component ( current US $ ) Years Present Value 2 % Discount Rate 4 % Discount Rate 2020 2, 478, 360 2, 430, 581 2021 3, 996, 990 3, 844, 363 2022 4, 445, 516 4, 193, 332 2023 3, 622, 024 3, 350, 689 2024 3, 326, 210 3, 017, 714 Total 17, 869, 100 16, 836, 680 9. Mortality avoided is estimated based on the number of additional health services provided under the project. As a first step, utilization rates of reproductive, maternal, and child health services over the project cycle \u2014 both with and without the project \u2014 are projected. Three types of services are considered: skilled birth attendance, measles vaccination, and family planning visits. Baseline utilization data for 2017 and 2018, in the target HFs came from the Balochistan DHIS.", + "ner_text": [ + [ + 989, + 1005, + "named" + ], + [ + 4, + 14, + "Balochistan DHIS <> publisher" + ], + [ + 650, + 720, + "Balochistan DHIS <> data description" + ], + [ + 837, + 861, + "Balochistan DHIS <> data description" + ], + [ + 912, + 937, + "Balochistan DHIS <> data type" + ], + [ + 942, + 946, + "Balochistan DHIS <> reference year" + ], + [ + 951, + 955, + "Balochistan DHIS <> reference year" + ], + [ + 989, + 1000, + "Balochistan DHIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Three types of services are considered: skilled birth attendance, measles vaccination, and family planning visits. Baseline utilization data for 2017 and 2018, in the target HFs came from the Balochistan DHIS.", + "type": "system", + "explanation": "In the context, it is explicitly mentioned as the source of data for baseline utilization, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of baseline utilization data.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as the source of data for baseline utilization, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 27, + "text": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 389, + 413, + "named" + ], + [ + 227, + 243, + "fourth population census <> data geography" + ], + [ + 701, + 711, + "fourth population census <> publisher" + ], + [ + 764, + 774, + "fourth population census <> publisher" + ] + ], + "validated": true, + "empirical_context": "This component will consist of three subcomponents: \uf0b7 Subcomponent 4. 1: Improvement of poverty-related data by providing TA and financing to ( a ) the design, data collection, and analysis of ECAM 5 with a particular focus on northern regions \u2019 rural areas and gender issues ( agricultural income, shocks, and vulnerability, gender, and so on ); ( b ) data collection and analysis of the fourth population census; 28 ( c ) analysis of the complementary household survey ( ECAM 4 ); ( d ) design a system of labor statistics by improving LFS and enterprises and informal sector surveys; 28 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank financed project in the health sector. In total the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as part of data collection and analysis efforts.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as part of data collection and analysis efforts.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 34, + "text": "This will include support for setting up an M & E system and establishing a comprehensive CRM system, as well as relevant staff training on different aspects of M & E. The project evaluation system will help determine whether the training provided under the project translates into income opportunities. In addition to the outcome of obtaining a job or income opportunity, the project will include results indicators on skills development ( such as increased confidence, or other personal or social outcomes ). The project monitoring reports will contain, at a minimum, summary data on overall performance against project targets, implementation challenges experienced, and feedback received from project beneficiaries. 38 70. The World Bank will review the Results Framework submitted by the PMU as part of implementation support. The World Bank experts will discuss the progress and deviations with the PMU to identify any areas where additional help from the World Bank is needed. The PMU and the World Bank will also use results data to build awareness of project results among key beneficiaries and counterparts. Beneficiary feedback will also feed into regular monitoring. The M & E framework will leverage data collection activities undertaken by the independent verification agency ( IVA ) for DLIs. 71. The project will conduct rigorous impact evaluations to identify the absolute impact of the interventions prescribed by the project design on key outcomes and provide feedback on the relative efficacy of alternate design modalities to enable course corrections. 39 A consultation process to prioritize and select the questions and methodologies for the impact evaluations will take place between country stakeholders and researchers when project activities are being further defined. The impact evaluations would focus on the following knowledge gaps: \uf0b7 For Sub-component 1. 1, it will be important to assess the impact of skills trainings activities on the command 38 To the extent possible, any data gathered on Syrian refugees will be disaggregated by sex to identify specific gender gaps that need to be addressed. 39 Rigorous impact evaluations assess causality by identifying a counterfactual with experimental or quasi-experimental methodologies.", + "ner_text": [ + [ + 1025, + 1037, + "named" + ], + [ + 731, + 741, + "results data <> publisher" + ], + [ + 836, + 846, + "results data <> publisher" + ], + [ + 962, + 972, + "results data <> publisher" + ], + [ + 1000, + 1010, + "results data <> publisher" + ], + [ + 2026, + 2041, + "results data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank experts will discuss the progress and deviations with the PMU to identify any areas where additional help from the World Bank is needed. The PMU and the World Bank will also use results data to build awareness of project results among key beneficiaries and counterparts. Beneficiary feedback will also feed into regular monitoring.", + "type": "data", + "explanation": "In this context, 'results data' is explicitly mentioned as being used to build awareness of project results, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'results data' is a dataset because it refers to data used to assess project outcomes.", + "contextual_reason_agent": "In this context, 'results data' is explicitly mentioned as being used to build awareness of project results, indicating it serves as a data source.", + "contextual_signal": "follows 'use results data to build awareness'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 70, + "text": "Many girls interviewed during the project visits report that they prefer to \u2018 hold on \u2019 over long periods, while an emerging body of work indicates that this practice may lead to difficulty in conceiving and an increased risk of miscarriage. Discussions with communities in the rural Khatlon region revealed that women and girls have some ideas on what they could productively spend time saved from required water fetching. Some referred to the potential for cultivation on their household plots and others referred to experience in making canned vegetables and jams or sewing. There are limited job opportunities for women in rural areas, but those would need to be considered and explored by involving women in trainings, investment planning, and prioritization of resources. Consultations with communities in rural villages of Dusti district revealed that when communities are given a chance to prioritize infrastructure investments, priority is always given either to schools or road rehabilitation activities, with the latter benefitting more men than women. There are also wide gender gaps in employment in the water sector institutions. Global data show that, on average, women account for only 18 percent of total staff in water institutions and 23 percent of staff in engineering and managerial positions. Evidence from Europe and Central Asia countries show similar patterns. In Tajikistan, some of the reasons behind women \u2019 s low employment in the water sector include the low female enrollment in", + "ner_text": [ + [ + 1144, + 1155, + "named" + ], + [ + 313, + 318, + "Global data <> reference population" + ], + [ + 618, + 623, + "Global data <> reference population" + ], + [ + 1179, + 1184, + "Global data <> reference population" + ], + [ + 1329, + 1335, + "Global data <> data geography" + ], + [ + 1340, + 1352, + "Global data <> data geography" + ], + [ + 1389, + 1399, + "Global data <> data geography" + ] + ], + "validated": true, + "empirical_context": "There are also wide gender gaps in employment in the water sector institutions. Global data show that, on average, women account for only 18 percent of total staff in water institutions and 23 percent of staff in engineering and managerial positions. Evidence from Europe and Central Asia countries show similar patterns.", + "type": "data", + "explanation": "In this context, 'Global data' is used to refer to empirical statistics regarding employment in the water sector, confirming it as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Global data' refers to a dataset because it implies a collection of statistical information about gender gaps in employment.", + "contextual_reason_agent": "In this context, 'Global data' is used to refer to empirical statistics regarding employment in the water sector, confirming it as a data source.", + "contextual_signal": "follows 'Global data show that'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 88, + "text": "We are making an optimistic assumption that, by 2024, 100 percent of extremely poor households will be covered with NPTP benefits. Since we are using 2004 HBS data to simulate NPTP impact, we are assuming that the extreme poverty line is US $ 2. 4 per capita per day, and that 7. 2 percent of the population ( or 273, 761 individuals ) were extremely poor in that year. Assuming no changes in the distributions or total number of extreme poor in Lebanon between 2004 and 201260 NPTP currently ( in 2012 ) covers 84, 322 / 273, 761 = 30. 8 percent of extremely poor individuals ( assuming that all the current NPTP beneficiaries are extremely poor, i. e., there are no errors of inclusion ). Thus, 100 percent coverage rate assumes better outreach achieved by 2024. 18. Thus, the simulation of the expected impact of NPTP on aggregate extreme poverty and inequality in Lebanon consists of applying the average value of the NPTP benefit ( US $ 541. 15 per household in 2004 prices, assuming the value of the benefit increased at the same rate as overall CPI, or by 40 percent from 2004 to 2011 ) to the income of all individuals below the extreme poverty line of US $ 2. 4 per capita per day. We use the 2004 Household Budget Survey to run this simulation. The results are summarized in Table 3 below: 60 The estimated number of extremely poor in 2012 was determined based on the 2012 population in Lebanon ( World Development Indicator ) and the extreme poverty rate of 7. 2 percent ( 2004 Household Budget Survey or HBS ). Due to lack of updated data on poverty, it is assumed that the extreme poverty rate will remain the same as it was in 2004, when it was assessed through a HBS. 89", + "ner_text": [ + [ + 1484, + 1512, + "named" + ] + ], + "validated": true, + "empirical_context": "The results are summarized in Table 3 below: 60 The estimated number of extremely poor in 2012 was determined based on the 2012 population in Lebanon ( World Development Indicator ) and the extreme poverty rate of 7. 2 percent ( 2004 Household Budget Survey or HBS ). Due to lack of updated data on poverty, it is assumed that the extreme poverty rate will remain the same as it was in 2004, when it was assessed through a HBS.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source for assessing the extreme poverty rate.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that provides data on poverty rates.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source for assessing the extreme poverty rate.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 53, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 49 of 159 ensure that appropriate actions are taken on the feedback received from beneficiaries. The beneficiary institutions will receive feedback on a regular basis through supervision meetings, which will include separate focus group discussions with students, separated by girls and boys. The education sector also has a well - established Local Education Group, which provides an opportunity for citizens, CSOs, and other stakeholders to raise education questions and issues with the Government. These feedback mechanisms will also inform the annual review of project progress and annual work plans. The citizens \u2019 engagement will be tracked in the results framework through the indicator which measures number of secondary education and TVET institutions that developed their school project or an SDF proposal using a community-empowered approach. 136. Personal Data. In order to identify the beneficiaries under the project activities, it is expected that personal data, personally identifiable information and sensitive data ( including, for example, family and individual information, age, proof of enrollment of students in public secondary school, etc. ) will be collected, stored and processed. Personal data is expected to be sourced through targeting mechanisms to be created under the project and existing targeting mechanisms under the SSNP.", + "ner_text": [ + [ + 1046, + 1059, + "named" + ] + ], + "validated": false, + "empirical_context": "Personal Data. In order to identify the beneficiaries under the project activities, it is expected that personal data, personally identifiable information and sensitive data ( including, for example, family and individual information, age, proof of enrollment of students in public secondary school, etc. ) will be collected, stored and processed. Personal data is expected to be sourced through targeting mechanisms to be created under the project and existing targeting mechanisms under the SSNP.", + "type": "data", + "explanation": "'Personal data' is not a structured collection of data but rather refers to types of information that will be collected.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'personal data' refers to a dataset because it involves the collection and processing of information.", + "contextual_reason_agent": "'Personal data' is not a structured collection of data but rather refers to types of information that will be collected.", + "contextual_signal": "mentioned only as types of information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 696, + 701, + "named" + ], + [ + 578, + 581, + "NEMIS <> author" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "161_28046", + "page": 29, + "text": "Smaller surveys and operations research might also be needed every time a problem i s identified and its solution i s not obvious. Financial Monitoring and Evaluation. Lastly, financial management monitoring o f the utilization o f resources and funds by the public sector, private sector and civil society would be combined with program monitoring to provide a basis for cross checking financial and activity data and establishing the relation between disbursement and activities. Audits would be carried out by the FMA as well as external auditors, at all levels. A yearly health expenditures tracking survey would provide information on how much the M O H i s able to improve on the national budget \u2019 s allocation and utilization o f resources by different levels. 24", + "ner_text": [ + [ + 568, + 610, + "named" + ] + ], + "validated": true, + "empirical_context": "Audits would be carried out by the FMA as well as external auditors, at all levels. A yearly health expenditures tracking survey would provide information on how much the M O H i s able to improve on the national budget \u2019 s allocation and utilization o f resources by different levels. 24", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on health expenditures for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is described as a survey that tracks health expenditures.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on health expenditures for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 57, + "text": "47 ( i ) Program intake and profiling of beneficiaries to support the implementation of targeting, registering, selecting and profiling beneficiaries, specifically: \uf0b7 Design and implement an extensive outreach process to identify and attract Program beneficiaries. Outreach activities will adopt a tailored gender-sensitive approach for different target groups ( women, not in employment, education, or training [ NEETs ], youth, urban versus rural areas ); \uf0b7 Set-up a Management Information System ( MIS ) for the Program and all required processes for the enrollment and monitoring of beneficiaries during implementation \uf0b7 Support the establishment of a profiling system to better understand the constraints individuals face. This will enable the categorization of beneficiaries into homogeneous groups facing similar labor market barriers and help estimate the scope ( and thus cost ) of ALMPs that will be expected from service providers. The results of this job profiling exercise will help tailor services for target youth and women, depending on the type of labor market or social barriers they face, and provide more inclusive and effective support. ( ii ) Provision of a package of tailored Active Labor Market Programs ( ALMPs ) to connect beneficiaries to wage-employment.", + "ner_text": [ + [ + 656, + 672, + "named" + ] + ], + "validated": false, + "empirical_context": "47 ( i ) Program intake and profiling of beneficiaries to support the implementation of targeting, registering, selecting and profiling beneficiaries, specifically: \uf0b7 Design and implement an extensive outreach process to identify and attract Program beneficiaries. Outreach activities will adopt a tailored gender-sensitive approach for different target groups ( women, not in employment, education, or training [ NEETs ], youth, urban versus rural areas ); \uf0b7 Set-up a Management Information System ( MIS ) for the Program and all required processes for the enrollment and monitoring of beneficiaries during implementation \uf0b7 Support the establishment of a profiling system to better understand the constraints individuals face. This will enable the categorization of beneficiaries into homogeneous groups facing similar labor market barriers and help estimate the scope ( and thus cost ) of ALMPs that will be expected from service providers.", + "type": "system", + "explanation": "However, it is described as a system for understanding constraints and categorizing beneficiaries, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'profiling system' suggests a structured approach to data collection.", + "contextual_reason_agent": "However, it is described as a system for understanding constraints and categorizing beneficiaries, not as a data source itself.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "167_27761", + "page": 20, + "text": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards. In addition, the auditor would carry out field spot check audits to ascertain compliance with contractual requirements. Compliance with conditional cash transfers would be monitored by an independent external consultant ( paragraph C. 3 ). 4. Social Opportunities, constraints, impacts, and risks arising. out of the socio-cultural and political context. The impact of closure and incursions in the West Bank and Gaza has been extensively documented. On the rise are poverty, unemployment, school drop out rates; on the decline or deteriorating are household incomes, living conditions, school attendance rates, at-large nutritional status and in particular of children 0-5 years of age. 13 The so called \u201c newly poor \u201d, those who are just above the poverty line before the outbreak of the second Intifada, in the third quarter of 2000, have been assisted by various interventions. To date, it has been much more difficult to find ways to assist the very poor, or first decile, whose coping mechanisms are essentially exhausted. The proposed project i s designed to support children ( 0-18 years old ) in the first decile. Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "ner_text": [ + [ + 2072, + 2088, + "named" + ], + [ + 741, + 750, + "Nutrition Survey <> data geography" + ], + [ + 755, + 759, + "Nutrition Survey <> data geography" + ], + [ + 1003, + 1028, + "Nutrition Survey <> reference population" + ], + [ + 1876, + 1908, + "Nutrition Survey <> data type" + ], + [ + 2040, + 2064, + "Nutrition Survey <> author" + ], + [ + 2066, + 2070, + "Nutrition Survey <> publication year" + ], + [ + 2119, + 2123, + "Nutrition Survey <> publication year" + ], + [ + 2143, + 2161, + "Nutrition Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is referenced alongside other surveys that are used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Nutrition Survey' is mentioned in a list of surveys that provide data on living conditions.", + "contextual_reason_agent": "The context confirms it is a dataset as it is referenced alongside other surveys that are used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 83, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 79 of 103 the lack of logistics, getting real-time data on service delivery indicators such as functionality will continue to be a challenge. In response to this challenge, the CWA will provide support to explore options for community-based WASH data collection. The support will include: ( i ) assessment of the different community-based data collection tools that could complement and integrate with the WASH MIS; ( ii ) designing ( including the selection of frequently needed indicators ), piloting, and rolling out the selected community-based data collection tool; ( iii ) institutionalizing the data collection responsibility at the WASHCOM level ( inclusion of this responsibility as part of the WASHCOM legalization document ); and ( iv ) continuous capacity building of WASHCOM members for regular reporting. Information collected from community-level monitoring will be integrated into sector MIS. c. Introducing the rural water and sanitation information system ( SIASAR ) in Ethiopia: The WASH sector in Ethiopia can draw lessons from the SIASAR model implemented in different countries of Latin America, Africa, and Central Asia. The Project will build on the ongoing WASH MIS initiative for Ethiopia and will link to SIASAR to allow a compressive and broader range of data analysis for decision making.", + "ner_text": [ + [ + 1181, + 1187, + "named" + ] + ], + "validated": false, + "empirical_context": "c. Introducing the rural water and sanitation information system ( SIASAR ) in Ethiopia: The WASH sector in Ethiopia can draw lessons from the SIASAR model implemented in different countries of Latin America, Africa, and Central Asia. The Project will build on the ongoing WASH MIS initiative for Ethiopia and will link to SIASAR to allow a compressive and broader range of data analysis for decision making.", + "type": "system", + "explanation": "However, SIASAR is described as a model and system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SIASAR is a dataset because it is associated with data analysis in the context of WASH initiatives.", + "contextual_reason_agent": "However, SIASAR is described as a model and system rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 12, + "text": "Without adequate action, social and economic losses are expected to be more pronounced in the marginalized regions of the country where the declining resilience of rural households would have devastating impacts on agricultural productivity, food security, incomes, and poverty reduction. 1 Uganda National Household Survey ( 2016 / 17 ) 2 Between 2012 / 13 and 2016 / 17 there was drought, crop and livestock pest and disease outbreaks, floods, and storms that resulted in sharp changes in prices. These events were more prevalent among the rural areas except for sharp changes in prices of commodities that were highly ranked in the urban areas. The prevalence of drought was almost universal except in the subregions of Elgon and Kigezi. Sharp changes in prices were most common in the subregions of Lango, Central II, and Karamoja. Bukedi subregion was the most hit by crop pests and diseases followed by Lango, while Karamoja was the most affected by livestock diseases ( 100 percent ). Teso subregion was affected by storms and floods.", + "ner_text": [ + [ + 291, + 323, + "named" + ], + [ + 326, + 335, + "Uganda National Household Survey <> publication year" + ], + [ + 362, + 371, + "Uganda National Household Survey <> reference year" + ], + [ + 803, + 808, + "Uganda National Household Survey <> data geography" + ], + [ + 810, + 820, + "Uganda National Household Survey <> data geography" + ], + [ + 826, + 834, + "Uganda National Household Survey <> data geography" + ], + [ + 836, + 852, + "Uganda National Household Survey <> data geography" + ], + [ + 909, + 914, + "Uganda National Household Survey <> data geography" + ], + [ + 922, + 930, + "Uganda National Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Without adequate action, social and economic losses are expected to be more pronounced in the marginalized regions of the country where the declining resilience of rural households would have devastating impacts on agricultural productivity, food security, incomes, and poverty reduction. 1 Uganda National Household Survey ( 2016 / 17 ) 2 Between 2012 / 13 and 2016 / 17 there was drought, crop and livestock pest and disease outbreaks, floods, and storms that resulted in sharp changes in prices. These events were more prevalent among the rural areas except for sharp changes in prices of commodities that were highly ranked in the urban areas.", + "type": "survey", + "explanation": "The Uganda National Household Survey is explicitly mentioned in the context, indicating it is used as a data source for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a national survey, which typically collects structured data.", + "contextual_reason_agent": "The Uganda National Household Survey is explicitly mentioned in the context, indicating it is used as a data source for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 45, + "text": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 41 of 94 which refugees ) managed drinking water services in the selected municipalities that have been provided through this Project. progress reports, based on updated beneficiary \u2019 s records from respective SKIs. People benefitting from safely managed sanitation services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs.", + "ner_text": [ + [ + 643, + 651, + "named" + ], + [ + 4, + 14, + "PMU Data <> publisher" + ] + ], + "validated": true, + "empirical_context": "People benefitting from safely managed sanitation services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed sanitation services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed sanitation services in the selected municipalities that have been provided through this Project.", + "type": "data", + "explanation": "In the context, 'PMU Data' is explicitly mentioned as data to be compiled and recorded, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is referenced in the context of compiling and recording information related to project beneficiaries.", + "contextual_reason_agent": "In the context, 'PMU Data' is explicitly mentioned as data to be compiled and recorded, indicating it functions as a structured collection of data.", + "contextual_signal": "mentioned as data to be compiled and recorded in progress reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 22, + "text": "In addition, a Selection Committee will be formed to make decisions regarding the selection of project beneficiaries and the provision of supplies and equipment under Component 2. VI. APPRAISAL SUMMARY Technical, Economic and Financial Analysis 48. Most of the technical design of the project has been implemented under the Youth Support Program ( YSP ) which was a subcomponent of the LSLP. These activities were implemented between 2018 and 2020 and evaluated using baseline and endline data collected from 827 persons out of the total number of 833 individuals that had participated in the YSP. This survey found that 86. 7 % of respondents ( 717 persons ) were employed or self-employed. This figure was 0. 4 % ( only 3 persons ) at the baseline. 52 % of beneficiaries were men and 48 % women. More than 91 % of working beneficiaries were self-employed with the remainder working for other employers. Almost all the employed / self-employed respondents were found to be working in the field of their vocational study, except for 3 persons. Average monthly nominal income of the households surveyed, from all working individuals, increased by more than 2. 5 times between the baseline and endline survey. As such, the design of the YSP has proven to be effective, so it has served as the basis of the design of this project. Financial Management 49. Financial Management ( FM ) functions under the proposed grant, including flow of funds,", + "ner_text": [ + [ + 468, + 493, + "named" + ], + [ + 443, + 447, + "baseline and endline data <> publication year" + ], + [ + 509, + 520, + "baseline and endline data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Most of the technical design of the project has been implemented under the Youth Support Program ( YSP ) which was a subcomponent of the LSLP. These activities were implemented between 2018 and 2020 and evaluated using baseline and endline data collected from 827 persons out of the total number of 833 individuals that had participated in the YSP. This survey found that 86.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to specific data collected during the evaluation of the Youth Support Program.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline and endline data' suggests structured data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific data collected during the evaluation of the Youth Support Program.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 47, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXVIII Verification Protocol Table PDO-level Indicators Result Area 1 on improved service delivery through digitalization Expanding trusted and inclusive access to people-centric digitalized services Description Individuals accessing digitalized public - and private-sector services using trusted, people-centric DPI [ Number ]. Frequency Annually. Data source Annual reports on the usage of digital services from MODEE, sectoral ministries, and the private sector. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improved access to patient-centric digital service Description Number of beneficiaries who actively use patient-centric digital services offfered through an eletronic medical record ( EMR ) platfrom. Active users refer to those with active acccounts who had logged into accounts at least twice since registration ( disaggregated for Syrian refugee users ).. Frequency Annually. Data source Electronic Health Solution ( EHS ). Methodology for Data Collection Data extracted from myHakeem to measure the number of active users. Responsibility for Data Collection MOH and EHS. Result Area 2 on enhanced government effectiveness through Digitalization Increased student trust in the fairness of the Tawjihi exam. Description Increased percentage of students expressing trust in the fairness of the Tawjihi exam.", + "ner_text": [ + [ + 1328, + 1336, + "named" + ] + ], + "validated": false, + "empirical_context": "Data source Electronic Health Solution ( EHS ). Methodology for Data Collection Data extracted from myHakeem to measure the number of active users. Responsibility for Data Collection MOH and EHS.", + "type": "system", + "explanation": "'myHakeem' is referred to as a source for data extraction but is not itself a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'myHakeem' is a dataset because it is mentioned in the context of data extraction.", + "contextual_reason_agent": "'myHakeem' is referred to as a source for data extraction but is not itself a structured collection of data.", + "contextual_signal": "mentioned as a source for data extraction", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 57, + "text": "These interim financial reports ( IFRs ) will be produced with information extracted from the Bertha System and will consolidate the Project \u2019 s financial data for all components using the cash basis. 59. At the end of each fiscal year, the CIM-AMFRI will prepare the annual financial statements for the Project, which will be audited. The second semester IFRs with accompanying notes will serve as the Project \u2019 s annual financial statements to be audited. 60. The following biannual IFRs [ to be prepared in Reais ] will be prepared for Project monitoring and management purposes and submitted to the Bank: a. IFR 1-A \u2013 Sources and Uses of Funds by Disbursement Category ( period to date, year to date, Project to date ): Shows the budgeted amounts against the actual expenditures ( i. e., documented expenditures ), including a variance analysis b. IFR 1-B \u2013 Uses of Funds by Project Component ( period to date, year to date, Project to date ): Shows the budgeted amounts against the actual expenditures ( i. e., documented expenditures ), including a variance analysis c. IFR 1-C \u2013 DA bank reconciliation, and accompanying bank statements d. Cash flow for the following period 61.", + "ner_text": [ + [ + 268, + 295, + "named" + ] + ], + "validated": false, + "empirical_context": "59. At the end of each fiscal year, the CIM-AMFRI will prepare the annual financial statements for the Project, which will be audited. The second semester IFRs with accompanying notes will serve as the Project \u2019 s annual financial statements to be audited.", + "type": "document", + "explanation": "However, these statements are documents that summarize financial information rather than structured collections of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because financial statements contain numerical data.", + "contextual_reason_agent": "However, these statements are documents that summarize financial information rather than structured collections of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project document, not as a data source", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 14, + "text": "Unit. 20 In addition to the DHIS, there exist many parallel health management information systems ( HMIS ) ( for example, one for each vertical program, PPHI, and so on ). Services delivered through the private sector are also not reported. 21 Less than 4 percent of the population seeks care at the public primary level HFs and 28 percent seeks care at higher \u2010 level public HFs, while more than 60 percent seeks care at private HFs ( PSLSM Survey 2014 \u2013 15. Pakistan Bureau of Statics, 2016 ). Zaidi, S. A., M. Bigdeli, and E. V. Langlois, et al. 2019. \u201c Health Systems Changes after Decentralization: Progress, Challenges and Dynamics in Pakistan. \u201d BMJ Glob Health 4. 22 In Pakistan, primary schools cover grades 1 through 5 and secondary schools cover grades 6 to 10 with middle schools for grades 6 to 8 and high schools for grades 9 and 10. Higher \u2010 secondary schools cover grades 11 and 12.", + "ner_text": [ + [ + 436, + 448, + "named" + ], + [ + 449, + 458, + "PSLSM Survey <> reference year" + ], + [ + 460, + 486, + "PSLSM Survey <> publisher" + ], + [ + 488, + 492, + "PSLSM Survey <> publication year" + ], + [ + 496, + 508, + "PSLSM Survey <> author" + ], + [ + 510, + 520, + "PSLSM Survey <> author" + ], + [ + 526, + 548, + "PSLSM Survey <> author" + ], + [ + 641, + 649, + "PSLSM Survey <> data geography" + ], + [ + 678, + 686, + "PSLSM Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Services delivered through the private sector are also not reported. 21 Less than 4 percent of the population seeks care at the public primary level HFs and 28 percent seeks care at higher \u2010 level public HFs, while more than 60 percent seeks care at private HFs ( PSLSM Survey 2014 \u2013 15. Pakistan Bureau of Statics, 2016 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing data on healthcare-seeking behavior.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing data on healthcare-seeking behavior.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 77, + "text": "A statistical analysis of Chad \u2019 s Household Consumption and Informal Sector Surveys by the World Bank identified a number of factors influencing household poverty, including some that disproportionally affect female-headed households. For example, employment of heads of households in the public service or in nonagricultural activities ( which is less common among female heads ) is correlated with lower poverty incidence. Livestock ownership ( which is also less common among female-headed households ) is also correlated with lower poverty rates. 48 Women \u2019 s vulnerability within households is due to their lack of financial autonomy. ECOSIT 3 found that only 23 percent of women were responsible for deciding 44 United Nations fertility data ( 2017 ). https: / / www. un. org / en / development / desa / population / publications / dataset / fertility / wfd2017. asp 45 World Bank. 2015. Systematic Country Diagnostic, Chad. Report No. 96537-TD. Washington, DC: World Bank. 46 https: / / repository. uneca. org / ds2 / stream /? # / documents / b47f8dc9-8948-5d3e-bae5-bc8ff64fa7ba / page / 3 47 World Bank. 2013. Chad - Poverty Note: Dynamics of Poverty and Inequality Following the Rise of the Oil Sector. http: / / documents. worldbank. org / curated / en / 201821468015589462 / Chad-Poverty-note-dynamics-of-poverty-and-inequality - following-the-rise-of-the-oil-sector 48 World Bank. 2015. Systematic Country Diagnostic, Chad. Report No. 96537-TD. Washington, DC: World Bank.", + "ner_text": [ + [ + 641, + 647, + "named" + ] + ], + "validated": false, + "empirical_context": "48 Women \u2019 s vulnerability within households is due to their lack of financial autonomy. ECOSIT 3 found that only 23 percent of women were responsible for deciding 44 United Nations fertility data ( 2017 ). https: / / www.", + "type": "program", + "explanation": "However, ECOSIT is referred to as a program and not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed ECOSIT is a dataset because it is mentioned in a context discussing data about women's financial autonomy.", + "contextual_reason_agent": "However, ECOSIT is referred to as a program and not explicitly as a data source or dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "161_28046", + "page": 13, + "text": "4. Lessons learned and reflected in the project design This operation builds on the lessons learned from three previous health operations financed by the World Bank since 1988 ( see Annex 15, where the Guinean coordinators o f those projects themselves listed the lessons to retain ). For example, working at the community level, reinforcing the Bamako Initiative and introducing health mutuals i s something that was seen as very positive and would be financed again here. This operation would also build on existing successful operations, namely the Urban Project I11 which has started writing agreements between the central govemment and municipalities. The MOH would follow the same approach and strengthen this system by better defining the role o f the DRS and DPS in relation to elected bodies such as municipalities and CRDs. Finally, preparation o f this project benefited from sector work such as the Health Expenditures Review, the Health Expenditures Tracking Survey, the Guinea Country Status Report, which all provided insight on problems to be tackled. 5. Alternatives considered and reasons for rejection At the outset o f project preparation, because o f the interest in moving towards budget support, a SWAP approach was considered. However common pooling o f donors \u2019 resources was not considered feasible in the context o f the problems which the Public Expenditures Review highlighted.", + "ner_text": [ + [ + 943, + 978, + "named" + ], + [ + 154, + 164, + "Health Expenditures Tracking Survey <> publisher" + ], + [ + 1422, + 1440, + "Health Expenditures Tracking Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "The MOH would follow the same approach and strengthen this system by better defining the role o f the DRS and DPS in relation to elected bodies such as municipalities and CRDs. Finally, preparation o f this project benefited from sector work such as the Health Expenditures Review, the Health Expenditures Tracking Survey, the Guinea Country Status Report, which all provided insight on problems to be tackled. 5.", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned as providing insight on problems to be tackled, indicating its use in empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned as providing insight on problems to be tackled, indicating its use in empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 13, + "text": "Household Labor Force Survey collects information from a representative sample of registered household residents living in Turkey, but the coverage of refugee population is thought to be extremely low as many of them are not registered residents in households and lack Turkish language skills to respond to the questionnaire. 12 Those in refugee status do not need to obtain work permits to work as agricultural seasonal workers. 13 Source: Turkish Statistical Institute Household Labor Force Survey. 14 These reports are reflected in the sectoral distribution of workers by age groups, which suggests that Turkish youth have been shifting out of agriculture: less than 10 percent of Turkish youth is employed in the agricultural sector as opposed to 50 percent for the older age groups. 15 Limited data are available for the agriculture sector in Turkey due to very high informality, and findings rely heavily on the interviews with farmers in the field that were carried out as part of project preparation.", + "ner_text": [ + [ + 0, + 28, + "named" + ], + [ + 82, + 112, + "Household Labor Force Survey <> reference population" + ], + [ + 123, + 129, + "Household Labor Force Survey <> data geography" + ], + [ + 441, + 470, + "Household Labor Force Survey <> publisher" + ], + [ + 539, + 585, + "Household Labor Force Survey <> data description" + ], + [ + 607, + 620, + "Household Labor Force Survey <> reference population" + ], + [ + 848, + 854, + "Household Labor Force Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Household Labor Force Survey collects information from a representative sample of registered household residents living in Turkey, but the coverage of refugee population is thought to be extremely low as many of them are not registered residents in households and lack Turkish language skills to respond to the questionnaire. 12 Those in refugee status do not need to obtain work permits to work as agricultural seasonal workers.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly described as a survey that collects structured data from households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it collects information from a sample of households.", + "contextual_reason_agent": "This is a dataset as it is explicitly described as a survey that collects structured data from households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 65, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 53. Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "ner_text": [ + [ + 570, + 574, + "named" + ], + [ + 4, + 14, + "EMIS <> publisher" + ], + [ + 15, + 23, + "EMIS <> data geography" + ], + [ + 608, + 611, + "EMIS <> author" + ], + [ + 673, + 697, + "EMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "type": "system", + "explanation": "EMIS is indeed a dataset as it is used for collecting and reporting educational data in the context of monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned as a source of data collection for indicators.", + "contextual_reason_agent": "EMIS is indeed a dataset as it is used for collecting and reporting educational data in the context of monitoring and evaluation.", + "contextual_signal": "mentioned as a data source for indicators", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "116_PAD1510-PAD-P152821-IDA-R2016-0078-1-Box394886B-OUO-9", + "page": 52, + "text": "The first families to move as part of the local integration process were vulnerable former 27 Zambia Central Statistical Office, 2010, Census of Population and Housing 28 World Bank, 2015, Mapping Subnational Poverty in Zambia, p. 27-29; World Bank, 2015. 6th edition, Zambia Economic Brief indicates that the national poverty rate in Zambia is 62 percent, p. 1. 29 Zambia Central Statistics Office, 2011, Living Conditions Monitoring Survey 2006 and 2010, p. 249 30 Oxford Policy Management, 2014, Baseline Study and Monitoring and Evaluation Framework for Phase II of the PPCR: 26 percent reported moderate hunger in the Barotse sub-basin where Kaoma is, whereas the figure was 7 percent in the Kafue sub-basin where Solwezi is. In both regions less than 1 percent stated they had severe hunger, p. 70 31 788 Angolans out of 12715 residing in Meheba and Mayukwayukwa. There are an additional 5890 self-settled Angolans and 56 Angolans registered in Lusaka. ( UNHCR, 2015 ) 32 Examples of vulnerability criteria include: separated child, exposure to multiple displacements, physical disability, older person unable to care for self, and single female household representative. UNHCR Angolan and Rwandan Refugee Profile as of November 6, 2015, ( UNHCR, 2015 ), p. 1, 2 33 World Bank, 2013, PAD - Zambia Strengthening Climate Resilience ( PPCR Phase II ), p. 1", + "ner_text": [ + [ + 406, + 441, + "named" + ], + [ + 220, + 226, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 366, + 398, + "Living Conditions Monitoring Survey <> author" + ], + [ + 400, + 404, + "Living Conditions Monitoring Survey <> publication year" + ], + [ + 442, + 446, + "Living Conditions Monitoring Survey <> reference year" + ], + [ + 451, + 455, + "Living Conditions Monitoring Survey <> reference year" + ], + [ + 623, + 640, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 647, + 652, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 697, + 712, + "Living Conditions Monitoring Survey <> data geography" + ], + [ + 719, + 726, + "Living Conditions Monitoring Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "1. 29 Zambia Central Statistics Office, 2011, Living Conditions Monitoring Survey 2006 and 2010, p. 249 30 Oxford Policy Management, 2014, Baseline Study and Monitoring and Evaluation Framework for Phase II of the PPCR: 26 percent reported moderate hunger in the Barotse sub-basin where Kaoma is, whereas the figure was 7 percent in the Kafue sub-basin where Solwezi is.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context of providing empirical data on living conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context of providing empirical data on living conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 32, + "text": "The economic analysis assumes that beneficiaries of the capacity development, MG and credit guarantee scheme will 26 Average of 13. 2 reduced to 5 lives saved per km of road rehabilitation. Burden of Road Injuries in Sub-Saharan Africa, http: / / pubdocs. worldbank. org / en / 356861434469785833 / Road-Safety-Burden-of-Injuries-in-Africa. pdf. 27 Global Partnership for Education, https: / / www. globalpartnership. org / education / the-benefits-of-education. 28 This morbidity assumption is based on proxy data used in other countries in Sub-Saharan Africa for refugee camps and host communities ( e. g., Somalia, Zimbabwe, South Sudan, Kenya ). 29 Kip Viscusi, W., and Clayton J. Masterman. \u201c Income Elasticities and Global Values of a Statistical Life. \u201d https: / / law. vanderbilt. edu / phd / faculty / w-kip-viscusi / 355_Income_Elasticities_and_Global_VSL. pdf 30 O & M costs for infrastructure vary widely. These costs include water treatment, road resurfacing, building maintenance, cleaning, energy and staff costs. As such, the analysis uses a conservative assumption of five percent of the investment costs for annual O & M. Sensitivity condition ERR ( percent ) Benefits reduced by 50 % 17 Three-year lag in benefits 21 Eight-year only impact 20", + "ner_text": [ + [ + 504, + 514, + "named" + ] + ], + "validated": false, + "empirical_context": "org / education / the-benefits-of-education. 28 This morbidity assumption is based on proxy data used in other countries in Sub-Saharan Africa for refugee camps and host communities ( e. g.", + "type": "data", + "explanation": "'Proxy data' is not a structured collection of data but rather a type of data used for estimation or inference.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'proxy data' refers to a dataset because it implies a collection of information used for analysis.", + "contextual_reason_agent": "'Proxy data' is not a structured collection of data but rather a type of data used for estimation or inference.", + "contextual_signal": "mentioned as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 21, + "text": "This subcomponent will support an in-depth periodic quality assessment in a nationally representative sample of ECEC service providers to better understand the quality of ECEC service delivery in the country. 28 The standard package of support for ECEC will include the provision of indoor and outdoor teaching and learning and play materials. 29 The quality assessment survey will also collect information about the quality of ECEC facilities against climate change and related disaster risks, particularly extreme heat, droughts, and floods. The data collected through the project will provide the MoER with valuable information that can be used to plan and enhance the climate resilience of the ECEC infrastructure through rehabilitation and expansion efforts under Component 2. 30. Subcomponent 1. 2: Develop, pilot, implement, and evaluate a learning recovery program for disadvantaged students ( US $ 4. 2 million IBRD ). This subcomponent will finance the development, implementation, and evaluation of a new rapid formative learning assessments in key subject areas ( for example, reading and math ) for at least one selected grade. This work will be conducted by the National Agency for Curriculum and Evaluation of the MoER. Following the identification of students and learning challenges, a consulting firm will be hired to work closely with the MoER to develop feasible tutoring program options and provide relevant TA.", + "ner_text": [ + [ + 351, + 376, + "named" + ], + [ + 112, + 134, + "quality assessment survey <> reference population" + ], + [ + 600, + 604, + "quality assessment survey <> publisher" + ], + [ + 1229, + 1233, + "quality assessment survey <> publisher" + ], + [ + 1358, + 1362, + "quality assessment survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "28 The standard package of support for ECEC will include the provision of indoor and outdoor teaching and learning and play materials. 29 The quality assessment survey will also collect information about the quality of ECEC facilities against climate change and related disaster risks, particularly extreme heat, droughts, and floods. The data collected through the project will provide the MoER with valuable information that can be used to plan and enhance the climate resilience of the ECEC infrastructure through rehabilitation and expansion efforts under Component 2.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned that the survey will collect data on the quality of ECEC facilities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects information.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned that the survey will collect data on the quality of ECEC facilities.", + "contextual_signal": "follows 'will also collect information about'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 371, + 405, + "named" + ], + [ + 194, + 197, + "District Health Information System <> publisher" + ], + [ + 532, + 554, + "District Health Information System <> reference population" + ], + [ + 572, + 575, + "District Health Information System <> publisher" + ], + [ + 688, + 691, + "District Health Information System <> publisher" + ], + [ + 703, + 719, + "District Health Information System <> data type" + ], + [ + 764, + 782, + "District Health Information System <> data type" + ], + [ + 1240, + 1263, + "District Health Information System <> data description" + ] + ], + "validated": true, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "system", + "explanation": "It is indeed a dataset as it is described as a data management system that regularly collects data from service delivery units.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a data management system that regularly collects data from service delivery units.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "088_UGANDA-PAD-04272018", + "page": 66, + "text": "Only Kasese MLG has the position of the Town Clerk, Principal Treasurer and Senior Environment Officer substantively filled. Moreover, even the staff substantively appointed do not have previous experience in handling activities to the magnitude expected under USMID AF. 42. An analysis of the staffing levels for a sample of 7 USMID and 4 coming MLGs indicates that there are gaps in both USMID and 4 additional MLIGs, see the summary table below. The sample shows that the coming 4 additional USMID MLGs have significant gaps in especially procurement and engineering whereas the Finance and IA positions are filled with gaps as per the current 14 MLGs. Table 8: Overview of Required Staffing Positions Filled of required positions ( % ) Finance Department Internal Audit Procurement Planning Engineering 7 USMID ( average ) 66 % 62 % 92 % 79 % 54 % 4 \u201c additional \u201d ULGs 66 % 71 % 50 % 63 % 49 % Source: Self-reported data during field level collections, October 2017. 43. The capacity gaps identified across all the 18 municipal LGs assessed still falls into three broad categories, namely: ( i ) gaps in numbers of key positions filled, ( ii ) operation skills to backup academic qualifications, and ( iii ) inadequate tools, equipment and facilities. The USMID Program will contribute to addressing the last two gaps. The first gap is structural and can only be addressed with the involvement of Ministry of Finance, Ministry of Public Service, and Ministry of LGs. Although the municipal LGs can use part of the Program fund for investment servicing cost ( procurement of technical support for engineering design, preparation of bidding documents and supervision ), there is need to continue building their technical and managerial capacity to handle the significant increase in development funds. For the additional", + "ner_text": [ + [ + 907, + 925, + "named" + ] + ], + "validated": false, + "empirical_context": "The sample shows that the coming 4 additional USMID MLGs have significant gaps in especially procurement and engineering whereas the Finance and IA positions are filled with gaps as per the current 14 MLGs. Table 8: Overview of Required Staffing Positions Filled of required positions ( % ) Finance Department Internal Audit Procurement Planning Engineering 7 USMID ( average ) 66 % 62 % 92 % 79 % 54 % 4 \u201c additional \u201d ULGs 66 % 71 % 50 % 63 % 49 % Source: Self-reported data during field level collections, October 2017. 43.", + "type": "data", + "explanation": "'Self-reported data' is not a structured collection of data but rather a description of the type of information collected, thus it does not function as a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'self-reported data' refers to a dataset because it implies a collection of information gathered from individuals.", + "contextual_reason_agent": "'Self-reported data' is not a structured collection of data but rather a description of the type of information collected, thus it does not function as a dataset.", + "contextual_signal": "mentioned only as a type of information collected, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 49, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually. Data source MOE, Education Management Information System ( EMIS ) Methodology for Data Collection Review of EMIS-generated report. Responsibility for Data Collection MOE. Strengthened digital skills in civil service Description Number of senior officials and applicants to jobs in the civil service with digital skills certified as a result of training. Frequency Once, by Program closure. Data source Roster of digitally skilled trained and certified senior officials and applicants.", + "ner_text": [ + [ + 481, + 500, + "named" + ], + [ + 15, + 21, + "administrative data <> data geography" + ], + [ + 129, + 137, + "administrative data <> reference population" + ], + [ + 434, + 470, + "administrative data <> data description" + ], + [ + 655, + 660, + "administrative data <> author" + ], + [ + 1437, + 1518, + "administrative data <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually.", + "type": "data", + "explanation": "In this context, 'administrative data' is indeed a dataset as it is used as a source for collecting and compiling information on beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'administrative data' is a dataset because it is mentioned in the context of data collection for measuring beneficiaries.", + "contextual_reason_agent": "In this context, 'administrative data' is indeed a dataset as it is used as a source for collecting and compiling information on beneficiaries.", + "contextual_signal": "mentioned as a data source for measuring beneficiaries", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 25, + "text": "This activity will directly contribute to climate mitigation. d ) National ICT Spatial Data Infrastructure Store and High-Performance Computing Center. This activity will finance studies, equipment, software, Geographic Information System ( GIS ) mapping tool, as well as mapping of core ICT infrastructure that will guide the rollout of ICT spatial infrastructure in the country, in collaboration with the road, energy, railway, and water sectors, city councils, and municipalities countrywide. This will also include the creation of a unified spatial system for sharing data among the utility providers and sectors and support activities to inform major infrastructure investments and data - driven policy making in these sectors. 48 The project will leverage good experiences and successful practice models from other World Bank projects, notably from the Regional Communications Infrastructure Project ( P094103 ). Given that the referenced study is needed to analyze options under the project on how to move forward, the said study, in form and substance satisfactory to the World Bank, would be carried out no later than 12 months after project effectiveness. 49 Given that the referenced assessment is needed to analyze options under the project on how to move forward, the said study, in form and substance satisfactory to the World Bank, would be carried out no later than 12 months after project effectiveness.", + "ner_text": [ + [ + 209, + 238, + "named" + ] + ], + "validated": false, + "empirical_context": "d ) National ICT Spatial Data Infrastructure Store and High-Performance Computing Center. This activity will finance studies, equipment, software, Geographic Information System ( GIS ) mapping tool, as well as mapping of core ICT infrastructure that will guide the rollout of ICT spatial infrastructure in the country, in collaboration with the road, energy, railway, and water sectors, city councils, and municipalities countrywide. This will also include the creation of a unified spatial system for sharing data among the utility providers and sectors and support activities to inform major infrastructure investments and data - driven policy making in these sectors.", + "type": "system", + "explanation": "However, in this context, it is described as a tool rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Geographic Information System' which is often associated with data collection and analysis.", + "contextual_reason_agent": "However, in this context, it is described as a tool rather than a structured collection of data.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 60, + "text": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 56 of 104 Beneficiaries with rebuilt and improved access to basic services Basic services refers to education, health, WASH, agriculture, and other public services, mobile and / or permanent. Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data. Beneficiaries are the people of the Kebeles where the new or improved services are provided MoF FPCU Beneficiaries with rebuilt and improved access to basic services - female Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data on female beneficiaries as per the relevant Kebele \u2019 s demographics MoF FPCU Beneficiaries with rebuilt and improved access to basic services - displaced ' Displaced ' are persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of or in order to avoid the effects of armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters.", + "ner_text": [ + [ + 404, + 421, + "named" + ], + [ + 4, + 14, + "Woreda-level data <> publisher" + ], + [ + 85, + 93, + "Woreda-level data <> data geography" + ], + [ + 380, + 388, + "Woreda-level data <> data geography" + ], + [ + 459, + 466, + "Woreda-level data <> reference population" + ], + [ + 675, + 683, + "Woreda-level data <> data geography" + ], + [ + 720, + 740, + "Woreda-level data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 56 of 104 Beneficiaries with rebuilt and improved access to basic services Basic services refers to education, health, WASH, agriculture, and other public services, mobile and / or permanent. Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data. Beneficiaries are the people of the Kebeles where the new or improved services are provided MoF FPCU Beneficiaries with rebuilt and improved access to basic services - female Semi-Annual Woreda Project Coordination Teams, Central Statistical Agency of Ethiopia Aggregation of Woreda-level data on female beneficiaries as per the relevant Kebele \u2019 s demographics MoF FPCU Beneficiaries with rebuilt and improved access to basic services - displaced ' Displaced ' are persons who have been forced or obliged to flee or to leave their homes or places of habitual residence, in particular as a result of or in order to avoid the effects of armed conflict, situations of generalized violence, violations of human rights or natural or human-made disasters.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to aggregated data collected from specific administrative divisions (Woredas) for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected at the Woreda level.", + "contextual_reason_agent": "This is indeed a dataset as it refers to aggregated data collected from specific administrative divisions (Woredas) for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 29, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 25 of 82 73. The project will, to the extent possible, utilize existing government systems and procedures. Districts implement M & E activities under the coordination of LODA and use the LODA-administered Monitoring, Evaluation, and Information System ( MEIS ). The MEIS facilitates coordination of social protection planning, household profiling and M & E of projects from grassroots to central level. The system can generate reports with values on key figures and the progress of indicators. While the MEIS is operational and implemented nationally as the central MIS, the level of system utilization varies from district to district due to different levels of technical capacity. The SEIRHCP will build district capacity to use the MEIS. The MINEMA SPIU will receive required project data and information from the districts and consolidate it in the P-MIS. 74. Project M & E arrangements. The MINEMA SPIU will have a dedicated M & E team, which will include ( i ) a Kigali - based National Project M & E Specialist, who will oversee overall M & E implementation; and ( ii ) District field specialists ( embedded in district offices, financed by the project ) to monitor all project activities in the target districts.", + "ner_text": [ + [ + 311, + 357, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will, to the extent possible, utilize existing government systems and procedures. Districts implement M & E activities under the coordination of LODA and use the LODA-administered Monitoring, Evaluation, and Information System ( MEIS ). The MEIS facilitates coordination of social protection planning, household profiling and M & E of projects from grassroots to central level.", + "type": "system", + "explanation": "However, it is described as a system for coordination and management, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often implies data handling.", + "contextual_reason_agent": "However, it is described as a system for coordination and management, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 29, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 27 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection three months after civic engagement training. Percentage of beneficiaries taking a more active role in their communities - disabled Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Description: Total number of individuals trained in civic engagement topics who report a higher level of community activity compared to the level reported before receiving training and support.", + "ner_text": [ + [ + 732, + 751, + "named" + ], + [ + 77, + 87, + "pre-training survey <> data geography" + ], + [ + 895, + 957, + "pre-training survey <> data description" + ] + ], + "validated": true, + "empirical_context": "For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Description: Total number of individuals trained in civic engagement topics who report a higher level of community activity compared to the level reported before receiving training and support.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used for data collection in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured method of data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used for data collection in the context.", + "contextual_signal": "follows 'used for data collection'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1179, + 1183, + "named" + ], + [ + 4, + 14, + "HMIS <> publisher" + ], + [ + 15, + 26, + "HMIS <> data geography" + ], + [ + 481, + 492, + "HMIS <> data geography" + ], + [ + 583, + 591, + "HMIS <> reference population" + ], + [ + 616, + 627, + "HMIS <> data geography" + ], + [ + 657, + 665, + "HMIS <> reference population" + ], + [ + 690, + 701, + "HMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "HMIS is indeed a dataset as it refers to a Health Management Information System that collects and manages health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is mentioned in the context of health data management and integration.", + "contextual_reason_agent": "HMIS is indeed a dataset as it refers to a Health Management Information System that collects and manages health data.", + "contextual_signal": "mentioned as a data source in the context of digitization and integration", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 38, + "text": "of milestones in the implementation plan for roll out of the CBC and CBA, as detailed in the Operations Manual53 US $ 5, 500, 000 for completion of milestones in the implementation plan for roll out of the CBC and CBA, as detailed in the Operations Manual US $ 4, 000, 000 for completion of milestones in the implementation plan for roll out of the CBC and CBA, as detailed in the Operations Manual US $ 1, 750, 000 for completion of milestones in the implementation plan for roll out of the CBC and CBA, as detailed in the Operations Manual 52 Only two cohorts are targeted to 2021 and 2022. There will be re-organization for the teachers teaching grades 7 to 8 as the education system transitions to Junior secondary at grade 7. The organization will have substantial implications on the current teacher shortage data. At mid-term review of the Operation, this DLI will be reviewed and adjusted as may be needed. 53 Details of the milestones included in the verification protocol, which will also be reflected in the operational manual.", + "ner_text": [ + [ + 798, + 819, + "named" + ], + [ + 578, + 582, + "teacher shortage data <> publication year" + ], + [ + 587, + 591, + "teacher shortage data <> publication year" + ], + [ + 631, + 662, + "teacher shortage data <> reference population" + ] + ], + "validated": true, + "empirical_context": "There will be re-organization for the teachers teaching grades 7 to 8 as the education system transitions to Junior secondary at grade 7. The organization will have substantial implications on the current teacher shortage data. At mid-term review of the Operation, this DLI will be reviewed and adjusted as may be needed.", + "type": "data", + "explanation": "This is indeed a dataset as it provides empirical information relevant to the education system's transition and teacher organization.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data regarding teacher shortages.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical information relevant to the education system's transition and teacher organization.", + "contextual_signal": "mentioned as data relevant to teacher shortages", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 69, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 65 of 86 Beneficiaries of job-focused interventions - host communities, disaggregated by gender Beneficiaries of job-focused interventions that live in communities hosting refugees Annual Project Management Information System The enrolment module of the MIS contains information on the locality of residence of the beneficiaries PIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of provinces implementing cash transfers Number of provinces of the country where cash transfers activities financed by the project are implemented Annual Implementati on Progress Reports The Project Implementation Unit will inform of the provinces where the project is being implemented Project Implementation Unit Beneficiaries receiving payments on the schedule set-up in the operational manual Share of cash transfers beneficiaries that receive the transfers within the time period stipulated in the project operational manual Every 6 months Payment agent reports The payment agent reports include the date of payments and this will be compared with the date initially planned Proportion of beneficiary households in the beneficiary collines participating in behavior change promotion activities Share of households that attend sessions on human capital development Every six months Attendance sheets from NGOs carrying out the sessions NGOs implementing accompanying measures on human capital development will report on the attendance of Project Implementation Unit", + "ner_text": [ + [ + 235, + 279, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Cash for Jobs Project ( P175327 ) Page 65 of 86 Beneficiaries of job-focused interventions - host communities, disaggregated by gender Beneficiaries of job-focused interventions that live in communities hosting refugees Annual Project Management Information System The enrolment module of the MIS contains information on the locality of residence of the beneficiaries PIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of provinces implementing cash transfers Number of provinces of the country where cash transfers activities financed by the project are implemented Annual Implementati on Progress Reports The Project Implementation Unit will inform of the provinces where the project is being implemented Project Implementation Unit Beneficiaries receiving payments on the schedule set-up in the operational manual Share of cash transfers beneficiaries that receive the transfers within the time period stipulated in the project operational manual Every 6 months Payment agent reports The payment agent reports include the date of payments and this will be compared with the date initially planned Proportion of beneficiary households in the beneficiary collines participating in behavior change promotion activities Share of households that attend sessions on human capital development Every six months Attendance sheets from NGOs carrying out the sessions NGOs implementing accompanying measures on human capital development will report on the attendance of Project Implementation Unit", + "type": "system", + "explanation": "However, it is described as a system for project management rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system for project management rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 53, + "text": "The World Bank Uganda Digital Acceleration Project - GovNet ( P171305 ) Page 41 of 76 kiosk ( within 1 km from home ) Of which Refugees Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ) Annually Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet kiosk ( within 1 km from home ) NITA-U, UCC & OPM Of which Hosts Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ). Annual Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet NITA-U, UCC & OPM", + "ner_text": [ + [ + 731, + 742, + "named" + ], + [ + 15, + 21, + "UCC Surveys <> data geography" + ], + [ + 384, + 397, + "UCC Surveys <> reference population" + ], + [ + 463, + 476, + "UCC Surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Uganda Digital Acceleration Project - GovNet ( P171305 ) Page 41 of 76 kiosk ( within 1 km from home ) Of which Refugees Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ) Annually Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet kiosk ( within 1 km from home ) NITA-U, UCC & OPM Of which Hosts Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ). Annual Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet NITA-U, UCC & OPM", + "type": "survey", + "explanation": "It is indeed a dataset as it refers to surveys conducted to gather data on broadband penetration in specific communities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Surveys' which typically indicates a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it refers to surveys conducted to gather data on broadband penetration in specific communities.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 64, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 58 of 102 training hub is operating has officially appointed the Board through a written notice and appointed its members; ( 2 ) the Board has met at least once during the previous six months Number of training programs developed in partnership with the private sector in line with projected skills demands in the labor market The indicator measures the cumulated number of training programs delivered in supported TVET sector training hubs that have been subject to improvement through any of the following: curriculum revision; upgrading / revision of teaching and learning material; workshop / lab upgrading. The improvement must have been planned and implemented with involvement of the industry partner ( s ). Annual since year 2 Annual reports from supported TVET sector training hubs Administrative data M & E PIU Specialist Fully functioning upgraded CEMs that are improved with climate change mitigation considerations; are accessible to the disabled and include separate toilets for females The indicator measures the cumulated number of CEMs for which the upgrading has been completed according to agreed upon standards defined in the PIM.", + "ner_text": [ + [ + 872, + 891, + "named" + ], + [ + 4, + 14, + "Administrative data <> publisher" + ], + [ + 486, + 521, + "Administrative data <> reference population" + ], + [ + 816, + 830, + "Administrative data <> data type" + ] + ], + "validated": true, + "empirical_context": "The improvement must have been planned and implemented with involvement of the industry partner ( s ). Annual since year 2 Annual reports from supported TVET sector training hubs Administrative data M & E PIU Specialist Fully functioning upgraded CEMs that are improved with climate change mitigation considerations; are accessible to the disabled and include separate toilets for females The indicator measures the cumulated number of CEMs for which the upgrading has been completed according to agreed upon standards defined in the PIM.", + "type": "data", + "explanation": "In this context, 'administrative data' is used as a source of information for monitoring and evaluation, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' often refers to structured data collected for administrative purposes.", + "contextual_reason_agent": "In this context, 'administrative data' is used as a source of information for monitoring and evaluation, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of information for M & E", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 69, + "text": "An impact evaluation will be carried out before project closing. 25. Gender. A baseline survey conducted in one of the districts under the RWSSP confirmed several common gender-based challenges related to WSS access. Those include ( a ) time burden for women and girls due to unreliable and unsafe water supplies; ( b ) poor water quality and healthcare responsibilities for children; ( c ) physical, social, and health risks associated with collecting water or using open toilets; and ( d ) inequitable access to information, training, and opportunities for employment in water institutions, particularly in technical and decision-making roles. The baseline assessment conducted in Vosse district confirmed that water collection responsibility is mainly assigned to women regardless of the water source type and distance to the source. In more than 87 percent of households, women are responsible for fetching water in almost all age categories, except for the age category of 6 \u2013 17years, where the share of boys and girls who fetch water is almost equal. At the same time, in most cases, water is fetched by women or children. In some households, this responsibility is assigned to school or preschool children. This may be explained by the fact that girls after 17 enter family life, get married, and become fully responsible for household chores, including fetching of water. In the assessed settings, water is collected at least twice a day, with the time spent in a round trip to the source and queuing for water collection ranging from 25 to 40 minutes in case of the public tap and from 25 to up to 80 minutes for other sources. Results of the survey are reported in table 1. 2.", + "ner_text": [ + [ + 79, + 94, + "named" + ], + [ + 237, + 312, + "baseline survey <> data description" + ], + [ + 320, + 383, + "baseline survey <> data description" + ], + [ + 683, + 697, + "baseline survey <> data geography" + ], + [ + 876, + 881, + "baseline survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Gender. A baseline survey conducted in one of the districts under the RWSSP confirmed several common gender-based challenges related to WSS access. Those include ( a ) time burden for women and girls due to unreliable and unsafe water supplies; ( b ) poor water quality and healthcare responsibilities for children; ( c ) physical, social, and health risks associated with collecting water or using open toilets; and ( d ) inequitable access to information, training, and opportunities for employment in water institutions, particularly in technical and decision-making roles.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that confirmed gender-based challenges related to WSS access.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that confirmed gender-based challenges related to WSS access.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 14, + "text": "Few training courses address the specific challenges of formalizing a business, including meeting tax obligations, preparing proper records, fulfilling reporting requirements, and obtaining licenses. Training tends to focus on limited topics, such as financial or computer literacy, but leaves out training in life skills and support for network. Yet, global evidence demonstrates that developing socio - emotional skills, through psychology-based trainings, are as important to enterprise success as strengthening business skills. 25 Finally, many business development services continue to train women for sectors where women-owned firms are over-represented, such as small trade or food service, rather than where they could diversify their business and earn higher profits. 19 Delecourt, S. and Fitzpatrick, A. 2021. \u201c Childcare Matters: Female Business Owners and the Baby-Profit Gap. \u201d Management Science, Vol, 67, No. 7. May 13. 20 Uganda Bureau of Statistics ( 2021 ). Uganda Violence Against Women and Girls Survey 2020. Uganda Bureau of Statics. Kampala, Uganda. This survey was designed as part of the UNHS and drew from UNHS samples which are nationally representative. 21 World Health Organization ( 2021 ). Violence against women prevalence estimates, 2018: global, regional and national prevalence estimates for intimate partner violence against women and global and regional prevalence estimates for non-partner sexual violence against women. Geneva: World Health Organization. 22 World Bank ( 2019 ). Profiting from Parity: Unlocking the Potential of Women \u2019 s Business in Africa. Washington, D. C.: World Bank. 23 Campos et al. 2015. 24 World Bank, 2022. Breaking Barriers: Female Entrepreneurs Who Cross Over to Male-Dominated Sectors. Washington, D. C.: World Bank. 25 Campos, F., Frese, M., Goldstein, M., Iacovone, L., Johnson, H. C., McKenzie, D., and Mensmann, M. 2017. \u201c Teaching personal initiative beats traditional training in boosting small business in West Africa. \u201d Science, 357 ( 6357 ), 1287-1290.", + "ner_text": [ + [ + 976, + 1027, + "named" + ] + ], + "validated": true, + "empirical_context": "20 Uganda Bureau of Statistics ( 2021 ). Uganda Violence Against Women and Girls Survey 2020. Uganda Bureau of Statics.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey conducted by the Uganda Bureau of Statistics, indicating it is a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in the title, which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey conducted by the Uganda Bureau of Statistics, indicating it is a structured collection of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 61, + "text": "The limited adoption of Sanad is due to low perceived relevance for many individuals and service providers, as well as to eligibility restricted so far to citizens, thus leaving various demographic groups ( such as non-Jordanians \u2013 amongst whom refugees, and military personnel ) ineligible for a digital ID. MODEE plans to increase the uptake of Sanad by: ( 1 ) expanding eligibility and outreach to enroll additional users ( for example, through the operationalization of new Government Service Centers ( GSCs ) or targeted visits to refugee communities ); ( 2 ) introducing new features to enhance trust, interoperability, security, and people centricity, such as adding a mechanism to collect user consent for data sharing; ( 3 ) implementing a Digital Public Infrastructure ( DPI ) 27 ecosystem approach that is aligned with international good practice, to achieve effective, end-to-end, digitalized service delivery workflows. Taking a trusted, people-centric DPI ecosystem approach offers the opportunity to embrace international standards and open internet technologies by using trusted data for eligibility purposes, access control, and the provision of efficient, in-demand digitalized services.", + "ner_text": [ + [ + 24, + 29, + "named" + ] + ], + "validated": false, + "empirical_context": "The limited adoption of Sanad is due to low perceived relevance for many individuals and service providers, as well as to eligibility restricted so far to citizens, thus leaving various demographic groups ( such as non-Jordanians \u2013 amongst whom refugees, and military personnel ) ineligible for a digital ID. MODEE plans to increase the uptake of Sanad by: ( 1 ) expanding eligibility and outreach to enroll additional users ( for example, through the operationalization of new Government Service Centers ( GSCs ) or targeted visits to refugee communities ); ( 2 ) introducing new features to enhance trust, interoperability, security, and people centricity, such as adding a mechanism to collect user consent for data sharing; ( 3 ) implementing a Digital Public Infrastructure ( DPI ) 27 ecosystem approach that is aligned with international good practice, to achieve effective, end-to-end, digitalized service delivery workflows.", + "type": "program", + "explanation": "'Sanad' is described as a program aimed at increasing digital ID uptake, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is mentioned in the context of digital ID and data sharing.", + "contextual_reason_agent": "'Sanad' is described as a program aimed at increasing digital ID uptake, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "180 Attachment 1. Project Design Summary Sector-related Country Assistance Strategy Long-Term Program Indicators: Program Reports: ( From Goal to Mission ) ( CAS ) Goal: * By 2003, HIV prevalence will be reduced * National HIV / AIDS surveillance * Social and cultural behavior chag from 14 percent to 13 percent asnong reports, mnidterm review, and end improves throughout the life of the young people ( I15-24 years ). of project evaluation. project To mitigate the social and economic impact of the HIV / AIDS epidemic in Kenya. By 2004, HIV / AIDS prevalence among * National HIV / AIDS surveillance adults ( 1549 years ) will remain below 14 reports, mnidterm review, and end percent. of project evaluation. Project Development Objective: Outcome / Impact Indicators: Project Reports: ( From Purpose to Goal ) * By 2003, the proportion of adults ( 15-49 * Poverty reduction efforts continue To intensify the achievement of the multi-sectoral years ) possessing accurate knowledge of * Kenya Demographic and Health throughout the life of the project. HIV / AIDS primary targets in the National HIV / AIDS means of preventing HIV infection will Survey ( KDHS ) data Strategic Plan with the full participation of communities. increase.", + "ner_text": [ + [ + 1157, + 1161, + "named" + ], + [ + 175, + 179, + "KDHS <> publication year" + ], + [ + 525, + 530, + "KDHS <> data geography" + ], + [ + 990, + 995, + "KDHS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Project Development Objective: Outcome / Impact Indicators: Project Reports: ( From Purpose to Goal ) * By 2003, the proportion of adults ( 15-49 * Poverty reduction efforts continue To intensify the achievement of the multi-sectoral years ) possessing accurate knowledge of * Kenya Demographic and Health throughout the life of the project. HIV / AIDS primary targets in the National HIV / AIDS means of preventing HIV infection will Survey ( KDHS ) data Strategic Plan with the full participation of communities. increase.", + "type": "survey", + "explanation": "In the context, 'KDHS' is explicitly mentioned as a source of data for the project, confirming it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'KDHS' is a dataset because it is referenced in the context of indicators and project reports.", + "contextual_reason_agent": "In the context, 'KDHS' is explicitly mentioned as a source of data for the project, confirming it functions as a dataset.", + "contextual_signal": "'mentioned as a data source for project reports'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 29, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 27 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection three months after civic engagement training. Percentage of beneficiaries taking a more active role in their communities - disabled Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Description: Total number of individuals trained in civic engagement topics who report a higher level of community activity compared to the level reported before receiving training and support.", + "ner_text": [ + [ + 783, + 799, + "named" + ] + ], + "validated": false, + "empirical_context": "For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Description: Total number of individuals trained in civic engagement topics who report a higher level of community activity compared to the level reported before receiving training and support.", + "type": "survey", + "explanation": "However, it is not a dataset itself but rather a method of data collection mentioned in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'follow-up survey' implies a structured method of data collection.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a method of data collection mentioned in the context.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 3, + "validated": 1, + "not_validated": 2 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 33, + "text": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ). 52. While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to monitor and evaluate data effectively on refugee-related education challenges. Learning assessments will include modules to analyze the learning outcomes of refugees and efforts will be made toward including disaggregated data on refugees in statistical yearbooks. Priority Area 4: Supporting the educational needs of public schools in host communities affected by refugees. 53. Refugee-related project activities, estimated at US $ 36 million, fall under Results Areas 1, 3, and 7.", + "ner_text": [ + [ + 497, + 501, + "named" + ] + ], + "validated": false, + "empirical_context": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ).", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 14, + "text": "There are numerous growth opportunities and trajectories for Djibouti to consider as it continues to consolidate its role as a trade conduit between economic partners including Ethiopia, as a base for peace and stability in an otherwise volatile Horn of Africa, and as it ambitiously positions itself as a regional digital hub. To ensure that these opportunities lead to sustained improvements in the quality of life for its citizens and the refugee population, Djibouti has focused on improving their skills set to help them access emerging opportunities, while simultaneously supporting efforts to increase the country \u2019 s key growth sectors. The government recognizes that the jobs agenda needs to be a central feature of this economic transformation, and to ensure that future growth is not a jobless one. Ensuring that the country has the needed stock and flow of education and skills to harness this growth potential is one of the key policy priorities of the country. Furthermore, the recognition of refugee skills is an important and growing area in the policy space and policy makers recognize that there are qualified individuals whose skills and talents can contribute to the country \u2019 s economy and that including them in education and training programs could help the labor market integration of refugees. 11. The COVID-19 crisis has had significant impacts on Djibouti \u2019 s educational system across all levels. The transaction of educational services in Djibouti has taken a severe hit due to the COVID-19 crisis, and the implications of this will manifest itself in the years to come. The Djiboutian government did respond quickly in the early months of the pandemic between March and September 2020 and resorted to alternative learning modalities. Preliminary data from pulse surveys suggests relatively good student participation through these", + "ner_text": [ + [ + 1786, + 1799, + "named" + ], + [ + 61, + 69, + "pulse surveys <> data geography" + ], + [ + 442, + 460, + "pulse surveys <> reference population" + ], + [ + 462, + 470, + "pulse surveys <> data geography" + ], + [ + 1374, + 1382, + "pulse surveys <> data geography" + ], + [ + 1468, + 1476, + "pulse surveys <> data geography" + ], + [ + 1710, + 1714, + "pulse surveys <> publication year" + ], + [ + 1913, + 1931, + "pulse surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The Djiboutian government did respond quickly in the early months of the pandemic between March and September 2020 and resorted to alternative learning modalities. Preliminary data from pulse surveys suggests relatively good student participation through these", + "type": "survey", + "explanation": "In the context, 'pulse surveys' are mentioned as a source of preliminary data, indicating they are used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'pulse surveys' is a dataset because it refers to a collection of data gathered from surveys.", + "contextual_reason_agent": "In the context, 'pulse surveys' are mentioned as a source of preliminary data, indicating they are used for empirical analysis.", + "contextual_signal": "described as a source of preliminary data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 63, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 59 of 74 installment of livelihood grant. Number of beneficiaries receiving Economic Opportunities who are female youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth and female, defined as women between the ages of 18 and 35 years, and have received at least 1 installment of livelihood grant. This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "ner_text": [ + [ + 663, + 675, + "named" + ], + [ + 4, + 14, + "Payment data <> publisher" + ], + [ + 565, + 581, + "Payment data <> data type" + ] + ], + "validated": true, + "empirical_context": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific data collected and updated regularly for analysis in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Payment data' suggests a structured collection of information related to financial transactions.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific data collected and updated regularly for analysis in the context of the project.", + "contextual_signal": "mentioned as data collected and updated during project implementation", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 5, + "validated": 4, + "not_validated": 1 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 82, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 78 of 103 the string committees, before submission to the next reporting line, should approve all produced reports. Figure A1. 7 shows the range and schedule of reporting on Project progress. Figure A1. 7: Reporting on Project Progress 97. Baseline assessment: A woreda-level baseline for key WASH indicators was compiled, analyzed, and mapped for a total of 754 woredas using data collected from regional baseline reports, Health Management Information System and Education Management Information System reports, and regular reports of the Early Warning and Response Committee. Under the assessment, ten WASH indicators clustered in two groups were covered ( impact and outcome level ). The impact-level indicators are: ( i ) stunting rate; ( ii ) prevalence of diarrhea under the age of five; and ( iii ) primary students \u2019 dropout rate. Outcome-level indicators include: ( i ) access to rural water supply; ( ii ) access to rural household sanitation; ( iii ) open defecation free ( ODF ) coverage; ( iv ) health facility water supply coverage; ( v ) health facility improved sanitation coverage; ( vi ) school water supply coverage; and ( vii ) school improved sanitation coverage.", + "ner_text": [ + [ + 594, + 633, + "named" + ] + ], + "validated": false, + "empirical_context": "7: Reporting on Project Progress 97. Baseline assessment: A woreda-level baseline for key WASH indicators was compiled, analyzed, and mapped for a total of 754 woredas using data collected from regional baseline reports, Health Management Information System and Education Management Information System reports, and regular reports of the Early Warning and Response Committee. Under the assessment, ten WASH indicators clustered in two groups were covered ( impact and outcome level ).", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it could contain data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "187_multi-page", + "page": 41, + "text": "Annex 1: Project Design Summary ALBANIA: Public Administration Reform Project Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) * Establish transparent and * Governance and Institutional * Monitoring and supervision * Improved transparency and accountable public service reform impact monitoring of the PAR project accountability of the state provision by improving surveys agreed to be eMonitoring and supervision structures, and strengthened goverance and building undertaken under the SAC omtorogram an institutions that function institutions that have the program Public Expenditure Support under the rule of law will capacity to govern within an oAnti-corruption program Program ( PESP ) trm dlo ent ng adequate and improved surveysrgrprogrepo e term development legal and judicial oProgress reports on the imp framework. lementation of the anti-corruption program * Promoting sustainable private sector growth * Reports from Impact Monitoring Surveys * Progress reports on the im plementation of the anti-corruption program * Public Expenditure tracking study * Implementation Completion Report of the project - 38 -", + "ner_text": [ + [ + 981, + 1006, + "named" + ], + [ + 32, + 39, + "Impact Monitoring Surveys <> data geography" + ], + [ + 1188, + 1206, + "Impact Monitoring Surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Annex 1: Project Design Summary ALBANIA: Public Administration Reform Project Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) * Establish transparent and * Governance and Institutional * Monitoring and supervision * Improved transparency and accountable public service reform impact monitoring of the PAR project accountability of the state provision by improving surveys agreed to be eMonitoring and supervision structures, and strengthened goverance and building undertaken under the SAC omtorogram an institutions that function institutions that have the program Public Expenditure Support under the rule of law will capacity to govern within an oAnti-corruption program Program ( PESP ) trm dlo ent ng adequate and improved surveysrgrprogrepo e term development legal and judicial oProgress reports on the imp framework. lementation of the anti-corruption program * Promoting sustainable private sector growth * Reports from Impact Monitoring Surveys * Progress reports on the im plementation of the anti-corruption program * Public Expenditure tracking study * Implementation Completion Report of the project - 38 -", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as a source of information for monitoring and evaluation, indicating its use as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Impact Monitoring Surveys' suggests a structured collection of data related to project impacts.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of information for monitoring and evaluation, indicating its use as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "064_Mauritania-Water-and-Sanitation-Sectoral-Project", + "page": 34, + "text": "A user satisfaction survey will be conducted as part of project M & E to understand how beneficiaries are being affected, or benefitting from, project interventions. The survey will be undertaken regularly, and its findings of the surveys will be used to improve project implementation and course-correct as needed. Lastly, opportunities to involve young people in civil works will be explored, particularly for those in host communities. Gender 93. The project has considered ways for closing the gender and / or social inclusion gaps identified in the CPF in line with the WBG gender strategy. A rapid field assessment and a gender analysis were conducted during project preparation to assess gender gaps and propose specific actions and indicators to close these gaps. Annex 6 includes details on the analysis, action plan and M & E framework for gender under the project. V. GRIEVANCE REDRESS SERVICES 94. Communities and individuals who believe that they are adversely affected by a World Bank ( WB ) supported project may submit complaints to existing project-level grievance redress mechanisms or the WB \u2019 s Grievance Redress Service ( GRS ). The GRS ensures that complaints received are promptly reviewed in order to address project-related concerns.", + "ner_text": [ + [ + 2, + 26, + "named" + ], + [ + 88, + 101, + "user satisfaction survey <> reference population" + ], + [ + 988, + 998, + "user satisfaction survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "A user satisfaction survey will be conducted as part of project M & E to understand how beneficiaries are being affected, or benefitting from, project interventions. The survey will be undertaken regularly, and its findings of the surveys will be used to improve project implementation and course-correct as needed.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data aimed at understanding user satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects user satisfaction data.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data aimed at understanding user satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 27, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 23 of 64 Table 3: Key refugee sub-populations and health facility access Refugee and asylum-seeker population Health facilities Primary Referral Holl Holl camp 8, 012 1 1 ( H\u00f4pital r\u00e9gional d ' Ali Sabieh ) Ali Adeh camp 17, 013 1 Ali Sabieh total health facilities 9 2 Markazi camp 2, 862 1 1 ( Centre medical hospitalier d \u2019 Obock ) Obock town 1 Obock total health facilities 7 1 Djibouti City 7, 103 14 3 Djibouti City total health facilities Dikhil total health facilities Arta total health facilities Tadjourah total health facilities 14 3 8 1 6 1 11 1 National health facilities serving refugees 18 5 Total national health facilities 55 9 Source: UNHCR, MOH 37. Integration of refugees into the national health system can improve the availability of services, but also exerts additional pressure on the health system. In certain communities, such as Balbala, displaced populations and refugees constitute 40 percent of consultations in health facilities. The MOH lacks a digital data system for service utilization disaggregated by refugee status and cannot therefore monitor refugee service utilization in total or at specific facilities.", + "ner_text": [ + [ + 1046, + 1065, + "named" + ] + ], + "validated": false, + "empirical_context": "In certain communities, such as Balbala, displaced populations and refugees constitute 40 percent of consultations in health facilities. The MOH lacks a digital data system for service utilization disaggregated by refugee status and cannot therefore monitor refugee service utilization in total or at specific facilities.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to a system that lacks the actual data needed for analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data'.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a system that lacks the actual data needed for analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 81, + "text": "The surveys will use gender - disaggregated data to measure and verify citizens \u2019 perceptions of the Project \u2019 s activities and will serve as a tool to define gender or social issues and recommendations for further improvements in the WASH sector. 95. Tools for conflict mitigation: As part of the ESMF and RPF, screening tools have been reviewed and enhanced, which include thorough consultations, involvement of key community leaders to facilitate dialogue, and documentation of entire processes ( consultation, design, compensation if any, etc. ). Further, key actions have been articulated to strengthen the CWA \u2019 s redress mechanisms to ensure adequate consultation and handling of cases raised at the woreda level. For Component 4, where water-related conflicts are particularly acute, a dedicated team will carry out feasibility studies to dimension needs considering water resources availability. Technical teams will be supported by Project-contracted Community Facilitation Teams ( CFTs ) that will carry out community assessments and consultations, and provide TA to communities prior, during, and after the construction of proposed water supply schemes. Monitoring and Evaluation 96. Reporting arrangements: The Project \u2019 s regular reporting responsibility will be placed on WASH Coordination Offices ( federal, regional, and zonal ), WASH sector PMUs ( federal and regional ), woreda WASH teams, and town water utilities. Every three months the NWCO will prepare and submit a consolidated progress report based on the agreed reporting format to the NWSC and to the World Bank within 60 days after the end of the quarter. The reports produced on a quarterly bases will focus on implementation progress and result indicator monitoring. To facilitate standard and consistent reporting among the different actors, the NWCO will develop and put in place a revised reporting guideline. The Program will also assist the WASH-sector PMUs and coordination offices to build and use a systematic data collection and compilation tool to facilitate easy report compilation. Respective higher officials and", + "ner_text": [ + [ + 21, + 48, + "named" + ] + ], + "validated": false, + "empirical_context": "The surveys will use gender - disaggregated data to measure and verify citizens \u2019 perceptions of the Project \u2019 s activities and will serve as a tool to define gender or social issues and recommendations for further improvements in the WASH sector. 95.", + "type": "data", + "explanation": "However, it is not a dataset itself but rather a description of the type of data being used in the surveys.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'gender-disaggregated data' suggests a structured collection of information categorized by gender.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a description of the type of data being used in the surveys.", + "contextual_signal": "described as a type of data used in surveys", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 36, + "text": "26 88. Based on the experiences in addressing the specific vulnerabilities and discrimination facing the Batwa, the Project will take specific measures to ensure the participation of eligible Batwa households. Batwa households living in the targeted collines will be included in the first list of eligible households and surveyed in the PMT survey. The final list of beneficiaries will be publicly validated. It is expected that access to ID documents may be particularly low in these communities and the project will support beneficiaries in acquiring national IDs and birth certificates. 89. The program will also ensure they can participate in the accompanying measures by partnering with NGOs that are experienced in taking into account their specific challenges, in particular with hygiene, food preparation and feeding practices. Extra behavior change communication may be necessary, in particular for the potential use of cell phones as well as chargers and incentives to open accounts in micro-finance cooperatives to safeguard the transfer monies, given increased risks of thefts. Service providers will ensure that Batwa households can participate in regular IEC / BCC sessions with the rest of the community to foster social inclusion. However, specific sessions on sensitive topics such as reproductive health and gender-based violence may be conducted separately for the Batwa. Once collines are selected for participation in the project, provincial Indigenous People \u2019 s Plans will be developed.", + "ner_text": [ + [ + 337, + 347, + "named" + ], + [ + 105, + 110, + "PMT survey <> data geography" + ], + [ + 192, + 208, + "PMT survey <> reference population" + ], + [ + 210, + 226, + "PMT survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Based on the experiences in addressing the specific vulnerabilities and discrimination facing the Batwa, the Project will take specific measures to ensure the participation of eligible Batwa households. Batwa households living in the targeted collines will be included in the first list of eligible households and surveyed in the PMT survey. The final list of beneficiaries will be publicly validated.", + "type": "survey", + "explanation": "The PMT survey is explicitly mentioned as a survey that will collect data from eligible households, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PMT survey' implies a structured collection of data gathered from households.", + "contextual_reason_agent": "The PMT survey is explicitly mentioned as a survey that will collect data from eligible households, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 18, + "text": "Private investment generated through the project is counted towards Private Capital Enabling ( PCE ) and Private Capital Mobilization ( PCM ). Component 2 on Economic Opportunity & Self-Reliance is verified as both PCE and PCM by supporting a partial credit guarantee ( PCG ) scheme and matching grants. Sub-component 2 ( c ) support to the Business Development Fund ( BDF ) PCG scheme is expected to facilitate access to finance for up to 3, 000 7 African Development Bank, UNHCR, IGAD & EAC ( 2024 ) Regional Report: Regional Program on Enhancing the Investment Climate for the Economic Empowerment of Refugee, Returnee, and Host / Return Community Women in the East and HoA and Great Lakes Region. 8 Access to Finance Rwanda ( 2024 ) FinScope 2024 Report. The report shows high levels of financial inclusion but low usage of financial services. 9 Non-monetary poverty for the other four host districts is: Nyamagabe ( Kigeme camp ) 39 percent, Gatsibo ( Nyabiheke Camp ) 37 percent, Kirehe ( Mahama camp ) 35 percent and Karongi ( Kiziba camp ) 34 percent. Data is from the 2022 Census. 10 World Bank ( 2021 ) Climate Risk Profile: Rwanda. 11 Alex Hunns et al ( 2023 ) \u201c Refugee Settlements are Highly Exposed to Extreme Weather Conditions. \u201d", + "ner_text": [ + [ + 1077, + 1088, + "named" + ] + ], + "validated": true, + "empirical_context": "9 Non-monetary poverty for the other four host districts is: Nyamagabe ( Kigeme camp ) 39 percent, Gatsibo ( Nyabiheke Camp ) 37 percent, Kirehe ( Mahama camp ) 35 percent and Karongi ( Kiziba camp ) 34 percent. Data is from the 2022 Census. 10 World Bank ( 2021 ) Climate Risk Profile: Rwanda.", + "type": "census", + "explanation": "The 2022 Census is explicitly mentioned as the source of data for the non-monetary poverty statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data for empirical analysis.", + "contextual_reason_agent": "The 2022 Census is explicitly mentioned as the source of data for the non-monetary poverty statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "185_multi-page", + "page": 206, + "text": "It is envisaged that NACC ' s accounting system will be based on a package recently implemented for the STI Project, which has a strong data analysis and reporting capability. Currently the STI Project is running on a single-user version that will be upgraded to a multi-user version as the volume of transactions increases. This system will be fully assessed prior to project effectiveness for the purpose of the Kenya HIV / AlDS Disaster Response Project and recommendations will be made on areas that require special attention and changes to meet the project ' s unique needs. NACC will prepare comprehensive financial management procedures including guidelines for implementing Units to ensure data is properly captured and analyzed. It is expected that project financial transactions from the FMA and ministries will be electronically transferred to NACC ' s accounting system to avoid duplicate processing of data. Once transactions are entered into the system, NACC should be able to generate required PMRs and other internal financial management reports showing actual versus budget and variance, analyzed by project components, activities, expenditure categories, and implementing / coordinating units over required time periods.", + "ner_text": [ + [ + 1009, + 1013, + "named" + ] + ], + "validated": false, + "empirical_context": "It is expected that project financial transactions from the FMA and ministries will be electronically transferred to NACC ' s accounting system to avoid duplicate processing of data. Once transactions are entered into the system, NACC should be able to generate required PMRs and other internal financial management reports showing actual versus budget and variance, analyzed by project components, activities, expenditure categories, and implementing / coordinating units over required time periods.", + "type": "report", + "explanation": "PMRs are described as reports generated from the accounting system, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed PMRs are datasets because they are mentioned in the context of generating reports from financial transactions.", + "contextual_reason_agent": "PMRs are described as reports generated from the accounting system, not as a structured collection of data.", + "contextual_signal": "mentioned only as reports, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 10, + "text": "The literacy rate for adults aged 15 and older is 98 percent, and the share of adults aged 15 and above who had no formal education has declined from 19. 2 percent in 1950 to a projected 1. 8 percent in 2020. 8 Younger cohorts are also attaining more years of education; 1 Source: Macro Poverty Outlook for Costa Rica: April 2024; 2 Source: World Development Indicators ( WDI ) https: / / data. worldbank. org / indicator / NE. TRD. GNFS. ZS? locations = CR 3 Source: World Economic Outlook ( WEO ), October 2023, https: / / www. imf. org / external / datamapper / LUR @ WEO / CRI? zoom = CRI & highlight = CRI 4 World Bank estimates using administrative records and annual statistical reports from the Directorate General of Migrants and Foreigners. https: / / www. migracion. go. cr / Paginas / Centro % 20de % 20Documentaci % C3 % B3n / Estad % C3 % ADsticas. aspx. 5 World Bank, Climate Change Knowledge Portal - Costa Rica. 6 World Bank, GFFDR, ThinkHazard! Portal. 7 Ibid 8 Source: https: / / ourworldindata. org /", + "ner_text": [ + [ + 667, + 693, + "named" + ] + ], + "validated": false, + "empirical_context": "org / external / datamapper / LUR @ WEO / CRI? zoom = CRI & highlight = CRI 4 World Bank estimates using administrative records and annual statistical reports from the Directorate General of Migrants and Foreigners. https: / / www.", + "type": "document", + "explanation": "However, it is not a dataset as it refers to documents rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to documents rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [] + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 63, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 59 of 74 installment of livelihood grant. Number of beneficiaries receiving Economic Opportunities who are female youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth and female, defined as women between the ages of 18 and 35 years, and have received at least 1 installment of livelihood grant. This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "ner_text": [ + [ + 709, + 718, + "named" + ] + ], + "validated": false, + "empirical_context": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "type": "system", + "explanation": "However, SNSOP MIS is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SNSOP MIS is a dataset because it includes the term 'MIS' which often refers to management information systems that handle data.", + "contextual_reason_agent": "However, SNSOP MIS is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 30, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 26 of 159 62. Improved availability of information on training programs through the development of an integrated digital platform on training opportunities. The project will support the development of an integrated information system on training opportunities and the introduction and operationalization of post-training tracking mechanisms of graduates. The system will capitalize on existing but limited information, for example, in databases of the National Monitoring Service for Employment and Vocational Training ( Observatoire National de l ' Emploi et de la Formation Professionnelle - ONEFOP ); the National Employment Fund ( NEF ); Center for Education, Professional and University Orientation ( Centre d \u2019 Orientation Scolaire, Universitaire et Professionnelle - COSUP ); and the NIS. Building on these existing sources of information, the project will strengthen / upgrade some of the existing tools, expand data collection, and increase transparency. 63. Design and implementation of tracer studies. Currently, neither MINEFOP nor MINESEC track their graduates in a systematic way and do not collect information on their labor market outcomes. Under the project, three tracer surveys ( baseline, midterm, and final ) are planned to monitor labor market outcomes of TVET graduates. The surveys will provide baseline data and yearly information on the external effectiveness of the training system. 64.", + "ner_text": [ + [ + 1265, + 1279, + "named" + ], + [ + 4, + 14, + "tracer surveys <> publisher" + ], + [ + 1080, + 1094, + "tracer surveys <> data type" + ], + [ + 1361, + 1375, + "tracer surveys <> reference population" + ], + [ + 1402, + 1415, + "tracer surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "Currently, neither MINEFOP nor MINESEC track their graduates in a systematic way and do not collect information on their labor market outcomes. Under the project, three tracer surveys ( baseline, midterm, and final ) are planned to monitor labor market outcomes of TVET graduates. The surveys will provide baseline data and yearly information on the external effectiveness of the training system.", + "type": "survey", + "explanation": "The context confirms that the tracer surveys are planned to collect and provide baseline data on labor market outcomes, indicating they function as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'tracer surveys' are datasets because they are structured collections of data intended to monitor outcomes.", + "contextual_reason_agent": "The context confirms that the tracer surveys are planned to collect and provide baseline data on labor market outcomes, indicating they function as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 46, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 35 Indicator Name People provided with access to improved sanitation services Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name People provided with access to improved sanitation services - Female ( RMS requirement ) Definition / Description Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC.", + "ner_text": [ + [ + 259, + 288, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 35 Indicator Name People provided with access to improved sanitation services Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "report", + "explanation": "However, it is mentioned as a report and not as a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a report and not as a structured collection of data or a dataset.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "185_multi-page", + "page": 59, + "text": "WHO and UNAIDS have just issued Guidelines for Second Generation HIVSurveillance ( May 2000 ), which provides an excellent overview of how to tailor M & E for maximum benefit in individual country cases. UNAIDS can also provide substantial technical assistance to secretariats on demand in this area. A country program will usually support the establishment of a Monitoring and Evaluation ( M & E ) unit in the HIV / AIDS council or secretariat. The unit will define the scope of work on program activities to be included in the M & E arrangements. As a principle, the M & E Unit will rely on formal links with external expertise for generation of infornation and analysis, including such bodies as the Central Statistical Authority, Regional Statistical Offices, universities and private institutions. It will be especially important to support the establishment of a network of local social scientists to undertake much of the M & E field work and analysis. With respect to implementation, the M & E Unit will normally be in charge of developing and exploiting a data collection system appropriate to establish the performance indicators of each program component, clearly distinguishing among: ( a ) program outputs; ( b ) progress", + "ner_text": [ + [ + 1065, + 1087, + "named" + ] + ], + "validated": false, + "empirical_context": "It will be especially important to support the establishment of a network of local social scientists to undertake much of the M & E field work and analysis. With respect to implementation, the M & E Unit will normally be in charge of developing and exploiting a data collection system appropriate to establish the performance indicators of each program component, clearly distinguishing among: ( a ) program outputs; ( b ) progress", + "type": "system", + "explanation": "However, it is not a dataset as it refers to a system for collecting data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' and suggests a structured approach to data collection.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a system for collecting data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 69, + "text": "These tools may advance the use of Kobo-toolbox platform for self - reporting purposes. The M & E specialists will be responsible for verification of the accuracy and timeliness of data entry and training of new staff, WASH-committed contractors, and other implementation partners on the use of the M & E system. The project M & E system will be, to the extent possible, integrated with the WSW MIS and allow gender disaggregation. 24. A key aspect of the results monitoring will be the consumer satisfaction surveys for water services and engagement processes. Consumer satisfaction surveys will be conducted every year for selected subprojects after the start of project implementation to assess satisfaction levels and measure attributable outcomes of the project. Surveys for the engagement processes will be conducted every year using various means of communication. Baseline surveys will be conducted in the first year of project implementation after effectiveness. At the same time, the project will continue relying on WASH - committee models adopted under the RWSSP to enable frequent community roundtables or forums with water users to inform them of the status of investments, seek their feedback regarding project implementation progress, and discuss any corrective action which was taken to address issues raised through the feedback process. Results of such meetings will be documented and reported through the regular M & E process.", + "ner_text": [ + [ + 35, + 56, + "named" + ] + ], + "validated": false, + "empirical_context": "These tools may advance the use of Kobo-toolbox platform for self - reporting purposes. The M & E specialists will be responsible for verification of the accuracy and timeliness of data entry and training of new staff, WASH-committed contractors, and other implementation partners on the use of the M & E system.", + "type": "tool", + "explanation": "However, it is actually a tool/platform used for data collection, not a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is associated with data entry and reporting.", + "contextual_reason_agent": "However, it is actually a tool/platform used for data collection, not a dataset itself.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 30, + "text": "This will include civil society, RHC, and other marginalized groups. Citizen engagement activities will include the implementation of the following mechanisms: ( i ) consultations; ( ii ) a grievance mechanism; and ( iii ) a satisfaction survey at project midterm and end of project. Satisfaction surveys will be diverse and widespread and will include a sample space of respondents from beneficiary groups for each activity set. The surveys will seek to understand beneficiary perceptions of various aspects of the project, including efficiency and relevance. The information gathered from midterm satisfaction surveys will be used to inform and recalibrate project implementation. Representative consultations will also gather feedback on how project activities affect security and are affected by insecurity. Outreach and awareness campaigns will take place to ensure that youth, women, and people with disabilities, including RHC, are included in project-supported activity streams. Representative committees with RHC representation, including IDPs, will take an active role in identifying and monitoring community investments, livelihoods, and support to business plan development within the value chain operator support initiatives. The grievance mechanism will include specific procedures to address complaints related to sexual exploitation, abuse, and harassment; referral to GBV service providers; and confidential, survivor-centered complaint-management protocols. 50. Gender. The project will contribute to increasing women \u2019 s physical accessibility to obstetric care. Feeder and rural roads to be paved / rehabilitated will also be selected based on their potential to connect communities to obstetric care facilities, following a prioritization methodology involving utilization of GIS tools to locate these facilities and the fastest roads that can be used to be reached from different villages, coupled with consultations with women, including refugees, 67 The Sahelian Financial Company ( SAHFI SA - Soci\u00e9t\u00e9 Sah\u00e9lienne de Financement ) established in 2005 as a joint initiative between the European Union ( EU ) and the State of Niger for providing guarantees to small and medium enterprises and small and medium industries ( SMEs / SMIs ).", + "ner_text": [ + [ + 1797, + 1806, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will contribute to increasing women \u2019 s physical accessibility to obstetric care. Feeder and rural roads to be paved / rehabilitated will also be selected based on their potential to connect communities to obstetric care facilities, following a prioritization methodology involving utilization of GIS tools to locate these facilities and the fastest roads that can be used to be reached from different villages, coupled with consultations with women, including refugees, 67 The Sahelian Financial Company ( SAHFI SA - Soci\u00e9t\u00e9 Sah\u00e9lienne de Financement ) established in 2005 as a joint initiative between the European Union ( EU ) and the State of Niger for providing guarantees to small and medium enterprises and small and medium industries ( SMEs / SMIs ).", + "type": "tool", + "explanation": "'GIS tools' is not a dataset as it refers to software or methods used for analysis rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GIS tools' is a dataset because GIS often involves data analysis and mapping.", + "contextual_reason_agent": "'GIS tools' is not a dataset as it refers to software or methods used for analysis rather than a structured collection of data.", + "contextual_signal": "mentioned as a tool for analysis, not as a data source", + "tags": [] + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 62, + "text": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 58 of 104 communities six months after completion - Education starting in YR2 Coordination Teams Financed sub-projects that are functioning or delivering services to communities six months after completion - WASH Semi - annual starting in YR2 Woreda Project Coordination Teams Aggregation of Kebele - level data MoF FPCU Financed sub-projects that are functioning or delivering services to communities six months after completion - Health Semi - annual starting in YR2 Woreda Project Coordination Teams Aggregation of Kebele - level data MoF FPCU Financed sub-projects that are functioning or delivering services to communities six months after completion - Others ' Others ' refer to all sub - projects that are not counted under the education, WASH, and health breakdown indicators Semi - Annual starting in YR2 Woreda Project Coordination Teams Aggregation of Kebele - level data MoF FPCU Households in selected communities participating in planning, decision-making, and / or implementation of community recovery plans A household is counted if one or more of its members participate in planning, decision-making, and / or implementation of community recovery plans Semi - Annual Woreda Project Coordination Teams, Neighborhoo d Relations Committees Aggregation of data from meetings and employment in implementation of activities.", + "ner_text": [ + [ + 403, + 422, + "named" + ], + [ + 85, + 93, + "Kebele - level data <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 58 of 104 communities six months after completion - Education starting in YR2 Coordination Teams Financed sub-projects that are functioning or delivering services to communities six months after completion - WASH Semi - annual starting in YR2 Woreda Project Coordination Teams Aggregation of Kebele - level data MoF FPCU Financed sub-projects that are functioning or delivering services to communities six months after completion - Health Semi - annual starting in YR2 Woreda Project Coordination Teams Aggregation of Kebele - level data MoF FPCU Financed sub-projects that are functioning or delivering services to communities six months after completion - Others ' Others ' refer to all sub - projects that are not counted under the education, WASH, and health breakdown indicators Semi - Annual starting in YR2 Woreda Project Coordination Teams Aggregation of Kebele - level data MoF FPCU Households in selected communities participating in planning, decision-making, and / or implementation of community recovery plans A household is counted if one or more of its members participate in planning, decision-making, and / or implementation of community recovery plans Semi - Annual Woreda Project Coordination Teams, Neighborhoo d Relations Committees Aggregation of data from meetings and employment in implementation of activities.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to aggregated data collected from Kebele-level activities for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific level of data aggregation related to community projects.", + "contextual_reason_agent": "This is indeed a dataset as it refers to aggregated data collected from Kebele-level activities for empirical analysis.", + "contextual_signal": "mentioned as a source of aggregated data from community projects", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "158_40156", + "page": 52, + "text": "29. Advocacy and communications about this new HIV ME system is essential. For this reason, communications plan will be included as part of the annual costed M & E work plan and will involve the PFO and M & E champions in each IGAD country. 30. Surveys and surveillance data will be used to track PDO-level project results. There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations. The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations.", + "ner_text": [ + [ + 582, + 604, + "named" + ], + [ + 702, + 733, + "health facility survey <> data type" + ], + [ + 791, + 795, + "health facility survey <> reference population" + ], + [ + 996, + 1014, + "health facility survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Surveys and surveillance data will be used to track PDO-level project results. There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a type of survey that will be used to track project results.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'health facility survey' suggests a structured collection of data related to health facilities.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a type of survey that will be used to track project results.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 35, + "text": "The number of indirect beneficiaries, however, is significantly greater, comprising ( a ) all students participating in secondary TVET who will gain from an improved learning environment in TVET institutions and other training institutions resulting from investments in technical teachers training; ( b ) informal apprentices who will eventually be provided with reforms and recognition of prior learning opportunities; ( c ) the wider business community that will benefit from an increased supply of well-trained MCPs; and ( d ) at least 16, 500 refugees who will indirectly benefit from the development of programs to support skill building and employability, such as the ICT platform connecting youth with information and opportunities for training and employment, which will be made available to all youth. 40 It is estimated that the project will reach about 24, 000 NEET youth each year, which represents an average of 16 percent of the total NEET population each year. Using the 2013 \u2013 2014 household survey to estimate the share of the NEET youth population, it is estimated that about 6 percent ( exactly 5. 62 percent ) of youth ages 15 \u2013 24 were NEET. Assuming that this share remains the same over the lifetime of the project, and using the UNDP population projection to estimate the NEET population, the stock of the NEET population in 2021 is estimated to be 142, 172 with an average annual flow of 5, 509.", + "ner_text": [ + [ + 986, + 1014, + "named" + ] + ], + "validated": true, + "empirical_context": "40 It is estimated that the project will reach about 24, 000 NEET youth each year, which represents an average of 16 percent of the total NEET population each year. Using the 2013 \u2013 2014 household survey to estimate the share of the NEET youth population, it is estimated that about 6 percent ( exactly 5. 62 percent ) of youth ages 15 \u2013 24 were NEET.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to estimate the share of the NEET youth population.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific household survey that provides data on NEET youth.", + "contextual_reason_agent": "This is indeed a dataset as it is used to estimate the share of the NEET youth population.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 90, + "text": "85 7. Nevertheless, analysis of recent PISA results ( World Bank, 2008 ) 6 indicate that given its endowments ( such as school, student, and household characteristics ), Jordan should likely be performing even better in terms of cognitive skill development. By Jordan \u2019 s own internal assessment systems, there is significant need for quality enhancing reforms. Pass rates on the general secondary certificate GSC ( essentially the secondary school exit exam also known as the Tawjihi ) are quite low ( about 55 percent of regular track students and under 35 percent of students from \u201c irregular \u201d tracks ), as are results on Jordan \u2019 s own national achievement tests which indicate the majority of 8th and 10th grade students are not proficient in any of the subjects ( Science, Math, English, and Arabic. ) In addition, the Tawjihi, like many exams of its kind, does not measure the same kinds of cognitive skills as does PISA, and therefore it creates a strong incentive against education quality. ERfKE II directly addresses this need to transform the exams, and it is not insignificant that increasing scores on school, national, and international assessments is the first ERfKE II project development objective and key indicator, before completion rates and enrolment rates.", + "ner_text": [ + [ + 924, + 928, + "named" + ] + ], + "validated": false, + "empirical_context": "Pass rates on the general secondary certificate GSC ( essentially the secondary school exit exam also known as the Tawjihi ) are quite low ( about 55 percent of regular track students and under 35 percent of students from \u201c irregular \u201d tracks ), as are results on Jordan \u2019 s own national achievement tests which indicate the majority of 8th and 10th grade students are not proficient in any of the subjects ( Science, Math, English, and Arabic. ) In addition, the Tawjihi, like many exams of its kind, does not measure the same kinds of cognitive skills as does PISA, and therefore it creates a strong incentive against education quality. ERfKE II directly addresses this need to transform the exams, and it is not insignificant that increasing scores on school, national, and international assessments is the first ERfKE II project development objective and key indicator, before completion rates and enrolment rates.", + "type": "program", + "explanation": "However, PISA is mentioned as a type of assessment rather than a structured collection of data used in the research context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed PISA is a dataset because it is often associated with educational assessments and results.", + "contextual_reason_agent": "However, PISA is mentioned as a type of assessment rather than a structured collection of data used in the research context.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "184_multi-page", + "page": 16, + "text": "13 Monitoring and Evaluation Monitoring will be done according to the development indicators given in the attachment to Annex 1. The project will strengthen the capacity of CNOSEGE, and the Planning Unit of the Ministry so that monitoring reports on the implementation of the reform can include key progress and impact indicators. Currently the Planning unit generates statistical data on all aspects of the education sector, however this can be further strengthened to monitor progress on key reform objectives such as access, equity and quality. In addition, during the donors round-table UNESCO offered support to develop an Education Management Information System ( EMIS ). If this is not in place by the end of Phase I of the APL, this would be a priority item for Phase II. Evaluation of the impact of the reforms will be done by CNOSEGE by recruiting experts in this field and an initial evaluation will be done at the end of Phase I. Particular areas of impact assessment will be student performance and success in reaching out to disadvantaged groups. Normally, student performance would be measured by overall test results but as the pool of students widens to include students from less advantaged socioeconomic groups, there will be a downward pressure on test scores.", + "ner_text": [ + [ + 369, + 385, + "named" + ], + [ + 988, + 1007, + "statistical data <> data description" + ], + [ + 1179, + 1229, + "statistical data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The project will strengthen the capacity of CNOSEGE, and the Planning Unit of the Ministry so that monitoring reports on the implementation of the reform can include key progress and impact indicators. Currently the Planning unit generates statistical data on all aspects of the education sector, however this can be further strengthened to monitor progress on key reform objectives such as access, equity and quality. In addition, during the donors round-table UNESCO offered support to develop an Education Management Information System ( EMIS ).", + "type": "data", + "explanation": "In this context, 'statistical data' is used to describe the information generated by the Planning unit, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'statistical data' refers to a dataset because it implies a collection of quantitative information.", + "contextual_reason_agent": "In this context, 'statistical data' is used to describe the information generated by the Planning unit, indicating it functions as a data source.", + "contextual_signal": "mentioned as data generated by the Planning unit", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 105, + "text": "the conflict in the Tigray region. 112 While conflict-related sexual violence has predominantly affected women and girls, men and boys have been affected as well. 113 10. Beyond physical and sexual violence, numerous reports highlight prevalence of other forms of GBV. Rapid gender assessments by several organizations indicate heightened incidence of intimate partner violence among displaced and conflict-affected people. Lack of access to food, cash, and other resources has led to an increase of women and girls engaging in precarious coping strategies, including transactional sex, and increased vulnerability to sexual exploitation and abuse. 114 These risks are particularly high for female-headed households. Displaced women and girls are further vulnerable to other forms of GBV, including harassment, abduction, and separation from families. 105 UNFPA. 2021. UNFPA Ethiopia Response to Tigray Crisit. Situation Report. 1-15 July 2021. See also: https: / / www. humanitarianresponse. info / sites / www. humanitarianresponse. info / files / documents / files / situational_analysis-_gender - based_violence_in_northern_ethiopia1803202102. pdf 106 See https: / / www. unfpa. org / news / tigray-conflict-decimates-maternal-health-services-overwhelms-health-workers. 107 Federal Democratic Republic of Ethiopia. 2016. Ethiopia Demographic and Health Survey. 108 Ibid. 109 UN-OCHA. 2021. Ethiopia \u2013 Tigray Region Humanitarian Update.", + "ner_text": [ + [ + 1325, + 1363, + "named" + ], + [ + 20, + 33, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 105, + 120, + "Ethiopia Demographic and Health Survey <> reference population" + ], + [ + 328, + 377, + "Ethiopia Demographic and Health Survey <> data description" + ], + [ + 863, + 867, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 939, + 943, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 1278, + 1317, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 1319, + 1323, + "Ethiopia Demographic and Health Survey <> reference year" + ], + [ + 1325, + 1333, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 1388, + 1392, + "Ethiopia Demographic and Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2016. Ethiopia Demographic and Health Survey. 108 Ibid.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey that collects demographic and health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey that collects demographic and health data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 77, + "text": "The country records a steady population increase of 2. 4 percent overall for Tajikistan and an urban population growth of 3 percent. The poverty rate in the Khatlon region is approximately 37 percent, resulting in poor food and nutrition security, where over 60 percent of household income is spent on food, the highest percentage of all four regions in Tajikistan. Stunting among children, aged between 6 months and 5 years of age, is nearly 20 percent according to a health survey conducted in 2020. Figure 2. 1. Proposed Sequence of Projects in SOP 10. Infrastructure investments will build on and expand early results of ongoing RWSSP, with the choice of prioritized water supply infrastructure within the Vakhsh inter-district scheme and adjacent areas informed by the engineering designs and assessments completed under the RWSSP. WSIP-1 will balance targeted infrastructure rehabilitation / modernization investments with continued support to the", + "ner_text": [ + [ + 469, + 482, + "named" + ], + [ + 77, + 87, + "health survey <> data geography" + ], + [ + 157, + 171, + "health survey <> data geography" + ], + [ + 381, + 389, + "health survey <> reference population" + ], + [ + 496, + 500, + "health survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "The poverty rate in the Khatlon region is approximately 37 percent, resulting in poor food and nutrition security, where over 60 percent of household income is spent on food, the highest percentage of all four regions in Tajikistan. Stunting among children, aged between 6 months and 5 years of age, is nearly 20 percent according to a health survey conducted in 2020. Figure 2.", + "type": "survey", + "explanation": "The term is confirmed as a dataset since it is explicitly referenced in the context as providing data on stunting rates among children.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'health survey' implies a structured collection of data related to health metrics.", + "contextual_reason_agent": "The term is confirmed as a dataset since it is explicitly referenced in the context as providing data on stunting rates among children.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 66, + "text": "Monthly Administrative records of the EIC, MoLSA, and ARRA Template to collect data from relevant agencies The PCU in collaboration with relevant government bodies Independent agent hired by the PCU Yes", + "ner_text": [ + [ + 0, + 30, + "named" + ], + [ + 38, + 41, + "Monthly Administrative records <> publisher" + ], + [ + 111, + 114, + "Monthly Administrative records <> author" + ], + [ + 218, + 236, + "Monthly Administrative records <> usage context" + ] + ], + "validated": true, + "empirical_context": "Monthly Administrative records of the EIC, MoLSA, and ARRA Template to collect data from relevant agencies The PCU in collaboration with relevant government bodies Independent agent hired by the PCU Yes", + "type": "records", + "explanation": "This is indeed a dataset as it is described as records used to collect data from relevant agencies.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'Monthly Administrative records' which suggests a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as records used to collect data from relevant agencies.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 5, + "validated": 3, + "not_validated": 2 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 80, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 68. Verification Protocol Table: Disbursement Linked Indicators DLI_TBL_VERIFICATION DLI 1 Girls \u2019 promotion rate from grade 11 to 12, nationwide Description The improvement in grils enrollment is measured by the proportion of girls enrolled in grade 11 at a given school year who study in the grade 12 in the following school year. The indicator is calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year t. Data source / Agency EMIS Verification Entity ESS Procedure Data collected by MOE through the annual EMIS process and verified by ESS using a representative sample. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 2 Children 0-59 months with diarrhea treated with ORS and Zinc in low performing regions Description The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc Data source / Agency DHIS2 Verification Entity ESS Procedure Data collected by MOH through the annual DHIS2 and verified by ESS using a representative sample.", + "ner_text": [ + [ + 1291, + 1296, + "named" + ] + ], + "validated": false, + "empirical_context": "The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 2 Children 0-59 months with diarrhea treated with ORS and Zinc in low performing regions Description The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc Data source / Agency DHIS2 Verification Entity ESS Procedure Data collected by MOH through the annual DHIS2 and verified by ESS using a representative sample.", + "type": "system", + "explanation": "However, DHIS2 is described as a health information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and verification.", + "contextual_reason_agent": "However, DHIS2 is described as a health information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 50, + "text": "least 80 % of public primary schools and at least 50 % of private primary schools in districts 1-2 ( Djibouti Ville ) transmit their data via the application on time At least 80 % of public primary schools and at least 50 % of private primary schools in districts 3-4 ( Balbala ) transmit their data via the application on time 60 % of primary schools in the regions and in the refugee villages transmit their data via the application on time 70 % of public and 50 % of private primary schools transmit their data via the application on time Modernization of the EMIS ( Text ) No Evaluation report of existing data systems and recommendations for strengthening EMIS performance Student information system is developed Student information system is developed Student information system is developed Student information system is developed", + "ner_text": [ + [ + 661, + 665, + "named" + ] + ], + "validated": false, + "empirical_context": "least 80 % of public primary schools and at least 50 % of private primary schools in districts 1-2 ( Djibouti Ville ) transmit their data via the application on time At least 80 % of public primary schools and at least 50 % of private primary schools in districts 3-4 ( Balbala ) transmit their data via the application on time 60 % of primary schools in the regions and in the refugee villages transmit their data via the application on time 70 % of public and 50 % of private primary schools transmit their data via the application on time Modernization of the EMIS ( Text ) No Evaluation report of existing data systems and recommendations for strengthening EMIS performance Student information system is developed Student information system is developed Student information system is developed Student information system is developed", + "type": "system", + "explanation": "However, EMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data transmission from schools.", + "contextual_reason_agent": "However, EMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 17, + "text": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3. Project Design Targeting In the absence of data on household income, the Project uses geographical targeting at the level of the governorates where poverty level is estimated at 18. 3 % on average, 32. 5 % in Hebron and 21. 3 % in Bethlehem ( 2010 ) 9. Both governorates also have the highest unemployment rates in the West Bank with averages of 22. 8 % and 22. 4 %, respectively10. The vulnerability of these groups is exacerbated by mobility restrictions and poor market access. Output-based subsidy payment 8 These closures are part of the World Bank project and including them in the OBA targets will further incentivize JSC-H & B to achieve closures according to the planned schedule. In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "ner_text": [ + [ + 1578, + 1597, + "named" + ], + [ + 907, + 913, + "Labour Force Survey <> data geography" + ], + [ + 929, + 938, + "Labour Force Survey <> data geography" + ], + [ + 941, + 945, + "Labour Force Survey <> reference year" + ], + [ + 1017, + 1026, + "Labour Force Survey <> data geography" + ], + [ + 1493, + 1502, + "Labour Force Survey <> data geography" + ], + [ + 1547, + 1551, + "Labour Force Survey <> publication year" + ], + [ + 1555, + 1560, + "Labour Force Survey <> publisher" + ], + [ + 1644, + 1648, + "Labour Force Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned alongside other statistical yearbooks and surveys, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Labour Force Survey' is a recognized term for a structured collection of data regarding labor statistics.", + "contextual_reason_agent": "In the context, it is explicitly mentioned alongside other statistical yearbooks and surveys, indicating its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 44, + "text": "Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed drinking water services in the selected municipalities ( out of This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "ner_text": [ + [ + 29, + 37, + "named" + ], + [ + 87, + 107, + "PMU Data <> data type" + ], + [ + 347, + 355, + "PMU Data <> reference population" + ], + [ + 489, + 495, + "PMU Data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed drinking water services in the selected municipalities ( out of This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "type": "data", + "explanation": "In the context, 'PMU Data' is explicitly mentioned as being compiled and recorded, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is referenced in the context of compiling reports and recording beneficiary records.", + "contextual_reason_agent": "In the context, 'PMU Data' is explicitly mentioned as being compiled and recorded, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for compiling reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 80, + "text": "A study on the survival, and where possible, impact of sub-projects implemented CDP was conducted in 2012 and provided insights and information on: ( i ) the survival rate of sub-projects after a period of four years since their implementation; ( ii ) quantitative and qualitative changes that can potentially be attributed to the project ( on the community, households, beneficiaries, etc. ); ( iii ) success factors and drawbacks / obstacles; and ( iv ) lessons learned and recommendations. A telephone survey was conducted to determine the status of the 321 sub-projects; the results show that 70 % are still operational, 11 % could not be reached, 10 % are not operational anymore and the rest preferred not to reveal their status. A sample of 10 % ( 31 sub-projects ), with both operational and non-operational ones, was drawn for the fieldwork covering various sectors and regions. The results of the fieldwork revealed that the telephone survey was not reliable. Assuming this finding of the field work is representative of all 321 sub-projects, the survival rate of sub-projects is 66 %. 44 Presentation by Anne Ritchie entitled \" Financing Productive Assets in Social Funds and CDD \" for the following workshop: Social Funds: Exploring New Frontiers; Session III: Financing Productive Assets through Social Funds and CDD Operations November 16, 2005 45 Ritchie, A ( 2006 ). \" Grants for Income Generation \". Agricultural and Rural Development Notes Series. The World Bank. 81", + "ner_text": [ + [ + 495, + 511, + "named" + ] + ], + "validated": false, + "empirical_context": "A study on the survival, and where possible, impact of sub-projects implemented CDP was conducted in 2012 and provided insights and information on: ( i ) the survival rate of sub-projects after a period of four years since their implementation; ( ii ) quantitative and qualitative changes that can potentially be attributed to the project ( on the community, households, beneficiaries, etc. ); ( iii ) success factors and drawbacks / obstacles; and ( iv ) lessons learned and recommendations. A telephone survey was conducted to determine the status of the 321 sub-projects; the results show that 70 % are still operational, 11 % could not be reached, 10 % are not operational anymore and the rest preferred not to reveal their status. A sample of 10 % ( 31 sub-projects ), with both operational and non-operational ones, was drawn for the fieldwork covering various sectors and regions.", + "type": "survey", + "explanation": "However, the telephone survey itself is a method of data collection, not a structured collection of data or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves collecting data through a survey.", + "contextual_reason_agent": "However, the telephone survey itself is a method of data collection, not a structured collection of data or dataset.", + "contextual_signal": "mentioned only as a method of data collection, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 12, + "text": "The emphasis is now on scaling up PNSF to cover a greater share of the poor population, building on the capacity developed with the support of the World Bank \u2019 s Social Safety Nets Project ( SSNP, P130328 ) which has financed the development of the social registry, the targeting methodology, and the PNSF \u2019 s Management Information System ( MIS ). 10. Prior to the PNSF, most SSN programs had been established at the wake of drought shocks and were largely donor-driven initiatives ( such as the World Food Programme ( WFP ), the United Nations Children \u2019 s Fund ( UNICEF ), the Food and Agriculture Organization, the Islamic Development Bank, the United States Agency for Development, and the Norwegian Refugee Council ) which were mainly focused on providing food to vulnerable populations. At present, the scale and funding of SSN programs remains inadequate to protect most poor and vulnerable groups. According to the latest available data, only 32. 7 percent of the poorest 20 percent of households are covered by any SSN program. In addition, the Government \u2019 s share of spending in SSN is quite limited, as Djibouti only spends 0. 18 percent of its GDP on 10 Hallegatte et al, \u201c Shockwaves: Managing the Impacts of Climate Change on Poverty \u201d, World Bank, 2016. 11 Wooden et al, \u201c Impact of Weather Shocks on MENA Households \u201d, World Bank, 2014. 12 Djibouti \u2019 s First NDC, August 2015, p2.", + "ner_text": [ + [ + 310, + 339, + "named" + ] + ], + "validated": false, + "empirical_context": "The emphasis is now on scaling up PNSF to cover a greater share of the poor population, building on the capacity developed with the support of the World Bank \u2019 s Social Safety Nets Project ( SSNP, P130328 ) which has financed the development of the social registry, the targeting methodology, and the PNSF \u2019 s Management Information System ( MIS ). 10.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 15, + "text": "level so that this does not become an extra burden for already stretched programs. The TORs for this coordination \u201c function \u201d and how this will be supported is included in the Project Operations Manual. 43. In terms of capacity building, the project will provide funding for the training of key stakeholders and implementers to ensure the efficient implementation of the project. This assistance would include capacity support to the IGAD Secretariat, the PFO, the national AIDS coordination authorities, and the implementers under component 1. A detailed capacity building plan for the first year will be developed by IGAD, and updated on an annual basis. 44. M & E, is both crucial and cross-cutting; it is essential to track implementation, assess the progress with achieving intended and unintended results, and provide data to improve future implementation. The M & E system for the project will use indicators, linked to the objectives of the project, to track the implementation status and project outcomes. Indicator data will be obtained from five sources of data \u2013 biological surveillance, behavioral surveillance, essential operations research, financial monitoring, and program activity monitoring \u2013 and will facilitate learning for IGAD and its member states. The data collected will then be analyzed and disseminated to stakeholders within IGAD to inform the decisions they make.", + "ner_text": [ + [ + 1016, + 1030, + "named" + ], + [ + 1076, + 1099, + "Indicator data <> data description" + ], + [ + 1101, + 1124, + "Indicator data <> data description" + ], + [ + 1126, + 1155, + "Indicator data <> data description" + ], + [ + 1157, + 1177, + "Indicator data <> data description" + ], + [ + 1183, + 1210, + "Indicator data <> data description" + ] + ], + "validated": true, + "empirical_context": "The M & E system for the project will use indicators, linked to the objectives of the project, to track the implementation status and project outcomes. Indicator data will be obtained from five sources of data \u2013 biological surveillance, behavioral surveillance, essential operations research, financial monitoring, and program activity monitoring \u2013 and will facilitate learning for IGAD and its member states. The data collected will then be analyzed and disseminated to stakeholders within IGAD to inform the decisions they make.", + "type": "data", + "explanation": "In this context, 'Indicator data' is indeed used as a structured collection of data obtained from multiple sources to track project outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Indicator data' is a dataset because it refers to data collected from various sources for analysis.", + "contextual_reason_agent": "In this context, 'Indicator data' is indeed used as a structured collection of data obtained from multiple sources to track project outcomes.", + "contextual_signal": "follows 'data will be obtained from five sources of data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 29, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 25 of 82 73. The project will, to the extent possible, utilize existing government systems and procedures. Districts implement M & E activities under the coordination of LODA and use the LODA-administered Monitoring, Evaluation, and Information System ( MEIS ). The MEIS facilitates coordination of social protection planning, household profiling and M & E of projects from grassroots to central level. The system can generate reports with values on key figures and the progress of indicators. While the MEIS is operational and implemented nationally as the central MIS, the level of system utilization varies from district to district due to different levels of technical capacity. The SEIRHCP will build district capacity to use the MEIS. The MINEMA SPIU will receive required project data and information from the districts and consolidate it in the P-MIS. 74. Project M & E arrangements. The MINEMA SPIU will have a dedicated M & E team, which will include ( i ) a Kigali - based National Project M & E Specialist, who will oversee overall M & E implementation; and ( ii ) District field specialists ( embedded in district offices, financed by the project ) to monitor all project activities in the target districts.", + "ner_text": [ + [ + 959, + 964, + "named" + ] + ], + "validated": false, + "empirical_context": "The SEIRHCP will build district capacity to use the MEIS. The MINEMA SPIU will receive required project data and information from the districts and consolidate it in the P-MIS. 74.", + "type": "system", + "explanation": "However, P-MIS is described as a system rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed P-MIS is a dataset because it is mentioned in the context of receiving and consolidating project data.", + "contextual_reason_agent": "However, P-MIS is described as a system rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 54, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 50 of 64 RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target Average user experience score among PBF / DFF participating facilities ( Percentage ) 0. 00 60. 00 Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist ( Number ) 0. 00 80. 00 Communities with functioning community health workers per the norms set by the National Community Health Strategy ( Percentage ) 0. 00 60. 00 Communities that have formally declared the abandonment of the practice of FGM ( Percentage ) 0. 00 60. 00 Component 2: Strengthening institutions, citizen engagement and project management Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) ( Percentage ) 35. 00 65. 00 Facilities that are connected to the National Logistics Management Information System ( eLMIS ) ( Percentage ) 0. 00 65. 00 Health surveys conducted and results made public ( Number ) 0. 00 2. 00 Expectant women using a transport voucher or staying in a maternal waiting home to ensure safe deliveries ( Number ) 0. 00 20, 000. 00 Of which refugees ( Number ) 0. 00 200. 00 Of which host community residents ( Number ) 0. 00 2, 000. 00 Completion of annual regional and national health fora with adopted resolutions ( Yes", + "ner_text": [ + [ + 741, + 747, + "named" + ] + ], + "validated": false, + "empirical_context": "00 60. 00 Component 2: Strengthening institutions, citizen engagement and project management Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) ( Percentage ) 35. 00 65.", + "type": "system", + "explanation": "However, DHIS-2 is a health information system and not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS-2 is a dataset because it is associated with data entry and management.", + "contextual_reason_agent": "However, DHIS-2 is a health information system and not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 53, + "text": "The World Bank Uganda Digital Acceleration Project - GovNet ( P171305 ) Page 41 of 76 kiosk ( within 1 km from home ) Of which Refugees Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ) Annually Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet kiosk ( within 1 km from home ) NITA-U, UCC & OPM Of which Hosts Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ). Annual Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet NITA-U, UCC & OPM", + "ner_text": [ + [ + 287, + 298, + "named" + ], + [ + 15, + 21, + "UCC Surveys <> data geography" + ], + [ + 384, + 397, + "UCC Surveys <> reference population" + ], + [ + 463, + 476, + "UCC Surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Uganda Digital Acceleration Project - GovNet ( P171305 ) Page 41 of 76 kiosk ( within 1 km from home ) Of which Refugees Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ) Annually Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet kiosk ( within 1 km from home ) NITA-U, UCC & OPM Of which Hosts Number of individuals in host communities and refugee settlements with broadband penetration ( 50 % of total population ). Annual Surveys / Data from UCC Surveys will be conducted in all 13 host communities and 14 refugee settlements to determine beneficiaries that can access internet through 1 ) Personal Subscriptio ns 2 ) Wi-Fi hotspot ( within 1 km from home ) 3 ) Internet NITA-U, UCC & OPM", + "type": "survey", + "explanation": "In this context, 'UCC Surveys' is explicitly mentioned as a source of data collection for determining internet access among beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Surveys' which typically indicates a structured collection of data.", + "contextual_reason_agent": "In this context, 'UCC Surveys' is explicitly mentioned as a source of data collection for determining internet access among beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 83, + "text": "The focus on this Project is to establish a solid M & E foundation, which will enable the sector to move toward results-based implementation approaches. 99. The program will support the NWCO, RWCOs, and WASH Sector PMUs for regular organizing of the WASH Annual Review Meetings to track implementation progress and provide feedback for improvement. This platform will be used to enhance learning and experience sharing among program woredas and towns. Under the platform, the best-performing woredas and towns will be selected based on a clear evaluation guideline using predefined service delivery result indicators and will be recognized / awarded. This is expected to create positive competition toward meeting intended results among program woredas and towns. Once this initiative is tested and piloted under the CWA, the platform will be scaled up to the wider OWNP. Information from the MIS will be the basis for selecting best-performing woredas and will be used as motivation to encourage regular updating of information into the MIS. Implementation Support Plan and Resource Requirements 100. Many of the Project team members are based out of the World Bank \u2019 s country office in Addis Ababa, which helps provide timely and effective implementation support to the Client. Semiannual supervision missions and targeted follow-up technical missions will focus on the areas described in the following paragraphs. 101. Strategic support: The World Bank implementation support missions will meet with national and local authorities to: ( i ) review progress on the Project \u2019 s activities; ( ii ) discuss strategic alignment of the Project \u2019 s different activities and the activities of relevant stakeholders; and ( iii ) evaluate progress on cross-cutting issues, such as M & E, gender, training, communication, dissemination of Project results and experiences, and coordination between relevant stakeholders. Table A1. 8: Thematic Support Time Focus Skills Needed Resource Partner Role First 12 months Preparation of tendering contracts; feasibility studies and detailed engineering designs; safeguards screening and mitigation plans Procurement, FM, safeguards and infrastructure specialists; hydrology and water resources specialists Supervision budget Provide support, national expertise, and technical advice", + "ner_text": [ + [ + 1038, + 1041, + "named" + ] + ], + "validated": false, + "empirical_context": "Once this initiative is tested and piloted under the CWA, the platform will be scaled up to the wider OWNP. Information from the MIS will be the basis for selecting best-performing woredas and will be used as motivation to encourage regular updating of information into the MIS. Implementation Support Plan and Resource Requirements 100.", + "type": "system", + "explanation": "However, the context indicates that 'MIS' is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a management information system that could store data.", + "contextual_reason_agent": "However, the context indicates that 'MIS' is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 27, + "text": "Available survey data indicate that once returned to Afghanistan, most refugees return to their province of origin to be in proximity to family and friends or, if they return elsewhere, they do so for safety and economic reasons. Afghan returnee households are large and although most families have at least one person working for pay, they have low job stability and low wages. According to survey data, most returnees work as daily wage laborers in non \u2010 agriculture and they generally experience a decrease in the employment rate, wage, and job stability after returning to Afghanistan. Data indicate that there were as many men as women among Afghans living in Pakistan in 2011 but more women ( 54 percent ) than men returned to Afghanistan between 2015 and 2017. 31 D. Results Chain 44. The project supports, through a programmatic, multi \u2010 sector, multi \u2010 implementation agency approach, the short, medium, and long \u2010 term measures required to increase economic integration of Afghan returnees, IDPs, and host communities in the cities supported by the project. The project will implement a range of policy and operational activities identified through multiple consultations and surveys with the GoIRA, potential beneficiaries, and stakeholders ( e. g. UN agencies, civil society, refugees ).", + "ner_text": [ + [ + 10, + 21, + "named" + ], + [ + 53, + 64, + "survey data <> data geography" + ], + [ + 71, + 79, + "survey data <> reference population" + ], + [ + 230, + 256, + "survey data <> reference population" + ], + [ + 647, + 654, + "survey data <> reference population" + ], + [ + 665, + 673, + "survey data <> data geography" + ], + [ + 677, + 681, + "survey data <> reference year" + ], + [ + 733, + 744, + "survey data <> data geography" + ], + [ + 983, + 999, + "survey data <> reference population" + ], + [ + 1315, + 1333, + "survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Available survey data indicate that once returned to Afghanistan, most refugees return to their province of origin to be in proximity to family and friends or, if they return elsewhere, they do so for safety and economic reasons. Afghan returnee households are large and although most families have at least one person working for pay, they have low job stability and low wages.", + "type": "survey", + "explanation": "In this context, 'survey data' is explicitly mentioned as a source of information that informs the analysis of refugee return patterns.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey data' typically refers to structured information collected from respondents.", + "contextual_reason_agent": "In this context, 'survey data' is explicitly mentioned as a source of information that informs the analysis of refugee return patterns.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 12, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 8 of 40 10. Despite recent progress in student enrollments, the education sector suffers from multiple challenges: ( i ) low and unequal levels of access and completion; ( ii ) low and stagnant student learning outcomes; ( iii ) poor education system management; ( iv ) inadequate learning environments; and ( v ) low level of public spending on education and significant contributions from households. In Sudan, the school system is also currently severely affected by the COVID-19 outbreak, with all education institutes closed since March 14, 2020. An estimated 6. 2 million students are out of school due to the lockdown ( Annual School Census, 2018 ). If this situation is permitted to continue unabated, it could have profound, long-term negative impacts on the country ' s development. Low and unequal levels of access and completion 11. Sudan has not managed to cope with the increased demand for education imposed by the high population growth hence the overall level of access to basic education in Sudan has been stagnant over the past decade. GER has been stagnant and low compared to other comparator countries: 72 percent ( 2008 / 09 ) and 73 percent ( 2016 / 17 ).", + "ner_text": [ + [ + 707, + 727, + "named" + ], + [ + 4, + 14, + "Annual School Census <> publisher" + ], + [ + 15, + 20, + "Annual School Census <> data geography" + ], + [ + 486, + 491, + "Annual School Census <> data geography" + ], + [ + 729, + 733, + "Annual School Census <> publication year" + ], + [ + 925, + 930, + "Annual School Census <> data geography" + ], + [ + 1089, + 1094, + "Annual School Census <> data geography" + ], + [ + 1218, + 1227, + "Annual School Census <> reference year" + ] + ], + "validated": true, + "empirical_context": "An estimated 6. 2 million students are out of school due to the lockdown ( Annual School Census, 2018 ). If this situation is permitted to continue unabated, it could have profound, long-term negative impacts on the country ' s development.", + "type": "census", + "explanation": "The context confirms it is a dataset as it provides specific data (6.2 million students) used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides specific data (6.2 million students) used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "158_40156", + "page": 47, + "text": "and other objectives of the IGAD Regional HIV / AIDS Partnership Program ( IRAPP ). IGAD will submit annual plans to the World Bank for no-objection on a consolidated basis. Sub-component ( b ) 5. The sub-component is aiming to provide HIV / STI prevention, care, support and treatment services to cross-border and mobile populations. The Mapping Surveys conducted in the 7 IGAD countries pointed out the significant mobility of these populations, which increases their vulnerability to HIV and STI. The analysis further found that there was a lack of HIV interventions in these areas, and that where present, the capacity and coverage was extremely limited. Despite this high level of vulnerability, the existing HIV and STI programs do not address CBMP in most IGAD countries and moreover, in the border areas, the health facilities are deficient or often completely missing. 6. The Regional Expert Meeting ( Nairobi, Kenya, June 2006 ) recommended initiating, promoting and supporting programs at strategic CBMP concentrated areas by improving knowledge, capacity, infrastructure and care. This subcomponent will support reinforcement of institutional capacity which may include staff training, basic equipment and medical supplies for selected health facilities ( in the area of the \u201c hot-spot interventions \u201d ) in border areas to deliver HIV / STI prevention, treatment, care and support service packages for CBMPs.", + "ner_text": [ + [ + 339, + 354, + "named" + ], + [ + 298, + 333, + "Mapping Surveys <> reference population" + ], + [ + 374, + 388, + "Mapping Surveys <> data geography" + ], + [ + 932, + 936, + "Mapping Surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "The sub-component is aiming to provide HIV / STI prevention, care, support and treatment services to cross-border and mobile populations. The Mapping Surveys conducted in the 7 IGAD countries pointed out the significant mobility of these populations, which increases their vulnerability to HIV and STI. The analysis further found that there was a lack of HIV interventions in these areas, and that where present, the capacity and coverage was extremely limited.", + "type": "survey", + "explanation": "In the context, 'Mapping Surveys' is explicitly mentioned as a source of information that provides data on the mobility of populations and their vulnerability to HIV and STI.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Mapping Surveys' is a dataset because it refers to a structured collection of data gathered from multiple countries.", + "contextual_reason_agent": "In the context, 'Mapping Surveys' is explicitly mentioned as a source of information that provides data on the mobility of populations and their vulnerability to HIV and STI.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 19, + "text": "Subcomponent 1. 2: Strengthening health sector stewardship ( US $ 3. 39 million equivalent ) will support strengthening health sector stewardship in selected refugee hosting districts through: ( a ) improving availability, quality, and use of routine health data via, inter alia, ( i ) developing and implementing a digital human resources database, ( ii ) digitizing DHIS and integrating selected parallel reporting systems, ( iii ) providing training, equipment, and operational support to health services providers for implementation and operationalization of HMIS, ( iv ) creating a user \u2010 friendly dashboard for decision making; and ( v ) supporting data review meetings and data quality checks; and ( b ) providing training to, and building capacity of, key managerial and technical staff on selected health system strengthening subjects. The project will support a real \u2010 time system monitoring of staff presence at HFs. Training of managerial and technical staff at provincial and district levels in various health system strengthening areas directly linked to improving effectiveness, efficiency, and sustainability of service delivery will include contract management of private \u2010 public partnerships, public financial management, monitoring and supervision, human resources for health ( HRH ) management, and supply chain management.", + "ner_text": [ + [ + 368, + 372, + "named" + ] + ], + "validated": false, + "empirical_context": "2: Strengthening health sector stewardship ( US $ 3. 39 million equivalent ) will support strengthening health sector stewardship in selected refugee hosting districts through: ( a ) improving availability, quality, and use of routine health data via, inter alia, ( i ) developing and implementing a digital human resources database, ( ii ) digitizing DHIS and integrating selected parallel reporting systems, ( iii ) providing training, equipment, and operational support to health services providers for implementation and operationalization of HMIS, ( iv ) creating a user \u2010 friendly dashboard for decision making; and ( v ) supporting data review meetings and data quality checks; and ( b ) providing training to, and building capacity of, key managerial and technical staff on selected health system strengthening subjects. The project will support a real \u2010 time system monitoring of staff presence at HFs.", + "type": "system", + "explanation": "'DHIS' is mentioned as part of a system that is being digitized, but it is not explicitly described as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is related to health data management.", + "contextual_reason_agent": "'DHIS' is mentioned as part of a system that is being digitized, but it is not explicitly described as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "172_multi0page", + "page": 47, + "text": ". ' - ': - '; 1 - Key Performance | Data Collectiori Strategy, Hierarchy of Objectives J Indicators.. Critcal, Assumptions Project Components / Inputs: ( budget for each Project reports: ( from Components to Sub-components: component ) Outputs ) 1. Support to service US $ 36M Detailed quarterly reports on Schools in need of providers to rehabilitate the the procurement and rehabilitation are identified provision of education distribution status of based on a priority critena. services ( School Grants classroom facilities Program ). Annual EMIS survey report The PSC is functioning successfully. Detailed quarterly report on SPs have: ( i ) key personnel; civil works activities by the ( ii ) capacity to procure goods Civil Works unit of the and services according to planning division. project regulations; ( iii ) financial management and reporting capacity; and ( iv ) capacity to monitor and report on project activities. Monthly financial statements The MEST implements a and quarterly progress reports monutoring and supervision from the service providers. plan. Field supervision. 2. Enhanced institutional US $ 4. OM Capacity-building coordinator capacity of the MEST. recruited. 2. 1 Establishment of a PCU. US $ 0. 9M Key high quality personnel recruited.", + "ner_text": [ + [ + 545, + 549, + "named" + ] + ], + "validated": false, + "empirical_context": "services ( School Grants classroom facilities Program ). Annual EMIS survey report The PSC is functioning successfully. Detailed quarterly report on SPs have: ( i ) key personnel; civil works activities by the ( ii ) capacity to procure goods Civil Works unit of the and services according to planning division.", + "type": "program", + "explanation": "EMIS is mentioned in the context of a program and not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with a survey report.", + "contextual_reason_agent": "EMIS is mentioned in the context of a program and not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 12, + "text": "The World Bank Uganda Climate Smart Agricultural Transformation Project ( P173296 ) Page 7 of 81 STRATEGIC CONTEXT A. Country Context 1. Uganda \u2019 s gross domestic product ( GDP ) growth has been declining and poverty has been increasing. GDP growth averaged close to 8 percent per year the decade before 2012 but has since slowed to around 5 percent and is further projected to decline partly because of the Corona Virus Disease 2019 ( COVID-19 ) crisis. The latest poverty data show that poverty has moderately increased since 2012 / 13. According to the Uganda National Household Survey ( UNHS ), between 2012 and 2016, Uganda \u2019 s poverty rate declined to 21. 4 percent, that resulted in around 1. 4 million Ugandans slipping into poverty. A sizable portion of Uganda \u2019 s population remains vulnerable to poverty and significant welfare setbacks in the wake of a shock. About 44 percent are considered vulnerable and susceptible to falling into poverty because of climate and other shocks. While 8. 4 percent of households moved out of poverty in 2021, 10. 2 percent slipped into poverty in response to shocks. 1 2.", + "ner_text": [ + [ + 556, + 588, + "named" + ], + [ + 15, + 21, + "Uganda National Household Survey <> data geography" + ], + [ + 137, + 143, + "Uganda National Household Survey <> data geography" + ], + [ + 304, + 308, + "Uganda National Household Survey <> reference year" + ], + [ + 466, + 478, + "Uganda National Household Survey <> data type" + ], + [ + 556, + 562, + "Uganda National Household Survey <> data geography" + ], + [ + 607, + 611, + "Uganda National Household Survey <> reference year" + ], + [ + 616, + 620, + "Uganda National Household Survey <> reference year" + ], + [ + 622, + 628, + "Uganda National Household Survey <> data geography" + ], + [ + 633, + 671, + "Uganda National Household Survey <> data description" + ], + [ + 710, + 718, + "Uganda National Household Survey <> reference population" + ], + [ + 1049, + 1053, + "Uganda National Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "The latest poverty data show that poverty has moderately increased since 2012 / 13. According to the Uganda National Household Survey ( UNHS ), between 2012 and 2016, Uganda \u2019 s poverty rate declined to 21. 4 percent, that resulted in around 1.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context as a source of poverty data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a national survey that typically collects and provides data on household poverty.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context as a source of poverty data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "016_BOSIB029693bc600f090cb0745b2b12ee69", + "page": 88, + "text": "The KUSP2 Results Framework reflects the Operations \u2019 ToC and is structured around the PDO and the five RAs. The RAs and indicators are directly linked to DLIs. Results Framework indicators are assessed and updated by APA results, except those related to Component 1. The M & E framework is intended to reinforce achievement of Program results. 43. APAs will be the main basis for tracking progress with respect to the Program \u2019 s Results Framework. APAs will be the principal source of data for tracking results indicators at the sub-national level. Additional information on sub-national results indicators will be provided through M & E / reporting systems and at municipal and county levels, which will be regularly consolidated by SDHUD. National level indicators ( linked to component 1 ) will be tracked through bi-annual M & E reports produced by SDHUD. 44. Through their engagement in and implementation of KUSP and KDSP, counties / municipalities have gained considerable M & E experience. However, to fill remaining M & E gaps, the Program will provide capacity building support to national and sub-national stakeholders. In particular, the Program will build on county M & E systems and provide training and assistance on: ( i ) data collection; ( ii ) data quality and integrity control; and ( iii ) linking data to informing decision-making processes. The Program will require that an M & E specialist be included in CPCTs and in the NPCT.", + "ner_text": [ + [ + 634, + 659, + "named" + ] + ], + "validated": false, + "empirical_context": "APAs will be the principal source of data for tracking results indicators at the sub-national level. Additional information on sub-national results indicators will be provided through M & E / reporting systems and at municipal and county levels, which will be regularly consolidated by SDHUD. National level indicators ( linked to component 1 ) will be tracked through bi-annual M & E reports produced by SDHUD.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to systems for monitoring and evaluation rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reporting systems' which suggests data collection.", + "contextual_reason_agent": "However, it is not a dataset as it refers to systems for monitoring and evaluation rather than a structured collection of data.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 28, + "text": "will improve the knowledge and access to geological information to minimize investment risks and promote sound, transparent, and inclusive economic development through activities to: ( a ) assess approximately 15 existing targets or anomalies, particularly with respect to, inter alia, base metals, lithium and rare earths ( minerals critical for the green economy ); ( b ) compile and analyze existing data to develop new targets and acquisition of new geophysical, geochemical and geological data at a scale of 1: 50 000 for 20 maps and 1: 100 000; ( c ) develop a web-map application / portal demonstrating the value of common territorial datasets easily adaptable to a myriad of non-mining applications; ( d ) strengthen and expand the geographic information systems ( GIS ) into an IT hub enabling interconnection between agencies within the MEMC, MINEFID, and other departments; and ( e ) train staff, provide tools and equipment to BUMIGEB and support its promotional activities. The mapping targets minerals in short supply for the development of renewable energy to enable Burkina to potentially tap into expected high market demands for minerals by the low carbon economy. 40. 2. 1. 2. 1 Prospectivity Mapping for ASM.", + "ner_text": [ + [ + 567, + 595, + "named" + ] + ], + "validated": false, + "empirical_context": "will improve the knowledge and access to geological information to minimize investment risks and promote sound, transparent, and inclusive economic development through activities to: ( a ) assess approximately 15 existing targets or anomalies, particularly with respect to, inter alia, base metals, lithium and rare earths ( minerals critical for the green economy ); ( b ) compile and analyze existing data to develop new targets and acquisition of new geophysical, geochemical and geological data at a scale of 1: 50 000 for 20 maps and 1: 100 000; ( c ) develop a web-map application / portal demonstrating the value of common territorial datasets easily adaptable to a myriad of non-mining applications; ( d ) strengthen and expand the geographic information systems ( GIS ) into an IT hub enabling interconnection between agencies within the MEMC, MINEFID, and other departments; and ( e ) train staff, provide tools and equipment to BUMIGEB and support its promotional activities. The mapping targets minerals in short supply for the development of renewable energy to enable Burkina to potentially tap into expected high market demands for minerals by the low carbon economy.", + "type": "application", + "explanation": "However, it is not a dataset but rather a tool for displaying data, as it is described as a web-map application/portal.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves mapping and data visualization.", + "contextual_reason_agent": "However, it is not a dataset but rather a tool for displaying data, as it is described as a web-map application/portal.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2. 5 years of implementation. The mid-term review will involve project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements, and outcomes. It will confirm the plans and processes for expansion of the project. 129. A Monitoring and Evaluation manual was produced under Merankabandi. The manual will be updated for the project, even though most of the monitoring and evaluation processes will be kept the same. The following table describes the main M & E instruments of the project. Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "ner_text": [ + [ + 923, + 952, + "named" + ] + ], + "validated": false, + "empirical_context": "Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "type": "system", + "explanation": "However, it is described as a system that collects information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves the collection of various types of data.", + "contextual_reason_agent": "However, it is described as a system that collects information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 23, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 19 of 74 protective support to HHs and investment in resilience building community assets will help sustain livelihoods, strengthen resilience, and prevent the most vulnerable from falling into destitution or being forcibly displaced. It will also directly support the Government \u2019 s Community Empowerment and Socioeconomic Development Strategy for Refugee Hosting Areas in South Sudan, with cash transfers promoting section 4. 6 of the strategy on creation of livelihood and income generating opportunities given the lack of employment prospects in refugee-hosting environments. 37. In the absence of an enabling environment for widescale mobile payment systems, beneficiaries will receive physical cash at the time of payment, except for Juba where mobile money payment will be piloted. A financial service provider ( i. e., paying agent ), which will be competitively selected by the MAFS, will deliver cash to beneficiaries. The MAFS will provide the recipient list and amount of money to the financial service provider, and the list of beneficiaries will be generated from the MIS. The MIS will capture beneficiaries ' biometric data, which will be used to ensure that only the eligible individuals will receive the cash transfer. The financial service provider pays beneficiaries verifying them biometrically.", + "ner_text": [ + [ + 1185, + 1188, + "named" + ] + ], + "validated": false, + "empirical_context": "The MAFS will provide the recipient list and amount of money to the financial service provider, and the list of beneficiaries will be generated from the MIS. The MIS will capture beneficiaries ' biometric data, which will be used to ensure that only the eligible individuals will receive the cash transfer. The financial service provider pays beneficiaries verifying them biometrically.", + "type": "system", + "explanation": "However, the context indicates that 'MIS' is described as a management information system but not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages information.", + "contextual_reason_agent": "However, the context indicates that 'MIS' is described as a management information system but not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 62, + "text": "The PIU will prepare semiannual project progress reports to be shared with the World Bank, by taking into account different reporting requirements under the grant and loan components. The TKYB is accustomed to collecting such information from PFIs and beneficiary enterprises for previous World Bank projects. A midterm and end line citizen engagement survey will be conducted by the TKYB to seek feedback from beneficiary firms on their satisfaction with the project. The PIU will discuss the survey results with PFIs and the results will inform project implementation, as appropriate. The financial performance of the TKYB will be monitored through independent auditors \u2019 reports and separate management letters confirming adherence to prudential norms. Monitoring of core intermediate result indicators at the PFI level will enable the TKYB and the World Bank team to take action in case of a significant deviation for a specific PFI which may affect the progress toward the PDO. Though, it is not included in the Results Framework, the PIU will also report the statistics on formal employment creation in the loan beneficiary firms. Environmental and Social 37. The grant and loan programs will be implemented by the TKYB with the support of local PFIs. The project will leverage the TKYB \u2019 s extensive experience in providing access to finance services to LEs and, through intermediaries ( such as banks and leasing companies ), to SMEs. As far as grants are concerned, the TKYB will manage the assignment of grants centrally through the PIU, a team of technical experts, and with the technical assistance of the World Bank. 38. The TKYB will undertake the overall responsibility of the project implementation and coordination through its PIU that was established under the Private Sector Renewable Energy and Energy Efficiency Projects and continued to operate under the Geothermal Development Project. However, the capacity of the former PIU established to administer World Bank projects will be increased by assigning / hiring", + "ner_text": [ + [ + 312, + 358, + "named" + ], + [ + 79, + 89, + "midterm and end line citizen engagement survey <> publisher" + ], + [ + 188, + 192, + "midterm and end line citizen engagement survey <> author" + ], + [ + 289, + 299, + "midterm and end line citizen engagement survey <> publisher" + ], + [ + 384, + 388, + "midterm and end line citizen engagement survey <> author" + ], + [ + 411, + 428, + "midterm and end line citizen engagement survey <> reference population" + ], + [ + 852, + 862, + "midterm and end line citizen engagement survey <> publisher" + ], + [ + 1221, + 1225, + "midterm and end line citizen engagement survey <> publisher" + ], + [ + 1479, + 1483, + "midterm and end line citizen engagement survey <> publisher" + ], + [ + 1618, + 1628, + "midterm and end line citizen engagement survey <> publisher" + ], + [ + 1638, + 1642, + "midterm and end line citizen engagement survey <> author" + ], + [ + 1975, + 1985, + "midterm and end line citizen engagement survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "The TKYB is accustomed to collecting such information from PFIs and beneficiary enterprises for previous World Bank projects. A midterm and end line citizen engagement survey will be conducted by the TKYB to seek feedback from beneficiary firms on their satisfaction with the project. The PIU will discuss the survey results with PFIs and the results will inform project implementation, as appropriate.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey collecting structured feedback from beneficiary firms.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects feedback, which is a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey collecting structured feedback from beneficiary firms.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 31, + "text": "The EMIS will build on efforts that are under way including by UNICEF through Niger \u2019 s Data Must Speak initiative which creates data dashboards at the school level; ( b ) support ( i ) the implementation of a national standardized assessment of students; and ( ii ) building the capacity of National Assessment Unit; and ( c ) M & E results of the project literacy intervention ( under Subcomponent 1. 2 ); and ( d ) support teacher deployment and retention measures in refugee-hosting areas and training in contingency planning in the event of new influx of populations. 31 Notably, a census of contractual teachers was conducted, in 2018, with the establishment of a single identification mechanism, which led to the registration of all contractual teachers at the MEP. Recent efforts also include an organizational audit of MES, the elaboration and distribution of HR procedures manuals at the MEP, training of trainers on teacher management, diagnosis of HR functions at the MEP and MES, elaboration of a compendium of all HR legal texts, and the elaboration of a strategy to reform HR management in both ministries. 32 These include the Capacity and Performance of Public Sector for Service Delivery Project and Support to Quality Education Project ( Projet d \u2019 Appui \u00e0 une \u00c9ducation de Qualit\u00e9, PAEQ, P132405 ).", + "ner_text": [ + [ + 4, + 8, + "named" + ] + ], + "validated": false, + "empirical_context": "The EMIS will build on efforts that are under way including by UNICEF through Niger \u2019 s Data Must Speak initiative which creates data dashboards at the school level; ( b ) support ( i ) the implementation of a national standardized assessment of students; and ( ii ) building the capacity of National Assessment Unit; and ( c ) M & E results of the project literacy intervention ( under Subcomponent 1. 2 ); and ( d ) support teacher deployment and retention measures in refugee-hosting areas and training in contingency planning in the event of new influx of populations.", + "type": "program", + "explanation": "However, EMIS is mentioned as a program and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to data collection and management.", + "contextual_reason_agent": "However, EMIS is mentioned as a program and not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 430, + 435, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 4: HRMIS data reports. Verification Entity KACE.", + "type": "system", + "explanation": "HRMIS is mentioned as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with data reports.", + "contextual_reason_agent": "HRMIS is mentioned as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 35, + "text": "Building a strong evidence base on the impact of interventions in the Program will be particularly important for components that make use of grant funding to subsidize firms or individuals ( matching grant, entrepreneurship grant, and subsidy for first-time jobseekers ), as this evidence will guide decisions on if and how to implement these components following the pilot phase. Where possible, randomized controlled trials are envisaged to generate convincing evidence on the performance of the VC matching and entrepreneurship grant components. The degree of over - subscription of programs and the quality of applications will further determine whether randomization is feasible and desirable. Administrative data available from the MOF will allow for a high-quality non - experimental evaluation of activities including support to the TSEZ, and the value chains and broadband access components ( difference in difference approaches with pre-trends ). Approximately US $ 600, 000 have been secured to fund robust impact evaluation from the Jobs Multi-Donor Trust Fund \u201d ( parent Trust Fund number TF072322 ). 66. Impact evaluation will draw upon data collected for monitoring of the Program \u2019 s results, as well as additional dedicated surveys. Data collected primarily for monitoring will play an important role in facilitating impact evaluation. This is true for both: ( i ) data on actions and beneficiaries that is collected through the Program \u2019 s M & E system; and ( ii ) administrative data made available by the GOL to enable monitoring. The design of evaluations will continue to evolve alongside the NJP as program details are being refined. The administrative details of program elements will influence identification strategies ( for instance, roll-out schedules and outreach campaigns ).", + "ner_text": [ + [ + 1483, + 1502, + "named" + ], + [ + 699, + 718, + "administrative data <> data type" + ], + [ + 738, + 741, + "administrative data <> publisher" + ], + [ + 855, + 899, + "administrative data <> data description" + ], + [ + 1525, + 1528, + "administrative data <> publisher" + ], + [ + 1821, + 1839, + "administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Data collected primarily for monitoring will play an important role in facilitating impact evaluation. This is true for both: ( i ) data on actions and beneficiaries that is collected through the Program \u2019 s M & E system; and ( ii ) administrative data made available by the GOL to enable monitoring. The design of evaluations will continue to evolve alongside the NJP as program details are being refined.", + "type": "data", + "explanation": "In this context, 'administrative data' is indeed used as a data source for monitoring, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'administrative data' is a dataset because it refers to a collection of data used for monitoring and evaluation.", + "contextual_reason_agent": "In this context, 'administrative data' is indeed used as a data source for monitoring, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for monitoring", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 30, + "text": "The Project will undertake further consultations with the relevant stakeholders to ensure that the design of the water infrastructure considers the associated risks. C. Financial Management 66. The MWE has a fully functioning Accounts Department headed by the Assistant Commissioner of Accounts. The MWE has an Internal Audit Unit that includes four internal auditors from the MoFPED. This unit reports to an audit committee at the MoFPED. The Project \u2019 s activities and transactions implemented by the MWE will be approved and authorized by the MWE \u2019 s Permanent Secretary who is the Accounting Officer. The main accounts of the MWE are computerized with Integrated Financial Management Systems ( IFMS ). However, this system is currently only operational for government funds as the project module is not yet fully operational. As a result, project financial reports cannot be generated directly from the IFMS. 67. For the NWSC, the Project \u2019 s transactions will be managed within the existing set-up of the NWSC. The Managing Director, who is the Accounting Officer, will approve and authorize activities and transactions implemented by NWSC. All transactions will be processed in accordance with the NWSC \u2019 s policies and procedures. The NWSC has a fully functional Finance and Accounts Department headed by the Deputy Managing Director for Finance and Corporate Strategy. The Accounting Unit of the NWSC is computerized with Iscala accounting systems.", + "ner_text": [ + [ + 656, + 695, + "named" + ] + ], + "validated": false, + "empirical_context": "The Project \u2019 s activities and transactions implemented by the MWE will be approved and authorized by the MWE \u2019 s Permanent Secretary who is the Accounting Officer. The main accounts of the MWE are computerized with Integrated Financial Management Systems ( IFMS ). However, this system is currently only operational for government funds as the project module is not yet fully operational.", + "type": "system", + "explanation": "However, it is described as a system for managing financial accounts, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'systems' which can imply data management.", + "contextual_reason_agent": "However, it is described as a system for managing financial accounts, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 703, + 719, + "named" + ], + [ + 51, + 62, + "HCO-related data <> data geography" + ], + [ + 764, + 782, + "HCO-related data <> data type" + ], + [ + 1240, + 1263, + "HCO-related data <> data description" + ] + ], + "validated": true, + "empirical_context": "DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level.", + "type": "data", + "explanation": "In the context, 'HCO-related data' is mentioned as data that will be shared for monitoring purposes, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data related to HCO results indicators.", + "contextual_reason_agent": "In the context, 'HCO-related data' is mentioned as data that will be shared for monitoring purposes, indicating it is used as a data source.", + "contextual_signal": "mentioned as data to monitor HCO results indicators", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP Management Information System Attendance data collected during each training session Implementing Partner Number of beneficiary households receiving Direct Income Support The number of total beneficiary HHs that are selected to participate in DIS under sub-component 1. 2, in accordance with the Project Operations Manual, This indicator will be measured, at a minimum, Registration and payment data from the SNSOP MIS Beneficiary data will be collected during registration and updated over the course of project implementation. Selected Implementing Partner", + "ner_text": [ + [ + 444, + 473, + "named" + ], + [ + 180, + 244, + "Registration and payment data <> data description" + ] + ], + "validated": true, + "empirical_context": "This indicator will be measured, at a minimum, on a quarterly basis SNSOP Management Information System Attendance data collected during each training session Implementing Partner Number of beneficiary households receiving Direct Income Support The number of total beneficiary HHs that are selected to participate in DIS under sub-component 1. 2, in accordance with the Project Operations Manual, This indicator will be measured, at a minimum, Registration and payment data from the SNSOP MIS Beneficiary data will be collected during registration and updated over the course of project implementation. Selected Implementing Partner", + "type": "data", + "explanation": "This is indeed a dataset as it refers to structured data collected for monitoring and evaluation purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected during project implementation.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected for monitoring and evaluation purposes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 35, + "text": "Percentage of PUUs / FUGs / CIGs that have women members in decision-making roles on their councils / boards / committees ( included in the Results Framework ) 71. Citizen engagement. The project has a citizen engagement-oriented design and will benefit from feedback monitoring throughout implementation. During preparation, the project carried out stakeholder consultations with civil society, academia, and other relevant stakeholders, and their feedback were duly incorporated into the project design. Project implementation will include mechanisms to engage citizens, beneficiaries, and stakeholders. The project will carry out meaningful stakeholder consultations through focus groups and surveys; employ monitoring mechanisms such as satisfaction surveys, GRMs, and multi - stakeholder forums; and deploy tools for remote consultations and, where appropriate, organize socially distanced gatherings, following local regulations. The team will work closely with the gender and social specialist and will ensure that the existing consultation plans have been designed with citizen engagement in mind and / or have a stand-alone citizen engagement plan. According to best practices, this will also be reflected in the scope of the activities in the project. Table 3. Citizen Engagement Mechanisms, Project Activities, and Indicators Citizen Engagement Mechanism Project Activity Indicator Participatory planning and participatory monitoring Various resource management plans to be supported by the project, such those for catchments, forest management, and pasture management, will be developed through participatory planning, including participatory watershed GIS mapping, and Share of management plans supported by the project are developed through participatory planning and include participatory monitoring. ( This indicator is not used in the", + "ner_text": [ + [ + 741, + 761, + "named" + ], + [ + 563, + 571, + "satisfaction surveys <> reference population" + ], + [ + 1867, + 1885, + "satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Project implementation will include mechanisms to engage citizens, beneficiaries, and stakeholders. The project will carry out meaningful stakeholder consultations through focus groups and surveys; employ monitoring mechanisms such as satisfaction surveys, GRMs, and multi - stakeholder forums; and deploy tools for remote consultations and, where appropriate, organize socially distanced gatherings, following local regulations. The team will work closely with the gender and social specialist and will ensure that the existing consultation plans have been designed with citizen engagement in mind and / or have a stand-alone citizen engagement plan.", + "type": "survey", + "explanation": "In this context, 'satisfaction surveys' are explicitly mentioned as part of the monitoring mechanisms, indicating they are used to collect data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satisfaction surveys' imply a structured collection of responses from participants.", + "contextual_reason_agent": "In this context, 'satisfaction surveys' are explicitly mentioned as part of the monitoring mechanisms, indicating they are used to collect data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 83, + "text": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 78 of 87 elderly people who are more exposed to these alternatives by spending more time at home. Recently, these traditional sources of lighting have started to be replaced by lamps and flashlights powered by dry-cell batteries. This change is also taking place in Chad where the results of the most recent survey of households indicate that 84 percent of the non-electrified households in Chad rely on dry-cell battery-powered flashlights. On average, rural households in Chad pay CFAF 2, 518 per month on lighting alternatives and about CFAF 1, 467 per month to charge their mobile device outside of their house. These costs incurred by households, nevertheless, include taxes and other transfer payments ( for example, dry - cell battery flashlights and their components are sold by local shops from which the seller derives a margin ) that need to be excluded from the economic analysis. For the analysis, it is assumed that about 35 percent of the current expenditure are taxes and other transfer payments.", + "ner_text": [ + [ + 376, + 396, + "named" + ], + [ + 4, + 14, + "survey of households <> publisher" + ], + [ + 15, + 19, + "survey of households <> data geography" + ], + [ + 334, + 338, + "survey of households <> data geography" + ], + [ + 522, + 538, + "survey of households <> reference population" + ], + [ + 1096, + 1114, + "survey of households <> usage context" + ] + ], + "validated": true, + "empirical_context": "Recently, these traditional sources of lighting have started to be replaced by lamps and flashlights powered by dry-cell batteries. This change is also taking place in Chad where the results of the most recent survey of households indicate that 84 percent of the non-electrified households in Chad rely on dry-cell battery-powered flashlights. On average, rural households in Chad pay CFAF 2, 518 per month on lighting alternatives and about CFAF 1, 467 per month to charge their mobile device outside of their house.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on household reliance on dry-cell battery-powered flashlights in Chad.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on household lighting usage.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on household reliance on dry-cell battery-powered flashlights in Chad.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 41, + "text": "36 Target Values Data Collection and Reporting Project Outcome Indicators Baseline YR1 YR2 YR3 YR4 YR5 YR6 Frequency and Reports Data Collection Instruments Responsibility for Data Collection policy and planning. 2. 6 Extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. 10 indicators 12 indicators 20 Indicators 32 Indicators Yrs 1, 3 and 6 External assessment of EMIS Directorate of Educational Planning ( DEP ) 3. Teaching and Learning Resource Development 3. 1 Teacher policies revised to support application of national teacher standards. Nil Policies identified 50 % target policies revised 100 % of target policies revised Yrs 1, 3 and 6 MoE Report DTQS / PSPS 3. 2 Number / percentage of newly appointed teachers completing post - recruitment initial training in ETC. Nil 6 % 30 % 50 % 60 % 70 % 80 % Annual MoE Reports DTQS / ETC 3. 3 Number / percentage of new teachers appointed using a competency - based model.", + "ner_text": [ + [ + 234, + 237, + "named" + ] + ], + "validated": false, + "empirical_context": "2. 6 Extent to which SIS / EMIS is producing data for continuous monitoring of the 32 key performance indicators. 10 indicators 12 indicators 20 Indicators 32 Indicators Yrs 1, 3 and 6 External assessment of EMIS Directorate of Educational Planning ( DEP ) 3.", + "type": "system", + "explanation": "'SIS' is referred to as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'SIS' is a dataset because it is mentioned in the context of producing data for monitoring indicators.", + "contextual_reason_agent": "'SIS' is referred to as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 99, + "text": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees \u2022 Survey of schools to identify potential beneficiaries from refugee-affected local councils \u2022 Mobilization and sensitization of communities in reaching refugees for enrolling and retaining children in school \u2022 Development of Response Strategy and Plan 2019-2023 Priority Area 3: Improving service delivery through performance-based Financing ( PBF ) at school level \u2022 Strengthening capacity of school management councils \u2022 Mobilizing and strengthening community participation in school management", + "ner_text": [ + [ + 640, + 644, + "named" + ] + ], + "validated": false, + "empirical_context": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data management and information systems.", + "contextual_reason_agent": "However, EMIS is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 56, + "text": "46 amushingonare ) for a total of 8 to 10 members with at least half of the members from civil society. Following the FIDA model, an NGO will be recruited to provide technical assistance to the targeting committees to ensure that they follow the criteria and adhere to the process. The committee list will be public and the final cash transfer beneficiary list will be validated in a colline in an assembly. All households in the listing will be surveyed by a data collection firm to compute a multi-dimensional proxy-means test ( PMT ) score, and rank households by their poverty status. 28. As the program expands, the process may be reviewed. Based on the progress on coordinating other information and additional funding available for the program, at mid-term review, a decision will be taken to continue with the initial process; or to apply the questionnaire to all potentially eligible households, based on the small-area estimate of extreme poverty in the commune ( and the community validation may then occur afterwards ); or to use only the community-based targeting. 29. The targeting questionnaire will collect basic socio-economic information about the household for the PMT calculation, as well as key information on specific vulnerabilities, access to basic social services, and livelihood strategies.", + "ner_text": [ + [ + 1086, + 1109, + "named" + ] + ], + "validated": false, + "empirical_context": "29. The targeting questionnaire will collect basic socio-economic information about the household for the PMT calculation, as well as key information on specific vulnerabilities, access to basic social services, and livelihood strategies.", + "type": "questionnaire", + "explanation": "However, it is not a dataset itself but rather a tool for gathering data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves collecting information.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a tool for gathering data.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 18, + "text": "A two-pronged approach is required to address the gaps in supply of digital skills: ( i ) immediate attention to the stock of youth that is currently in the job market with inadequate digital skills, complemented by: ( ii ) institutionalizing a link between emerging skills needs in the digital sector and the design of curricula in the education system and complementary rapid response programs to improve the preparedness of the flow of job-seekers in the digital sector. The project will support creating a Customer Relationship Management System ( CRM ) to maintain a database of all trainees / graduates to ensure that they are plugged into relevant offline and online job matching activities ( e. g., employment matchmaking platforms, job fairs, etc. ). The project will leverage a local job-matching platform that was launched by the Ministry of Labor recently25 to support connections between supply and demand. 28. Component 1 will address the five core weaknesses identified in the Intaj \u2019 s study26 underlying the skills mismatch directly and indirectly by developing a strong alignment between the supply and demand sides, boosting demand for digitally skilled youth in Jordan, and ensuring that training activities produce demand-driven skills for the market. Direct and frequent feedback collected from private sector employers will inform the alignment 25 https: / / sajjil. gov. jo / en / the-middle-east 26 http: / / intaj. net / wp-content / uploads / 2017 / 12 / Labor-Market-Study-2016-ICT-Fresh-Graduates. pdf", + "ner_text": [ + [ + 510, + 549, + "named" + ] + ], + "validated": false, + "empirical_context": "A two-pronged approach is required to address the gaps in supply of digital skills: ( i ) immediate attention to the stock of youth that is currently in the job market with inadequate digital skills, complemented by: ( ii ) institutionalizing a link between emerging skills needs in the digital sector and the design of curricula in the education system and complementary rapid response programs to improve the preparedness of the flow of job-seekers in the digital sector. The project will support creating a Customer Relationship Management System ( CRM ) to maintain a database of all trainees / graduates to ensure that they are plugged into relevant offline and online job matching activities ( e. g.", + "type": "system", + "explanation": "However, the term refers to a system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions a system that maintains a database.", + "contextual_reason_agent": "However, the term refers to a system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 19, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 15 of 66 23. Functional mechanisms to support evidence-based decision-making and management of the sector are needed. MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators. Quality, reliable data is fundamental to inform education decisions and policies and enhance transparency. Accordingly, strengthening the EMIS and the annual school survey process to provide quality education information, in a timely manner, at every administrative level is essential to support the functions of the education ministries. 24. The education governing ministries lack the policy tools and implementation capacity needed for education sector management. The General Education system is characterized by inequitable allocation of teachers across the states, unclear deployment drivers, and outdated salary structure.", + "ner_text": [ + [ + 724, + 751, + "named" + ] + ], + "validated": false, + "empirical_context": "The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators. Quality, reliable data is fundamental to inform education decisions and policies and enhance transparency.", + "type": "instrument", + "explanation": "However, it refers to tools or methods for collecting data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'data collection instruments' implies a method for gathering data.", + "contextual_reason_agent": "However, it refers to tools or methods for collecting data rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 86, + "text": "Where the area of intervention potentially extends beyond the woreda \u2019 s administrative boundaries, efforts may require management models that vary from traditional WASH Committees ( WASHCOMs ). The selection and readiness criteria for rural communities to be supported under this Project include the following. Selection Criteria ( i ) Level of WASH access and coverage based on the WASH baseline data compiled from regions and verified by the federal WASH ministers: Efforts to measure this will primarily focus on the following indicators ( i ) the level of rural water supply coverage in line with the GTP II standard; ( ii ) household sanitation coverage based on the MoH definition for improved household latrines; ( iii ) proportion of ODF kebeles in a woreda; ( iv ) WASH coverage for schools; and ( v ) WASH coverage for health facilities. The selection criteria include the level of stunting ( based on data collected from the health MIS at the woreda level ) as well as the \u201c prevalence of acute watery diarrhea \u201d ( also using data from the woreda-level health MIS ). These will be used as proxies to prioritize areas with poor sanitation. Woredas with relatively low levels of WASH coverage based on these indicators will be given priority. ( ii ) Level of ongoing assistance in the woreda: Woredas with a lower level of ongoing support from other financing sources will be given priority for financing from the Project. Readiness Criteria ( i ) Compliance with safeguard requirements, based on initial screenings as outlined in the ESMF.", + "ner_text": [ + [ + 1052, + 1075, + "named" + ], + [ + 62, + 68, + "woreda-level health MIS <> data geography" + ], + [ + 384, + 402, + "woreda-level health MIS <> data type" + ], + [ + 552, + 588, + "woreda-level health MIS <> data description" + ], + [ + 630, + 659, + "woreda-level health MIS <> data description" + ], + [ + 673, + 676, + "woreda-level health MIS <> publisher" + ], + [ + 760, + 766, + "woreda-level health MIS <> data geography" + ], + [ + 884, + 901, + "woreda-level health MIS <> data description" + ], + [ + 955, + 961, + "woreda-level health MIS <> data geography" + ], + [ + 987, + 1022, + "woreda-level health MIS <> data description" + ], + [ + 1566, + 1584, + "woreda-level health MIS <> usage context" + ] + ], + "validated": true, + "empirical_context": "Selection Criteria ( i ) Level of WASH access and coverage based on the WASH baseline data compiled from regions and verified by the federal WASH ministers: Efforts to measure this will primarily focus on the following indicators ( i ) the level of rural water supply coverage in line with the GTP II standard; ( ii ) household sanitation coverage based on the MoH definition for improved household latrines; ( iii ) proportion of ODF kebeles in a woreda; ( iv ) WASH coverage for schools; and ( v ) WASH coverage for health facilities. The selection criteria include the level of stunting ( based on data collected from the health MIS at the woreda level ) as well as the \u201c prevalence of acute watery diarrhea \u201d ( also using data from the woreda-level health MIS ). These will be used as proxies to prioritize areas with poor sanitation.", + "type": "system", + "explanation": "This is a dataset as it is explicitly mentioned as a source of data for health indicators at the woreda level.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a health management information system that collects data at the woreda level.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a source of data for health indicators at the woreda level.", + "contextual_signal": "mentioned as a data source for health indicators", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 20, + "text": "In the case of the Ministry of Public Transport the presence of women is predominant in areas such as translation, reception, and mailing. Women face various barriers during their career cycle, which affects their entry into and career development in jobs linked to the road sector. Some of the barriers relate to their recruitment ( limited information ); hiring processes that are not gender-sensitive; retention issues such as sexual harassment ( SH ) in the workplace, work family balance, and limited benefits, and promotion; and limited training or access to mentoring schemes. The project will leverage the existing collaboration framework with the National Advanced School of Public Works in Yaound\u00e9, under the PDST Project, to continue the partnership with the MINTP to support activities aiming at promoting women \u2019 s entry in the Transport sector. Thus, the project will continue to support efforts to increase the number of women in STEM42, then facilitate the transition from universities to work in the Transport sector in the long term. ( e ) The Douala \u2013 N \u2019 Djamena Intra \u2013 Interregional Transport Corridor ( 1, 842 km ) 26. The Far North of Cameroon is a trade crossroads; however, cross-border trade is adversely impacted by the 39 https: / / www. roadsafetyfacility. org / country / cameroon 40 Global Health Observatory data repository accessed on February 1, 2022. http: / / apps. who. int / gho / data / node. main. A997? lang = en 41 2021 data from the International Labour Organization: https: / / data. worldbank. org / indicator / SL. TLF. ACTI. MA. ZS? locations = CM & name_desc = false 42 Science, Technology, Engineering, and Mathematics.", + "ner_text": [ + [ + 1315, + 1340, + "named" + ] + ], + "validated": false, + "empirical_context": "roadsafetyfacility. org / country / cameroon 40 Global Health Observatory data repository accessed on February 1, 2022. http: / / apps.", + "type": "organization", + "explanation": "However, it is actually an organization that provides access to data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'data repository' in the context.", + "contextual_reason_agent": "However, it is actually an organization that provides access to data rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as an organization, not as a data source", + "tags": [] + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 18, + "text": "The World Bank Costa Rica Results in Education ( CORE ) ( P181174 ) Page 9 25. Subcomponent 1. 2: Information Systems Modernization ( US $ 10 million ). The administrative reform will be accompanied by inter-related interventions in information systems, harmonized with the Hacienda Digital project of the Ministry of Finance ( MH ) and other Government systems such as the National System of Information and Single Registry ( SINIRUBE ). Figure 2 is a color-coded schematic showing the inter-related systems to be reformed: Education Infrastructure, Human Talent including Teachers, Student Welfare ( Scholarships, School Feeding and Transportation ), expansion of the migratory module in SABER, School Management, and management of non-university colleges regulated by MEP ( Educaci\u00f3n Parauniversitaria ). This subcomponent will finance consultancy firms and individuals and procurement for the development, deployment, licensing, and hosting of software. A limited number of computer purchases would be included in the subcomponent, but systems would be hosted on cloud servers with MEP computers accessing the cloud servers through secure connections.", + "ner_text": [ + [ + 374, + 424, + "named" + ], + [ + 15, + 25, + "National System of Information and Single Registry <> data geography" + ] + ], + "validated": true, + "empirical_context": "2: Information Systems Modernization ( US $ 10 million ). The administrative reform will be accompanied by inter-related interventions in information systems, harmonized with the Hacienda Digital project of the Ministry of Finance ( MH ) and other Government systems such as the National System of Information and Single Registry ( SINIRUBE ). Figure 2 is a color-coded schematic showing the inter-related systems to be reformed: Education Infrastructure, Human Talent including Teachers, Student Welfare ( Scholarships, School Feeding and Transportation ), expansion of the migratory module in SABER, School Management, and management of non-university colleges regulated by MEP ( Educaci\u00f3n Parauniversitaria ).", + "type": "registry", + "explanation": "This is a dataset as it is referred to as a 'Single Registry', indicating it functions as a data source for the mentioned interventions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'System of Information' and 'Registry', which suggest a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is referred to as a 'Single Registry', indicating it functions as a data source for the mentioned interventions.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 42, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 37 of 61 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 ( Yes / No ) The common financial vehicle has been established ( Yes / No ) No No Yes Yes Yes Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Programs using the Social Registry to select their beneficiaries Number of user agreements signed by the Social Registry Semester Project Semester report Supervision Social Registry Directorate SSN programs ' beneficiary households avoiding negative coping strategies ( national / refugees ) This indicators tracks the percentage of households benefiting from the shock - response program which avoid negative coping strategies as a result of the program. Negative Coping strategies refer to those included in the Coping Strategy Index ( CSI ): reduce number of meals eaten in a day, consume seed stock held for next season, and so on.", + "ner_text": [ + [ + 496, + 511, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 686, + 708, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 37 of 61 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 ( Yes / No ) The common financial vehicle has been established ( Yes / No ) No No Yes Yes Yes Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Programs using the Social Registry to select their beneficiaries Number of user agreements signed by the Social Registry Semester Project Semester report Supervision Social Registry Directorate SSN programs ' beneficiary households avoiding negative coping strategies ( national / refugees ) This indicators tracks the percentage of households benefiting from the shock - response program which avoid negative coping strategies as a result of the program. Negative Coping strategies refer to those included in the Coping Strategy Index ( CSI ): reduce number of meals eaten in a day, consume seed stock held for next season, and so on.", + "type": "registry", + "explanation": "In the context, it is mentioned as a source for selecting beneficiaries, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to beneficiaries.", + "contextual_reason_agent": "In the context, it is mentioned as a source for selecting beneficiaries, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for selecting beneficiaries", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 59, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 53 of 102 revised programs in collaboration with the private sector. These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Female The indicator measures the cumulated number of female youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector. These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Annual Enrolment records at sector training hubs Administrative data ( registry of sectoral hubs ) M & E specialist within the PIU Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Refugees The indicator measures the cumulated number of refugee youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector.", + "ner_text": [ + [ + 767, + 786, + "named" + ], + [ + 4, + 14, + "Administrative data <> publisher" + ], + [ + 15, + 22, + "Administrative data <> data geography" + ], + [ + 1032, + 1045, + "Administrative data <> reference population" + ] + ], + "validated": true, + "empirical_context": "These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Female The indicator measures the cumulated number of female youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector. These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Annual Enrolment records at sector training hubs Administrative data ( registry of sectoral hubs ) M & E specialist within the PIU Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Refugees The indicator measures the cumulated number of refugee youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector.", + "type": "registry", + "explanation": "In this context, 'Administrative data' is indeed used as a data source for measuring indicators related to student certifications.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Administrative data' is a dataset because it implies a structured collection of information used for analysis.", + "contextual_reason_agent": "In this context, 'Administrative data' is indeed used as a data source for measuring indicators related to student certifications.", + "contextual_signal": "mentioned as a data source for measuring indicators", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 36, + "text": "Technical assistance will be provided on three different dimensions under this component: ( a ) Capacity development to manage and implement project activities: A range of project management skills including development of work plans and budgets, financial management and technical audits, procurement, environmental and social safeguards, and communications strategy will be developed for the key involved agencies including MENFOP, MT, CDE and related entities. ( b ) The role for Monitoring and Evaluation will be split between two ministries with immediate project related activities falling within the purview of MENFOP, and longer-term system support being the responsibility of MT. The project will support capacity building of the technical capabilities of Observatoire de l ' emploi et des qualifications ( ONEQ ) and the National Agency of Employment and Vocational Training ( ANEFIP ), the development of survey instruments and processing tools, support other surveys and studies, and support for the development of a functioning labor market system. The project will also contract a globally recognized academic institution to conduct independent, high-quality monitoring and evaluation of programs, carry out tracer and employer satisfaction surveys, student and teacher evaluations, and impact evaluations over the entire project period. Finally, a communications strategy will be employed to support the reforms envisaged under this project, and strengthen the participation of girls and women, persons with disabilities, and refugees. 65.", + "ner_text": [ + [ + 1222, + 1262, + "named" + ], + [ + 1493, + 1508, + "tracer and employer satisfaction surveys <> reference population" + ], + [ + 1510, + 1535, + "tracer and employer satisfaction surveys <> reference population" + ], + [ + 1541, + 1549, + "tracer and employer satisfaction surveys <> reference population" + ], + [ + 1570, + 1588, + "tracer and employer satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The project will support capacity building of the technical capabilities of Observatoire de l ' emploi et des qualifications ( ONEQ ) and the National Agency of Employment and Vocational Training ( ANEFIP ), the development of survey instruments and processing tools, support other surveys and studies, and support for the development of a functioning labor market system. The project will also contract a globally recognized academic institution to conduct independent, high-quality monitoring and evaluation of programs, carry out tracer and employer satisfaction surveys, student and teacher evaluations, and impact evaluations over the entire project period. Finally, a communications strategy will be employed to support the reforms envisaged under this project, and strengthen the participation of girls and women, persons with disabilities, and refugees.", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as part of the project activities, indicating they are used to gather data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'tracer and employer satisfaction surveys' imply structured data collection efforts.", + "contextual_reason_agent": "These surveys are explicitly mentioned as part of the project activities, indicating they are used to gather data.", + "contextual_signal": "mentioned as part of the project activities to gather data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 70, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 66 of 82 The MINEMA-SPIU, BRD, and Districts are the implementing agencies of SEIRHCP. Districts hosting refugee camps have prior experience in implementing projects of a similar nature funded by World Bank and other development partners. The District staff are familiar with the World Bank \u2019 s procurement guidelines but new to the procurement regulations. BRD also has experience in implementing two World-Bank funded projects. MINEMA is not familiar with World Bank procurement guidelines. Thus, tailored training will be offered to MINEMA, BRD and district staff by the World Bank. Based on the assessment the project risk is rated \u2018 Substantial. \u2019 3. STEP. The proposed project will use STEP, a planning and tracking system that provides data on procurement activities, establishes benchmarks, monitors delays, and measures procurement performance. There are ongoing initiatives to harmonize STEP with the government \u2019 s e-procurement system, but until this process is complete, both will be used in parallel. 4. Procurement risk assessment.", + "ner_text": [ + [ + 762, + 766, + "named" + ] + ], + "validated": false, + "empirical_context": "\u2019 3. STEP. The proposed project will use STEP, a planning and tracking system that provides data on procurement activities, establishes benchmarks, monitors delays, and measures procurement performance.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it provides data on procurement activities.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "093_PAD2574-ARABIC-PUBLIC-PAD-final-02262018-AR-Clean-%D9%85%D9%8A", + "page": 3, + "text": "of Expenditure \u0628\u064a\u0627\u0646 \u0627\u0644\u0646\u0641\u0642\u0627\u062a SPS Stated Preference Survey ( s ) \u0627\u0644\u062f\u0631\u0627\u0633 \u0629 / \u0627\u0644\u062f\u0631\u0627\u0633\u0627\u062a \u0627\u0627\u0644\u0633\u062a\u0642\u0635\u0627\u0626\u064a\u0629 \u062d\u0648\u0644 \u0627\u0644\u062a\u0641\u0636\u064a\u0627\u0644\u062a \u0627\u0644\u0645 \u064f \u0639\u0644\u0646\u0629", + "ner_text": [ + [ + 28, + 56, + "named" + ] + ], + "validated": true, + "empirical_context": "of Expenditure \u0628\u064a\u0627\u0646 \u0627\u0644\u0646\u0641\u0642\u0627\u062a SPS Stated Preference Survey ( s ) \u0627\u0644\u062f\u0631\u0627\u0633 \u0629 / \u0627\u0644\u062f\u0631\u0627\u0633\u0627\u062a \u0627\u0627\u0644\u0633\u062a\u0642\u0635\u0627\u0626\u064a\u0629 \u062d\u0648\u0644 \u0627\u0644\u062a\u0641\u0636\u064a\u0627\u0644\u062a \u0627\u0644\u0645 \u064f \u0639\u0644\u0646\u0629", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific survey designed to collect data on stated preferences.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific survey designed to collect data on stated preferences.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "187_multi-page", + "page": 22, + "text": "The Unit consists of a Unit Director, two Project Coordinators ( one from the MOF and one from the Council of Ministers ), one Procurement Officer, and one Financial Management Officer. Project Coordination: Project coordination will be ensured through the inter-ministerial committee mentioned in the above paragraph, as well as by close collaboration between the Deputy Minister of Finance, the Director of the Department of Public Administration and the Secretary General within the Council of Ministers. Monitoring and Evaluation Design: The Department within the Office of the Prime Minister for implementation of the Public Administration Reform Program will have a monitoring and evaluation systern for which an appropriate computerized Management Information System ( MIS ) will be set up under the project. It will track three main types of variables, which are identified in the detailed Project Implementation Plan and the implementation monitoring plan identified above: * financial indicators * intermediate impact indicators * physical indicators The UIPARP will provide IDA with sermi-annual reports on project progress by April 30 and October 31 of each year, beginning in October of the year 2000. Furthermore, surveys will be taken to obtain stakeholders ' evaluations of the impact of reforms implemented under the Project. In particular, a survey of officials has already been undertaken to establish baseline measurements of a wide variety of aspects of the quality of the current public administration. That survey will be repeated during the penultimate year of the project to capture progress in changing officials ' views of how well the public expenditure management systems and processes as well as the human resource management practices meet key - 19 -", + "ner_text": [ + [ + 1360, + 1379, + "named" + ], + [ + 1085, + 1088, + "survey of officials <> publisher" + ], + [ + 1209, + 1213, + "survey of officials <> publication year" + ], + [ + 1797, + 1815, + "survey of officials <> usage context" + ] + ], + "validated": true, + "empirical_context": "Furthermore, surveys will be taken to obtain stakeholders ' evaluations of the impact of reforms implemented under the Project. In particular, a survey of officials has already been undertaken to establish baseline measurements of a wide variety of aspects of the quality of the current public administration. That survey will be repeated during the penultimate year of the project to capture progress in changing officials ' views of how well the public expenditure management systems and processes as well as the human resource management practices meet key - 19 -", + "type": "survey", + "explanation": "This is indeed a dataset as it involves structured data collection through surveys to assess stakeholder evaluations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects evaluations from stakeholders.", + "contextual_reason_agent": "This is indeed a dataset as it involves structured data collection through surveys to assess stakeholder evaluations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 13, + "text": "Two recent studies provide a detailed general analysis of policy options. 18 For STEAM courses, the causes relate to: ( i ) enjoyment of reading and preference for mathematics; ( ii ) self-efficacy or belief in own capabilities, often reinforced by teachers; ( iii ) parental support or lack thereof due to low socioeconomic status; ( iv ) teacher preparation to deal effectively with diversity. Figure 1a: PISA learning assessment scores: gender comparison Source: PISA; Differences greater than the grey bands ( + or - two standard errors ) are statistically significant. 16 Source: Government Budget for 2024 and MEP School Census data from 2023. 17 Program-Based budgeting at a government-wide level is being implemented under the Fiscal Management Improvement Project ( P172352, Loan 9075-CR ), known as Hacienda Digital. Investing in readiness to implement program-based budgeting at MEP, the biggest Ministry ( in terms of budget and staff ) is a priority for the Government of Costa Rica. 18 Desigualdades por g\u00e9nero en Primaria y Secundaria, Chapter 4 in Noveno Estado de la Educaci\u00f3n, 2023; and Villlobos and Azofeifa, La paradoja en educaci\u00f3n, alta inversi\u00f3n del PIB y alta brechas de g\u00e9nero, Logos ( II ) 1, 2021.", + "ner_text": [ + [ + 407, + 411, + "named" + ], + [ + 644, + 648, + "PISA <> reference year" + ], + [ + 985, + 995, + "PISA <> data geography" + ], + [ + 1095, + 1099, + "PISA <> reference year" + ], + [ + 1105, + 1127, + "PISA <> author" + ], + [ + 1220, + 1224, + "PISA <> publication year" + ] + ], + "validated": true, + "empirical_context": "18 For STEAM courses, the causes relate to: ( i ) enjoyment of reading and preference for mathematics; ( ii ) self-efficacy or belief in own capabilities, often reinforced by teachers; ( iii ) parental support or lack thereof due to low socioeconomic status; ( iv ) teacher preparation to deal effectively with diversity. Figure 1a: PISA learning assessment scores: gender comparison Source: PISA; Differences greater than the grey bands ( + or - two standard errors ) are statistically significant. 16 Source: Government Budget for 2024 and MEP School Census data from 2023.", + "type": "dataset", + "explanation": "In this context, 'PISA' refers to a dataset used for empirical analysis of educational outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is referenced in relation to learning assessment scores.", + "contextual_reason_agent": "In this context, 'PISA' refers to a dataset used for empirical analysis of educational outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 744, + 749, + "named" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 942, + 945, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 58, + "text": "GROW Beneficiaries of Project Investments Components Activities Potential Beneficiaries Estimated Number of Beneficiaries Component 1: Support for Women Empowerment and Enterprise Development Services, including in host and refugee communities ( Total IDA: US $ 42 million, WHR US $ 7. 95m ) Total: 255, 000 ( direct ) 1, 147, 500 ( indirect ) Sub-component 1A: Supporting creation and strengthening of women platforms, community mobilization, and mindset change ( IDA US $ 5 million, including WHR US $ 450, 000 ) \u2022 Mobilization of women and girls in target districts to establish platforms at district level of new and existing women entrepreneurs. \u2022 Setting up a national digital platform for women entrepreneurs \u2022 Setting up a database of women - owned / managed businesses \u2022 Communication and outreach campaign \u2022 Service provider is contracted to conduct sessions on social norms / women safety \u2022 Advocacy on policy issues impacting women entrepreneurs \u2022 Women and adolescent girls \u2022 Existing women entrepreneurs \u2022 Refugee women \u2022 Men, male partners, community leaders benefiting from participating in behavior change interventions. \u2022 Women business leaders 150, 000 women and adolescent girls Estimated 1, 147, 500 men, male partners, communities, and household members indirectly benefit from platform and communication campaign", + "ner_text": [ + [ + 731, + 777, + "named" + ], + [ + 630, + 649, + "database of women - owned / managed businesses <> reference population" + ], + [ + 938, + 957, + "database of women - owned / managed businesses <> reference population" + ], + [ + 998, + 1017, + "database of women - owned / managed businesses <> reference population" + ], + [ + 1020, + 1033, + "database of women - owned / managed businesses <> reference population" + ], + [ + 1140, + 1162, + "database of women - owned / managed businesses <> reference population" + ] + ], + "validated": true, + "empirical_context": "95m ) Total: 255, 000 ( direct ) 1, 147, 500 ( indirect ) Sub-component 1A: Supporting creation and strengthening of women platforms, community mobilization, and mindset change ( IDA US $ 5 million, including WHR US $ 450, 000 ) \u2022 Mobilization of women and girls in target districts to establish platforms at district level of new and existing women entrepreneurs. \u2022 Setting up a national digital platform for women entrepreneurs \u2022 Setting up a database of women - owned / managed businesses \u2022 Communication and outreach campaign \u2022 Service provider is contracted to conduct sessions on social norms / women safety \u2022 Advocacy on policy issues impacting women entrepreneurs \u2022 Women and adolescent girls \u2022 Existing women entrepreneurs \u2022 Refugee women \u2022 Men, male partners, community leaders benefiting from participating in behavior change interventions. \u2022 Women business leaders 150, 000 women and adolescent girls Estimated 1, 147, 500 men, male partners, communities, and household members indirectly benefit from platform and communication campaign", + "type": "database", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a 'database' that collects information on women-owned/managed businesses.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of information about women-owned or managed businesses.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a 'database' that collects information on women-owned/managed businesses.", + "contextual_signal": "described as a database that collects information", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 30, + "text": "The scope of the HCO expenditure is determined by: ( a ) focusing on woreda-level spending of the GPG across the four main human capital-related sectors62 ( i. e., the scope is woreda-level expenditures ); ( b ) excluding contributions from other World Bank-financed activities from this scope ( e. g., investments through GEQIP-E, CALM, Health SDG AF, and WASH CWA supply-side investments ) ( i. e., focuses on complementary investments to the ongoing World Bank-financed projects ); and ( c ) allocating expenditures proportionate to historical sectoral budget shares on woreda spending. To avoid overlap with the block grant contribution from the World Bank-financed Second Additional Financing ( AF ) to ESPES, the two disbursements will be sequenced and based on a different set of Disbursement-Linked Indicators ( DLIs ). 43. The expenditure framework for the HCO is based on the lessons learned and experiences from the ESPES program. The ESPES program expenditure framework which was based on the estimated woreda level 59 M. Frost and C. Rolleston ( 2013 ), \u201c Improving Education Quality, Equity and Access: A Report on Findings from the Young Lives School Survey ( Round 1 ) in Ethiopia, \u201d ( Oxford, UK: Young Lives ). 60 https: / / glcopmcgill. ca / wp-content / uploads / 2019 / 11 / Gender-Strategy-for-the-Education-and-Training-Sector-Ethiopia-Ministry-of-Education. pdf 61 Roads are part of the government \u2019 s five \u201c pro-poor \u201d sectors but are not included as a \u201c human capital \u201d sector in the HCO. 62 The four sectors cover health, education, agriculture, and water", + "ner_text": [ + [ + 1147, + 1172, + "named" + ], + [ + 1031, + 1039, + "Young Lives School Survey <> author" + ], + [ + 1044, + 1056, + "Young Lives School Survey <> author" + ], + [ + 1059, + 1063, + "Young Lives School Survey <> publication year" + ], + [ + 1188, + 1196, + "Young Lives School Survey <> data geography" + ], + [ + 1214, + 1225, + "Young Lives School Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Frost and C. Rolleston ( 2013 ), \u201c Improving Education Quality, Equity and Access: A Report on Findings from the Young Lives School Survey ( Round 1 ) in Ethiopia, \u201d ( Oxford, UK: Young Lives ). 60 https: / / glcopmcgill.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as a survey that provides findings from empirical research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as a survey that provides findings from empirical research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty. The project will also provide TA to INS to train and assist targeted ministries in integrating national statistics in the definition of sector strategies, programs, and public investment projects. 37 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank - financed project in the health sector. In total, the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 306, + 312, + "named" + ], + [ + 240, + 244, + "ECAM 4 <> reference year" + ], + [ + 255, + 270, + "ECAM 4 <> data description" + ], + [ + 851, + 861, + "ECAM 4 <> publisher" + ], + [ + 917, + 927, + "ECAM 4 <> publisher" + ] + ], + "validated": true, + "empirical_context": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty.", + "type": "survey", + "explanation": "In the context, 'ECAM 4' is mentioned as part of ongoing activities related to data collection and poverty mapping, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'ECAM 4' is referenced in the context of data collection and analysis activities.", + "contextual_reason_agent": "In the context, 'ECAM 4' is mentioned as part of ongoing activities related to data collection and poverty mapping, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for poverty mapping", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 51, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 40 Indicator Name People within 200m ( 0. 2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural Indicator Name of which women Definition / Description Frequency Bi-annually. Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural.", + "ner_text": [ + [ + 401, + 426, + "named" + ] + ], + "validated": true, + "empirical_context": "2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report.", + "type": "system", + "explanation": "In the context, it is mentioned as a primary data source, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'system' that collects and monitors data.", + "contextual_reason_agent": "In the context, it is mentioned as a primary data source, indicating it functions as a structured collection of data.", + "contextual_signal": "mentioned as a primary data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 18, + "text": "The Directorate of HPTs, MoH will work closely with KEMSA to ensure maximum efficiency in implementation of this sub-component. 16. Sub-component 1. 2: Health financing and quality of care reforms ( US $ 15 million ): This sub-component will support the recently introduced Government UHC reforms, including but not limited to the transition from the NHIF to the Social Health Authority ( SHA ). Areas of support include development of regulations and implementation roadmaps, design and rationalization of a benefit package, developing a framework for review of the benefit package including strengthening capacity for the health technology assessment, design of business processes and claims processing, stakeholder engagement among others. Additionally, the project will support the MoH to establish / strengthen regulatory bodies and operationalize quality of care reforms for improved strategic purchasing. 17. Sub-component 1. 3: Improve availability and use of quality data for decision making ( US $ 10 million ): This sub - component will support the Government to improve generation and use of strategic information for decision making, specifically through conducting relevant cross-sectional surveys including, but not limited to, the WHO STEPwise approach to non-communicable diseases ( NCD ) risk factor surveillance ( STEPS ) survey, and the Household Health Expenditure and Utilization Survey. Climate sensitive planning for surveys will be used and questions on climate and health impacts will be included in the survey to generate relevant data to inform decision making. Support will also be provided towards dissemination of findings to the lowest level.", + "ner_text": [ + [ + 1247, + 1347, + "named" + ] + ], + "validated": true, + "empirical_context": "Sub-component 1. 3: Improve availability and use of quality data for decision making ( US $ 10 million ): This sub - component will support the Government to improve generation and use of strategic information for decision making, specifically through conducting relevant cross-sectional surveys including, but not limited to, the WHO STEPwise approach to non-communicable diseases ( NCD ) risk factor surveillance ( STEPS ) survey, and the Household Health Expenditure and Utilization Survey. Climate sensitive planning for surveys will be used and questions on climate and health impacts will be included in the survey to generate relevant data to inform decision making.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that generates relevant data for decision making.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey designed to collect data on NCD risk factors.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that generates relevant data for decision making.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 89, + "text": "DLI 9 on interactive statistics Jordan MDTF funded BE and RE Technical Assistance to the Department of Statistics. Technical assistance and capacity building to support digitalization, data harmonization and the establishment of the National Data Repository. DLI 10 on institutionalizing effective health data use US-Center for Disease Control ( CDC ) ( on public health surveillance ). Advisory services by a qualified technical institution to help design the Health Data Quality Assurance ( HDQA ) and identify a suitable firm to support the government in carrying out the HDQA.", + "ner_text": [ + [ + 233, + 257, + "named" + ] + ], + "validated": true, + "empirical_context": "DLI 9 on interactive statistics Jordan MDTF funded BE and RE Technical Assistance to the Department of Statistics. Technical assistance and capacity building to support digitalization, data harmonization and the establishment of the National Data Repository. DLI 10 on institutionalizing effective health data use US-Center for Disease Control ( CDC ) ( on public health surveillance ).", + "type": "repository", + "explanation": "The term is indeed a dataset as it refers to a structured collection of data intended for use in the context of public health surveillance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'data' in its name and is associated with data harmonization.", + "contextual_reason_agent": "The term is indeed a dataset as it refers to a structured collection of data intended for use in the context of public health surveillance.", + "contextual_signal": "mentioned as part of the establishment of the National Data Repository", + "tags": [] + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 78, + "text": "Various climate model projections show a wide range of changes over Cameroon, with some projecting increases in average annual rainfall, and others a decrease. 69 Cameroon \u2019 s NDC indicates an intensification of droughts, and an increase in frequency and intensity of flooding events. 70 The MDK road corridor to be rehabilitated, and the network of communal and earth roads are in the Sudano \u2013 Sahelian region in the Far North of Cameroon. During rainy seasons traffic flow is interrupted for extended periods of time; it causes damage to existing culverts and bridges, and erodes surfaces due to over-embankment flow. The major risk hazards identified by the climate and disaster risk screening through the World Bank study \" Vulnerability Assessment and Adaptation Strategy of the Cameroon Road Network, \" as well as the analytical work carried out by the World Bank \u2019 s project team were extreme temperatures, heavy rainfall events, flooding, landslides, and erosion. 71 Rainfall projections indicate that the road \u2019 s exposure to heavy downpours and sustained periods of rainfall is likely to increase over time. 68 Think Hazard, consulted on February 17, 2021. URL: https: / / thinkhazard. org / en / report / 45-cameroon. 69 WBG Climate Knowledge Portal, consulted on February 17, 2021. URL: https: / / climateknowledgeportal. worldbank. org / country / cameroon / climate-data-historical. 70 Cameroon \u2019 s Nationally Determined Contribution to the United Nations Framework Convention on Climate Change; Revised in 2021. URL; https: / / unfccc. int / sites / default / files / NDC / 2022-06 / CDN percent20r percentC3 percentA9vis percentC3 percentA9e percent20CMR percent20finale percent20sept percent202021. pdf 71 Climate Vulnerability Assessment and Adaptation Strategy for the Cameroon Road Network; 20 July 2021; World Bank.", + "ner_text": [ + [ + 1232, + 1260, + "named" + ] + ], + "validated": false, + "empirical_context": "org / en / report / 45-cameroon. 69 WBG Climate Knowledge Portal, consulted on February 17, 2021. URL: https: / / climateknowledgeportal.", + "type": "portal", + "explanation": "However, it is mentioned as a portal and not specifically as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Knowledge Portal', which suggests a collection of information.", + "contextual_reason_agent": "However, it is mentioned as a portal and not specifically as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a portal, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 65, + "text": "The results monitoring framework assesses progress towards the PDO through key indicators, focusing on achieving improvement in quantity and quality of social services provided by MOSA, expanded coverage of SSNs ( the NPTP ), and viable income generating sub - projects ( see Annex 1 ). In addition, intermediate indicators will monitor the progress of each component over the life of the project. Monitoring and evaluation ( M & E ) arrangements will be centralized at the level of the PM, which will have a dedicated M & E Specialist, and rely on an M & E system adapted to the needs of each component. A computerized, modular MIS will be the central piece of the M & E system and will include: ( a ) a database on capacity building activities carried out under Component 1 and their beneficiaries ( e. g., date, place, topic, and the names of beneficiaries of a specific training ); ( b ) a module to track the processing of applications under Component 2, input baseline data for approved sub-projects, as well as monitor the physical progress in sub - project implementation and related tranche disbursements; ( c ) a module to register households in the NPTP, record the results of their eligibility assessment ( including their NPTP score ), and follow their utilization of benefits; ( d ) a financial management module for the whole project. 52. The SDCs will be the primary providers of information to be entered into the MIS and will be trained accordingly, as well as equipped with computers and connected to the internet. The MIS will be designed to allow data entry directly from the field, while the PM will perform data checks and produce summary reports. For transparency purposes, part of the data contained in the MIS will be accessible to the public ( on a viewing-only basis ) through the MOSA website. The M & E specialist will randomly visit SDCs, sub-project sites, and beneficiary households to verify the accuracy of the data entered, accompanied by the Internal Auditor as needed. 66", + "ner_text": [ + [ + 1538, + 1541, + "named" + ] + ], + "validated": false, + "empirical_context": "The SDCs will be the primary providers of information to be entered into the MIS and will be trained accordingly, as well as equipped with computers and connected to the internet. The MIS will be designed to allow data entry directly from the field, while the PM will perform data checks and produce summary reports. For transparency purposes, part of the data contained in the MIS will be accessible to the public ( on a viewing-only basis ) through the MOSA website.", + "type": "system", + "explanation": "'MIS' is not a dataset; it is described as a management information system that stores records but not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves data entry and management.", + "contextual_reason_agent": "'MIS' is not a dataset; it is described as a management information system that stores records but not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 49, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually. Data source MOE, Education Management Information System ( EMIS ) Methodology for Data Collection Review of EMIS-generated report. Responsibility for Data Collection MOE. Strengthened digital skills in civil service Description Number of senior officials and applicants to jobs in the civil service with digital skills certified as a result of training. Frequency Once, by Program closure. Data source Roster of digitally skilled trained and certified senior officials and applicants.", + "ner_text": [ + [ + 1052, + 1091, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Annually. Data source MOE, Education Management Information System ( EMIS ) Methodology for Data Collection Review of EMIS-generated report. Responsibility for Data Collection MOE.", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 81, + "text": "For over 30 years, Tajikistan has been without national-level data on its forests. During these decades, there have been significant quantitative and qualitative changes in forest ecosystems. The project will finance a national-level systematic NFI using a low sampling density. The NFI exercise will employ state-of-the-art methodologies for conducting forest inventories, including geospatial data. The NFI process will begin with a national land cover classification project that will ( a ) guide the selection of field locations for sample plot inventory and ( b ) classify the entire national land cover according to nationally agreed categories. The NFI will establish key parameters such as the total areas of forest by type and ownership ( as needed ), total standing volumes by species and size class, regeneration, incidence of pests and disease, and the distribution of key indicator species for biodiversity conservation. Other relevant data will also be collected, for example, evidence of illegal removals, erosion, forest fires, condition / species of pasture, and so on, as required. 24. Forest management plans. The project will finance the preparation and implementation of sustainable forest management plans for eight SFMEs in the project sites. Preparation of the plans will", + "ner_text": [ + [ + 47, + 66, + "named" + ] + ], + "validated": false, + "empirical_context": "For over 30 years, Tajikistan has been without national-level data on its forests. During these decades, there have been significant quantitative and qualitative changes in forest ecosystems.", + "type": "data", + "explanation": "'National-level data' is mentioned in a general context without specifying a structured collection or source, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'national-level data' refers to a structured collection of data due to the term 'data' being present.", + "contextual_reason_agent": "'National-level data' is mentioned in a general context without specifying a structured collection or source, indicating it is not a dataset.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 48, + "text": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 44 of 94 Wastewater treatment plants constructed or rehabilitated Number of wastewater treatment plants constructed or rehabilitated due to project activities. Semi - annually Reports from PIUs Municipal PIUs and ILBANK PMU Landfills constructed or rehabilitated Number of landfills constructed or rehabilitated due to project activities. Semi - annually Reports from PIUs Municipal PIUs and ILBANK PMU Satisfaction rate of female users of environmental infrastructure services and facilities provided by the project Satisfaction rate of female users of environmental infrastructure services and facilities provided by the project and their perception of responsiveness of the project to their needs and preferences. Start and end of the project. Targeted beneficiary satisfaction survey A targeted beneficiary satisfaction survey will be carried out among the targeted community groups at the start of the project and at an appropriate time after the services have been provided, towards the close of the project. The survey will be carried out by selected NGOs and consultants supported through the project, and results will be reported to the municipalities and ILBANK and the World Bank and EU. Number of municipal authorities and SKIs provided with capacity building support through the Project This indicator measures the number of municipal authorities and SKIs benefitted from capacity building activities in the selected municipalities that have been provided through this Project.", + "ner_text": [ + [ + 844, + 884, + "named" + ], + [ + 4, + 14, + "Targeted beneficiary satisfaction survey <> publisher" + ], + [ + 958, + 983, + "Targeted beneficiary satisfaction survey <> reference population" + ], + [ + 1277, + 1287, + "Targeted beneficiary satisfaction survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Start and end of the project. Targeted beneficiary satisfaction survey A targeted beneficiary satisfaction survey will be carried out among the targeted community groups at the start of the project and at an appropriate time after the services have been provided, towards the close of the project. The survey will be carried out by selected NGOs and consultants supported through the project, and results will be reported to the municipalities and ILBANK and the World Bank and EU.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data specifically designed to gather information from targeted community groups.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured survey aimed at collecting data on beneficiary satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data specifically designed to gather information from targeted community groups.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 15, + "text": "6 function of secondary education graduation and competitive screening for university admission. The Tawjihi is administered twice a year to grade 12 students, although any Jordanian having completed grade 12 is entitled to take the examination. In addition, Jordan has participated in the Trends in International Mathematics and Science Study ( TIMSS ) for grade 8 in mathematics and science since 1999, and PISA for 15 \u2010 year \u2010 olds in reading, mathematics, and science since 200616. Jordan also recently started administering the Early Grade Reading and Math Assessments ( EGRA and EGMA ) 17 for grades 2 and 3 students, although this assessment is run by external donors and has not been institutionalized. 19. The general Secondary Certificate Examination ( Tawjihi ) is the single most influential and decisive high \u2010 stakes exam in Jordan \u2019 s education system. This examination has been used for many decades with the dual purpose of a gateway to high school ( upon achieving a passing score ), and to determine the admissions track to higher education. Results from the exam split students into either technical education and vocational training ( TVET ) tertiary colleges or universities ( with extremely demanding cut \u2010 off scores to access the most sought \u2010 after faculties and programs ).", + "ner_text": [ + [ + 533, + 573, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition, Jordan has participated in the Trends in International Mathematics and Science Study ( TIMSS ) for grade 8 in mathematics and science since 1999, and PISA for 15 \u2010 year \u2010 olds in reading, mathematics, and science since 200616. Jordan also recently started administering the Early Grade Reading and Math Assessments ( EGRA and EGMA ) 17 for grades 2 and 3 students, although this assessment is run by external donors and has not been institutionalized. 19.", + "type": "assessment", + "explanation": "However, it is described as an assessment run by external donors and not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves assessments that could generate data.", + "contextual_reason_agent": "However, it is described as an assessment run by external donors and not as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "159_38147core", + "page": 71, + "text": "Assessment Survey will be annexed to the CSP. Data from the UNHCR supervised survey in turn includes demographic characteristics o f beneficiary families, income and livelihood activities, housing condition, land ownership and other vulnerability conditions etc updated in April 2006. This complements the CSP providing extensive baseline data for future development and monitoring purposes. Continuous Social Impact Assessment: The CSIA will record community perceptions and grievances on the identification o f IDP households for participation in the housing program, the resentment between IDP and non IDP communities and social issues that impact upon project implementation. This would offer a useful tool for the PPU to introduce mid-course corrections to address grass roots concerns. Please see Annex 3 for more information. Social Safeguard Issues: Overall, the PHP i s expected to have limited negative social impact. None o f the PHP components include activities that trigger a drastic change with adverse social consequences. The housing assistance will provide support for the construction or upgrading o f already existing temporary or partly-completed houses o f those IDP families with uncontestable land title. This i s a socially feasible program targeted at poor IDP families who were forcibly displaced and have since lived in temporary shelters in their preferred location o f choice for 16 years.", + "ner_text": [ + [ + 60, + 83, + "named" + ], + [ + 60, + 65, + "UNHCR supervised survey <> publisher" + ], + [ + 101, + 153, + "UNHCR supervised survey <> data description" + ], + [ + 273, + 283, + "UNHCR supervised survey <> publication year" + ], + [ + 330, + 343, + "UNHCR supervised survey <> data type" + ], + [ + 513, + 527, + "UNHCR supervised survey <> reference population" + ], + [ + 1278, + 1295, + "UNHCR supervised survey <> reference population" + ], + [ + 1435, + 1453, + "UNHCR supervised survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Assessment Survey will be annexed to the CSP. Data from the UNHCR supervised survey in turn includes demographic characteristics o f beneficiary families, income and livelihood activities, housing condition, land ownership and other vulnerability conditions etc updated in April 2006. This complements the CSP providing extensive baseline data for future development and monitoring purposes.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on beneficiary families and their conditions, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a supervised survey that collects various demographic and socioeconomic data.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on beneficiary families and their conditions, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 45, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 41 of 68 significant emphasis on ensuring that youth, and specifically those belonging to the most disadvantaged groups \u2013 women, refugees, and persons with disabilities, will be able to participate in this program and hence data gathered will be disaggregated by sub-group. Data gathered at the training provider level will include information on leadership and management, school resources, teacher and student management, infrastructure information, program initiation and completion, beneficiary surveys, and TVET satisfaction survey. Project implementation will be monitored through supervision missions and others conducted jointly by the Government and the World Bank. A set of technical, infrastructure and process evaluations and audits will be supported regularly to inform the project of adaptation measures being undertaken by the project. Lessons learned from relevant assessments will be used for course correction during project implementation. C. Sustainability 87. Project sustainability in this context is difficult to assess given that the overwhelming share of development expenditures comes from development partners. Most of government financing is used to cover recurrent expenditure in the post-basic education and training sector.", + "ner_text": [ + [ + 570, + 589, + "named" + ], + [ + 4, + 14, + "beneficiary surveys <> publisher" + ], + [ + 15, + 23, + "beneficiary surveys <> data geography" + ], + [ + 205, + 210, + "beneficiary surveys <> reference population" + ], + [ + 226, + 251, + "beneficiary surveys <> reference population" + ], + [ + 415, + 568, + "beneficiary surveys <> data description" + ], + [ + 746, + 756, + "beneficiary surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 41 of 68 significant emphasis on ensuring that youth, and specifically those belonging to the most disadvantaged groups \u2013 women, refugees, and persons with disabilities, will be able to participate in this program and hence data gathered will be disaggregated by sub-group. Data gathered at the training provider level will include information on leadership and management, school resources, teacher and student management, infrastructure information, program initiation and completion, beneficiary surveys, and TVET satisfaction survey. Project implementation will be monitored through supervision missions and others conducted jointly by the Government and the World Bank.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' is explicitly mentioned as part of the data gathered for the project, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' implies a structured collection of data gathered from participants.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' is explicitly mentioned as part of the data gathered for the project, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 53, + "text": "providing digital skills curriculum monitoring systems and reports PMU through schools providing digital skills curriculum", + "ner_text": [ + [ + 10, + 54, + "named" + ] + ], + "validated": false, + "empirical_context": "providing digital skills curriculum monitoring systems and reports PMU through schools providing digital skills curriculum", + "type": "system", + "explanation": "However, it is described as a system for monitoring rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'monitoring systems' which suggests data collection.", + "contextual_reason_agent": "However, it is described as a system for monitoring rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 62, + "text": "10. System Users will include MECNT, as well as the Ministry of Finance and the Ministry of Planning. These ministries will utilize project reports to assess program implementation and implement adaptive management. Additional system users include: Ministry of Finance Ministry of Planning Provincial delegations Local and international NGOs Private enterprises Civil Society Donors Congo Basin Forest Partnership \u2019 s State of the Forest Report Observatoire des Forits d \u2019 Afiique Centrale ( OFAC ) Data 1 1. Baseline Data: Where available, baseline data has been collected for the project. The abundance of indicator species i s based on the 2009 baseline established for the State of the Forest Report, which i s anticipated to be published in 2009 \u201d. Baselines for knowledge of forest rights and income in targeted rural areas will be established in the first year of the program with the help of the monitoring and evaluation expertise. 12. found. The same sources will be used routinely to avoid inconsistencies and misinterpretations. Data Sources: The monitoring tables, below, indicate where the data for each indicator will be 13. Data Collection: Primary and secondary data will be recorded according to their specified formats at the central and provincial levels. Where possible, this data will be cross-referenced annually with a data gathering homologue, either within the Ministry or an external agency, to ensure accuracy.", + "ner_text": [ + [ + 541, + 554, + "named" + ], + [ + 643, + 647, + "baseline data <> publication year" + ] + ], + "validated": true, + "empirical_context": "Additional system users include: Ministry of Finance Ministry of Planning Provincial delegations Local and international NGOs Private enterprises Civil Society Donors Congo Basin Forest Partnership \u2019 s State of the Forest Report Observatoire des Forits d \u2019 Afiique Centrale ( OFAC ) Data 1 1. Baseline Data: Where available, baseline data has been collected for the project. The abundance of indicator species i s based on the 2009 baseline established for the State of the Forest Report, which i s anticipated to be published in 2009 \u201d.", + "type": "data", + "explanation": "In this context, 'baseline data' is explicitly mentioned as collected for the project, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' is a dataset because it refers to collected data used for analysis.", + "contextual_reason_agent": "In this context, 'baseline data' is explicitly mentioned as collected for the project, indicating it serves as a data source.", + "contextual_signal": "described as collected for the project", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 28, + "text": "This component will also support the following activities: ( i ) development and operationalization of a road accident database management system disaggregated by gender, refugee, and host communities; ( ii ) training and awareness campaigns for schoolchildren, motorbike drivers, truck drivers, and refugee and host populations in the project area, differentiated for refugee and host community audiences. The road accident database management system will collate all data associated with road accidents, including data that can be attributed to climate change ( poor visibility, slippery surfaces, flooding, poor-quality pavement surface; ( iii ) the development of mobility plans that consider the voices of the beneficiary communities, where specific consultations with low - income women and groups in a situation of vulnerability will be established to identify their mobility priorities. Mobility plans will consider elements such as road prioritization to improve access to main destinations \u2014 like education and health facilities \u2014 climate vulnerabilities, identification of the best location for complementary interventions like storage facilities to help reduce women \u2019 s travel times, and definition of design features such as those related to violence prevention ( openness, visibility, lighting ); and ( iv ) various technical assistance activities to support the Ministry of Transport and the Road Safety Observatory in coordinating road safety management at the national level.", + "ner_text": [ + [ + 105, + 145, + "named" + ], + [ + 262, + 279, + "road accident database management system <> reference population" + ], + [ + 281, + 294, + "road accident database management system <> reference population" + ], + [ + 300, + 328, + "road accident database management system <> reference population" + ] + ], + "validated": true, + "empirical_context": "This component will also support the following activities: ( i ) development and operationalization of a road accident database management system disaggregated by gender, refugee, and host communities; ( ii ) training and awareness campaigns for schoolchildren, motorbike drivers, truck drivers, and refugee and host populations in the project area, differentiated for refugee and host community audiences. The road accident database management system will collate all data associated with road accidents, including data that can be attributed to climate change ( poor visibility, slippery surfaces, flooding, poor-quality pavement surface; ( iii ) the development of mobility plans that consider the voices of the beneficiary communities, where specific consultations with low - income women and groups in a situation of vulnerability will be established to identify their mobility priorities.", + "type": "database", + "explanation": "This is a dataset as it is described as a 'road accident database management system' that collates all data associated with road accidents.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'database' which often implies a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is described as a 'road accident database management system' that collates all data associated with road accidents.", + "contextual_signal": "mentioned as a data source that collates all data associated with road accidents", + "tags": [] + }, + { + "filename": "192_multi-page", + "page": 26, + "text": "These include the following: Active women clients / total active clients ( in \u00b0 %: Previous project experience with village credit associations shows that, while male heads of households usually sign the loan agreements, women are often involved in loan decision making and the investment activity. Project beneficiary assessments will confirm the degree of women ' s participation on an annual basis. Averane loan balance as % of GDP per capita: This measure of poverty targeting is expected to rise over the life of the project, as better-off flatland areas join the SCA movement starting in the third year of the project. However, time series data collected in beneficiary assessments and DTU evaluations for specific villages will enable the project to measure performance using this indicator. - 23 -", + "ner_text": [ + [ + 634, + 650, + "named" + ], + [ + 821, + 839, + "time series data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Averane loan balance as % of GDP per capita: This measure of poverty targeting is expected to rise over the life of the project, as better-off flatland areas join the SCA movement starting in the third year of the project. However, time series data collected in beneficiary assessments and DTU evaluations for specific villages will enable the project to measure performance using this indicator. - 23 -", + "type": "data", + "explanation": "In this context, it is indeed a dataset as it refers to data collected for measuring performance over time in evaluations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'time series data' suggests a structured collection of data over time.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it refers to data collected for measuring performance over time in evaluations.", + "contextual_signal": "follows 'will enable the project to measure performance using this indicator'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 50, + "text": "Data source / Agency Annual reports on the usage of digital services from MODEE, sectoral ministries, and the private sector. Verification Entity KACE. Procedure Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that use trusted, people-centric DPI and cross-checked by the IVA through spot surveys. DLI 2: Number of individuals adopting people-centric digital identity Formula The DLI will disburse US $ 3 for each unique individual activating people-centric digital identity, up to a total 3. 5 million individuals, in the limit of US $ 10. 5 million. Moreover, it will disburse the following additional amounts: \u2022 US $ 4 for each woman activating people-centric digital identity, up to 1. 75 million women, in the limit of US $ 7 million \u2022 US $ 5 for each elder activating people-centric digital identity, up to 200, 000 elders, in the limit of US $ 1 million \u2022 US $ 15 for each refugee activating people-centric digital identity, up to 100, 000 refugees, in the limit of US $ 1. 5 million Description The Program disburses against the number of unique individuals activating people-centric digital identity, disaggregated by type of user ( women, elders, refugees ). Data source / Agency Annual reports on digital ID implementation by MODEE. Verification Entity KACE.", + "ner_text": [ + [ + 353, + 365, + "named" + ] + ], + "validated": false, + "empirical_context": "Verification Entity KACE. Procedure Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that use trusted, people-centric DPI and cross-checked by the IVA through spot surveys. DLI 2: Number of individuals adopting people-centric digital identity Formula The DLI will disburse US $ 3 for each unique individual activating people-centric digital identity, up to a total 3.", + "type": "survey", + "explanation": "'Spot surveys' are mentioned as a method of cross-checking data, not as a structured collection of data themselves.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'spot surveys' is a dataset because it involves data collection.", + "contextual_reason_agent": "'Spot surveys' are mentioned as a method of cross-checking data, not as a structured collection of data themselves.", + "contextual_signal": "mentioned only as a method, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 280, + 285, + "named" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for various educational management purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in relation to data utilization for managing primary education.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for various educational management purposes.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 120, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 115 of 117 within the provisions of PPADA, 2015 and Regulations 2020. Training on auditing in the NEMIS Platform and monitor progressively how many of the audits are carried out on this platform. Fiduciary Systems MoE Recurrent Yearly Audits conducted on the NEMIS Platform. Preparation for Kenya to participate in the 2025 Programme for International Student Assessment \u2019 s for Development ( PISA-D ). Technical DLI 6 MoE and KNEC Due Date 31-Dec - 2025 a ) admission / signing up; b ) instrument translation and adaptation of instrument; ( c ) pre-test; ( d ) main survey; and ( e ) data analysis, report writing and dissemination Strengthened supply chain for sanitary towels and accountability Technical DLI 5 MoE Due Date 29-Dec - 2023 A module in NEMIS showing distribution of sanitary towels by school and number of girls benefiting. Review and analyze NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Environmental and Social Systems DLI 6 MoE KISE Due Date 30-Jun-2023 Reports generated from NEMIS with granular data on special needs children by category of disability, age and gender. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies.", + "ner_text": [ + [ + 175, + 189, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 115 of 117 within the provisions of PPADA, 2015 and Regulations 2020. Training on auditing in the NEMIS Platform and monitor progressively how many of the audits are carried out on this platform. Fiduciary Systems MoE Recurrent Yearly Audits conducted on the NEMIS Platform.", + "type": "system", + "explanation": "However, the context indicates it is a system for auditing rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a platform that handles audits and data.", + "contextual_reason_agent": "However, the context indicates it is a system for auditing rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 139, + "text": "Using STEP, comprehensive information of all contracts for goods, non-consultancy services and consultants \u2019 services awarded under the subcomponent, for all contracts subject to the World Bank \u2019 s prior-review as well as post-review, will be available automatically, including but not limited to: a brief description of the contract, estimated cost, procurement method, timelines of the bidding process, the number of participated bidders, names and reasons of rejected bidders, the date of contract award, the name of awarded supplier, contractor or consultant, final contract value; and the contractual implementation period. 66. Selection methods. Table 8. 2 describes the various procurement methods and thresholds to be applied for procurement activities. The selection methods and World Bank review thresholds will be determined in the PPSD and procurement plans in STEP. The World Bank review thresholds will be determined based on individual activity risks while the prior review thresholds in the table are indicative of high-risk activities.", + "ner_text": [ + [ + 873, + 877, + "named" + ] + ], + "validated": false, + "empirical_context": "2 describes the various procurement methods and thresholds to be applied for procurement activities. The selection methods and World Bank review thresholds will be determined in the PPSD and procurement plans in STEP. The World Bank review thresholds will be determined based on individual activity risks while the prior review thresholds in the table are indicative of high-risk activities.", + "type": "program", + "explanation": "However, STEP is mentioned as a program related to procurement activities, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of procurement plans and thresholds.", + "contextual_reason_agent": "However, STEP is mentioned as a program related to procurement activities, not as a data source.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 81, + "text": "Increasing digital data hosting and storage capacity and removing bottlenecks in the Government Service delivery Platforms to ensure the ability to quickly launch e-services during COVID-19 and any future health pandemics. Component 2: Enabling Digital Transformation of the Government The project will expedite the scale up of the existing shared digital government infrastructure to allow for speedy roll - out of new e-services in priority sectors, such as health, education, agriculture, justice, tourism, and trade. a. With more and more activities and interactions happening on-line, and with cyber threads on the rise around the world, the project will also support Strengthening of Uganda \u2019 s Cybersecurity Resilience and capabilities. b. Special attention will be paid to digital solutions to be developed in close collaboration with Ministry of Health, including Health Information Management System to support the logistics of vaccine distribution / tracking / reporting tools. c. Digital Literacy and Capacity Building activities are designed to quickly provide support for capacity building of employees on business continuity and remote working. Component3: Digital Inclusion of Refugees and Host communities a. Component 3 will through NBI extension and last mile connectivity programs connect refugees, host communities, public facilities, humanitarian organizations and private sector servicing refugees and host communities. Special attention will be paid to facilities that enhance pandemic measurement, prevention, and response. b. Component 3 will also enhance direct internet access at household and community levels, digital skills, digitization of MSMEs and enable e-services targeting refugees and host communities. Sub-components focused on skills and e-services are also contributing to better resilience and post-COVID-19 economic recovery.", + "ner_text": [ + [ + 873, + 909, + "named" + ] + ], + "validated": false, + "empirical_context": "b. Special attention will be paid to digital solutions to be developed in close collaboration with Ministry of Health, including Health Information Management System to support the logistics of vaccine distribution / tracking / reporting tools. c.", + "type": "system", + "explanation": "However, it is mentioned as a system supporting logistics, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Management System' which suggests data handling.", + "contextual_reason_agent": "However, it is mentioned as a system supporting logistics, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 16, + "text": "The government \u2019 s strategic goal for refugee management is to promote self-reliance, however, major gaps remain in achieving this aim. Data from the 2022 national census ( which included a refugee module for the first time ) and from the first round of a World Bank-led survey on refugee self-reliance showed that refugees generally enjoy good access to basic services such as education, health and water, but remain well short of self-reliance with respect to employment and income. 3 High poverty rates, lack of economic opportunity and limited access to finance in the refugee-hosting districts ( RHDs ) constrain employment and income-generating prospects for refugees and host communities alike. Building on the success of the ongoing Phase I of the World Bank-financed Socio-economic Inclusion of Refugees and Host Communities in Rwanda Project ( P164130 ) ( known in Kinyarwanda as \u201c Jya Mbere \u201d or \u201c move forward \u201d ), Phase II will support self-reliance for refugees and hosts through investments that will: ( a ) address the strain placed by the refugee presence on basic services and the environment in RHDs; ( b ) support the integration of refugees into national service delivery systems to lower costs and promote social cohesion; and ( c ) maximize the opportunity offered by the refugee presence to strengthen service delivery and economic opportunity for all. 4 Jya Mbere II will support the ongoing transition of the refugee response", + "ner_text": [ + [ + 150, + 170, + "named" + ] + ], + "validated": true, + "empirical_context": "The government \u2019 s strategic goal for refugee management is to promote self-reliance, however, major gaps remain in achieving this aim. Data from the 2022 national census ( which included a refugee module for the first time ) and from the first round of a World Bank-led survey on refugee self-reliance showed that refugees generally enjoy good access to basic services such as education, health and water, but remain well short of self-reliance with respect to employment and income. 3 High poverty rates, lack of economic opportunity and limited access to finance in the refugee-hosting districts ( RHDs ) constrain employment and income-generating prospects for refugees and host communities alike.", + "type": "census", + "explanation": "The 2022 national census is explicitly mentioned as a source of data in the context, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a national census, which typically involves structured data collection.", + "contextual_reason_agent": "The 2022 national census is explicitly mentioned as a source of data in the context, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 2, + "text": "et l ' Emploi ) ECAM Cameroon Household Survey ( Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages ) EITI Extractive Industries Transparency Initiative EU European Union FAO Food and Agriculture Organization of the United Nations GDP Gross Domestic Product GIZ German technical cooperation agency ( Deutsche Gesellschaft fur International Zusammenarbeit ) GoC Government of Cameroon HIPC Heavily Indebted Poor Country HR Human Resources ICOR Incremental Output Ratio ICT Information and Communication Technology IDF Institutional Development Fund IFMIS Integrated Financial Information Management System IFR Interim Financial Report IMF International Monetary Fund INS National Institute of Statistics ( Institut National des Statistiques ) LFS Labor Force Survey M & E Monitoring and Evaluation MDG Millennium Development Goals MINEDUB Ministry of Basic Education ( Minist\u00e8re de l \u2019 Education de Base ) MINSANTE Ministry of Health ( Minist\u00e8re de la Sant\u00e9 ) MINEPAT Ministry of Economy, Planning, and Regional Development ( Minist\u00e8re de l \u2019 Economie, de la Planification et de l \u2019 Am\u00e9nagement du Territoire ) MINFI Ministry of Finance ( Minist\u00e8re des Finances ) MINMAP Ministry of Public Contracts ( Minist\u00e8re des March\u00e9s Publics ) MOOC Massive Open Online Courses MTEF Medium-Term Expenditure Framework MTBF Medium-Term Budgetary Framework NPF New Procurement Framework PBF Performance-Based Financing PCU Project Coordination Unit PDO Project Development Objective PEFA Public Expenditure and Financial Accountability PFM Public Financial Management PFMP Public Finance Modernization Plan PIB Public Investment Budget PIP Public Investment Program PIM Project Investment Management PM Prime Minister PNDP Community Development Program ( Programme National", + "ner_text": [ + [ + 21, + 46, + "named" + ], + [ + 21, + 29, + "Cameroon Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "et l ' Emploi ) ECAM Cameroon Household Survey ( Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages ) EITI Extractive Industries Transparency Initiative EU European Union FAO Food and Agriculture Organization of the United Nations GDP Gross Domestic Product GIZ German technical cooperation agency ( Deutsche Gesellschaft fur International Zusammenarbeit ) GoC Government of Cameroon HIPC Heavily Indebted Poor Country HR Human Resources ICOR Incremental Output Ratio ICT Information and Communication Technology IDF Institutional Development Fund IFMIS Integrated Financial Information Management System IFR Interim Financial Report IMF International Monetary Fund INS National Institute of Statistics ( Institut National des Statistiques ) LFS Labor Force Survey M & E Monitoring and Evaluation MDG Millennium Development Goals MINEDUB Ministry of Basic Education ( Minist\u00e8re de l \u2019 Education de Base ) MINSANTE Ministry of Health ( Minist\u00e8re de la Sant\u00e9 ) MINEPAT Ministry of Economy, Planning, and Regional Development ( Minist\u00e8re de l \u2019 Economie, de la Planification et de l \u2019 Am\u00e9nagement du Territoire ) MINFI Ministry of Finance ( Minist\u00e8re des Finances ) MINMAP Ministry of Public Contracts ( Minist\u00e8re des March\u00e9s Publics ) MOOC Massive Open Online Courses MTEF Medium-Term Expenditure Framework MTBF Medium-Term Budgetary Framework NPF New Procurement Framework PBF Performance-Based Financing PCU Project Coordination Unit PDO Project Development Objective PEFA Public Expenditure and Financial Accountability PFM Public Financial Management PFMP Public Finance Modernization Plan PIB Public Investment Budget PIP Public Investment Program PIM Project Investment Management PM Prime Minister PNDP Community Development Program ( Programme National", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a 'Household Survey', indicating it collects structured data from households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a 'Household Survey', indicating it collects structured data from households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "161_28046", + "page": 48, + "text": "Placed in the context o f the benefit incidence analysis, this may an indication that public spending i s crowding out private spending on health care. From the available data, it i s also clear the private spending on health care i s largely targeted towards the private formal and informal sector and only in marginal amounts towards the public sector. In terms o f regional breakdown o f private spending, data from 2001 indicates that private spending i s the highest in Conakry, the capital city ( 926. 7 million GNF ) as compared with very low amounts ( almost zero ) in Mamou, Boke and Faranah. The data clearly indicates that private spending in largely concentrated in urban areas. Almost 99 % o f private spending on health care i s on an out-of-pocket ( OOP ) basis at point o f service. Other sources such as private health insurance, public sector employer contributions consist o f only 1 % o f total private health spending. Point o f service payments are largely considered to be regressive and in addition there are concerns with the efficiency and quality o f these private expenditures.", + "ner_text": [ + [ + 161, + 175, + "named" + ] + ], + "validated": false, + "empirical_context": "Placed in the context o f the benefit incidence analysis, this may an indication that public spending i s crowding out private spending on health care. From the available data, it i s also clear the private spending on health care i s largely targeted towards the private formal and informal sector and only in marginal amounts towards the public sector. In terms o f regional breakdown o f private spending, data from 2001 indicates that private spending i s the highest in Conakry, the capital city ( 926.", + "type": "data", + "explanation": "'Available data' is not specified as a structured collection or dataset in the context, but rather refers to general information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'available data' refers to a dataset because it implies a collection of information used for analysis.", + "contextual_reason_agent": "'Available data' is not specified as a structured collection or dataset in the context, but rather refers to general information.", + "contextual_signal": "mentioned only as general information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "094_PAD-final-02262018", + "page": 47, + "text": "This indicator measures the improved speed objective of the project for public transport services. Name: Share of passengers satisfied with quality of formal bus system Percentage 0. 00 80. 00 Annual Public transport user surveys will be commissioned by the RPTA on an annual basis among the public bus users. The RPTA BRT and bus operators", + "ner_text": [ + [ + 200, + 229, + "named" + ], + [ + 105, + 168, + "Public transport user surveys <> data description" + ], + [ + 258, + 262, + "Public transport user surveys <> publisher" + ], + [ + 292, + 308, + "Public transport user surveys <> reference population" + ], + [ + 314, + 318, + "Public transport user surveys <> publisher" + ], + [ + 356, + 374, + "Public transport user surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "00 80. 00 Annual Public transport user surveys will be commissioned by the RPTA on an annual basis among the public bus users. The RPTA BRT and bus operators", + "type": "survey", + "explanation": "This is indeed a dataset as it involves structured data collection through surveys conducted among users.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to annual surveys that collect data from public bus users.", + "contextual_reason_agent": "This is indeed a dataset as it involves structured data collection through surveys conducted among users.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 65, + "text": "About 85 percent of Syrians refugees registered with UNHCR live in Jordanian communities, while 15 percent live in refugee camps. 34. Employment issues are a long-standing concern in Jordan. Many of the most educated Jordanians emigrate to work in the Gulf countries. High reservation wages and a preference for public sector work result in high unemployment rates at the same time as Jordan brings in a large number of economic migrants to fill less desirable jobs. Currently, there are approximately 650, 000 economic migrants in Jordan, 324, 000 of whom have work permits, while the remainder work informally. 12 Escribano, A., and J. L. Guasch. 2005. \u201c Assessing the Impact of the Investment Climate on Productivity using Firm-Level Data: Methodology and Cases of Guatemala, Honduras, and Nicaragua. \u201d World Bank Research Paper 3621.. 13 Qureshi and Valde. 2007. \u201c State-Business Relations, Investment Climate Reform, and Firm Productivity in Sub-Saharan Africa. \u201d 14 Mi\u0161kinis and Byrka. 2014. \u201c The Role of Investment Promotion Agencies in Attracting Foreign Direct Investment. \u201d Ekonomica. 15 The census lists approximately 640, 000 Egyptians and 634, 000 Palestinians ( without Jordanian ID numbers ).", + "ner_text": [ + [ + 726, + 741, + "named" + ], + [ + 183, + 189, + "Firm-Level Data <> data geography" + ], + [ + 420, + 437, + "Firm-Level Data <> reference population" + ], + [ + 635, + 647, + "Firm-Level Data <> author" + ], + [ + 649, + 653, + "Firm-Level Data <> publication year" + ], + [ + 806, + 816, + "Firm-Level Data <> publisher" + ], + [ + 842, + 859, + "Firm-Level Data <> author" + ], + [ + 972, + 990, + "Firm-Level Data <> author" + ], + [ + 992, + 996, + "Firm-Level Data <> publication year" + ] + ], + "validated": true, + "empirical_context": "2005. \u201c Assessing the Impact of the Investment Climate on Productivity using Firm-Level Data: Methodology and Cases of Guatemala, Honduras, and Nicaragua. \u201d World Bank Research Paper 3621.", + "type": "data", + "explanation": "'Firm-Level Data' is indeed used in the research as it indicates a specific type of data collected for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Firm-Level Data' is a dataset because it refers to data collected at the firm level, which is often structured for analysis.", + "contextual_reason_agent": "'Firm-Level Data' is indeed used in the research as it indicates a specific type of data collected for empirical analysis.", + "contextual_signal": "described as data used for empirical analysis", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 40, + "text": "Accompanying and supporting documents are the BHRIMS plan 2003-2009, BHRIMS costing ( 2002 ), inventory o f stakeholders and baseline for indicators ( 2003 ). Management and support bodies have been established as the BHRIMS Technical Working Group, the BHRIMS secretariat in NACA, and the BHRIMS focal points at sector, program and district levels. 3. The information management has been decentralized under BHRIMS to the district level under the District Multisectoral A I D S Committees. The District A I D S Coordinators ( DACs ) who also serve as secretaries to the DMSACs, act as focal persons in data management at district level. Implementing partners and other stakeholders submit reports to the DMSACs. Data collection and analysis at district level has also been facilitated by the introduction in all districts o f a computerized system based on the UNAIDS \u2019 Country Response Information System. 4. The performance by BHRIMS has been demonstrated by the regular and timely production o f reports for the United Nation General Assembly Special Session on HIV / AIDS ( UNGASS ), Millennium Development Goal and for national level bodies. Capacity i s however a constraint and considerable effort has been put into training o f key stakeholders and supply o f IT material and software.", + "ner_text": [ + [ + 871, + 906, + "named" + ] + ], + "validated": false, + "empirical_context": "Implementing partners and other stakeholders submit reports to the DMSACs. Data collection and analysis at district level has also been facilitated by the introduction in all districts o f a computerized system based on the UNAIDS \u2019 Country Response Information System. 4.", + "type": "system", + "explanation": "However, it is described as a computerized system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a computerized system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 15, + "text": "Lack of local language proficiency; unregulated informal labor environments; limited access to arable land, inputs, and information for farming or to support climate resilient agriculture; and insufficient and unsuitable education and work experience all create barriers to refugees \u2019 integration into the labor market. 29 Ownership and use of a bank account in Uganda in 2017 stood at 66. 1 percent for men and 52. 7 percent for women. 30 by contrast, access to financial services for refugees is limited, particularly in the West Nile region, where only one in five refugee households have a loan and less 26 Uganda Enterprise Survey. 2014. 27 Oxfam ( 2016 ), Factors and Norms Influencing Unpaid Care Work: Household Survey Evidence from Five Rural Communities in Colombia, Ethiopia, the Philippines, Uganda, and Zimbabwe. 28 Government of Uganda ( 2020 ), National Child Policy, p 19. 29 World Bank. 2020. Uganda Market Solutions for Forced Displacement. Economic Opportunities for Host Communities and Refugees. 30 Global Findex indicators, 2017.", + "ner_text": [ + [ + 611, + 635, + "named" + ], + [ + 362, + 368, + "Uganda Enterprise Survey <> data geography" + ], + [ + 372, + 376, + "Uganda Enterprise Survey <> publication year" + ], + [ + 527, + 543, + "Uganda Enterprise Survey <> data geography" + ], + [ + 611, + 617, + "Uganda Enterprise Survey <> data geography" + ], + [ + 804, + 810, + "Uganda Enterprise Survey <> data geography" + ], + [ + 910, + 916, + "Uganda Enterprise Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "7 percent for women. 30 by contrast, access to financial services for refugees is limited, particularly in the West Nile region, where only one in five refugee households have a loan and less 26 Uganda Enterprise Survey. 2014.", + "type": "survey", + "explanation": "The Uganda Enterprise Survey is explicitly mentioned in the context as a source of data regarding financial services access.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey providing statistical information.", + "contextual_reason_agent": "The Uganda Enterprise Survey is explicitly mentioned in the context as a source of data regarding financial services access.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 33, + "text": "The DEP will build upon and create connections among socio - economic and ecological data made available through various development and scientific efforts that are taking place in the country and the project area. Indicators related to capacity building will be integrated into an overall planning, monitoring and evaluation system ( SPSE ) maintained by the DEP. Specialized forest management and forest product tracking systems, as well as data related to participatory zoning will be maintained and made available through the Forest Inventory and Management Service ( SPIAF ) of the MECNT. 81. Most baselines for tracking progress have been produced within current development efforts that the project will build upon, collaborate with, or finance directly. These baselines are presented in Annex 3. New, original baselines for parameters such as sources of income or rural and indigenous peoples ' knowledge of the Forest Code will be obtained through new, project-funded baseline studies. These studies will be conducted in the first year and repeated sampling will be done over the life of the project. 82. The set of project indicators will serve a dual purpose. They will give project managers information useful for adaptive management, and they will give policy makers clear benchmarks for evaluating the project ' s effectiveness. Output and process indicators will include among others: ( i ) statistics on office rehabilitation, equipment, and staff training at central and field offices; ( ii ) remote-sensing-based statistics on deforestation, illegal logging, and land-use changes; ( iii ) statistics on water sanitation, feeder roads, and other small participatory infrastructure projects implemented with project financing; ( iv ) trends in the abundance of key bioindicator species; ( v ) number and area covered by various types of forest land management plans \" plans agreed upon by MECNT and other l1 Timber concessions, community forests, protected areas, conservation concessions, community hunting zones, and others. 21", + "ner_text": [ + [ + 53, + 89, + "named" + ], + [ + 201, + 213, + "socio - economic and ecological data <> data geography" + ], + [ + 360, + 363, + "socio - economic and ecological data <> publisher" + ] + ], + "validated": true, + "empirical_context": "The DEP will build upon and create connections among socio - economic and ecological data made available through various development and scientific efforts that are taking place in the country and the project area. Indicators related to capacity building will be integrated into an overall planning, monitoring and evaluation system ( SPSE ) maintained by the DEP.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to structured collections of data that will be used for planning, monitoring, and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of data types relevant to socio-economic and ecological contexts.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured collections of data that will be used for planning, monitoring, and evaluation.", + "contextual_signal": "mentioned as part of a planning, monitoring and evaluation system", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 62, + "text": "While learning outcomes had been improving in Moldova before the pandemic ( figure 2. 1 ), there is significant inequality in learning, especially between the wealthiest and poorest households ( figure 1 ), and the pandemic has disproportionately affected the learning of students from poorer households, increasing this inequality ( figure 2 ). Inequality in learning outcomes leads to inequality in human capital, which in turn abets intergenerational transmission of poverty and poverty traps. The investments under the proposed project aim to not only recover the learning loss due to the pandemic but also strengthen the ability of the education system to improve learning outcomes of the most vulnerable including the poorest. The development benefits of education also extend to more environmentally friendly behavior. Investments in quality education lead to more rapid and sustainable economic growth and development. Figure 2. 1. Change in Reading Performance over 2009 \u2013 2018 Source: OECD PISA 2009 and 2018 data.", + "ner_text": [ + [ + 1000, + 1004, + "named" + ], + [ + 46, + 53, + "PISA <> data geography" + ], + [ + 982, + 986, + "PISA <> publication year" + ], + [ + 995, + 999, + "PISA <> publisher" + ], + [ + 1000, + 1009, + "PISA <> publication year" + ], + [ + 1014, + 1018, + "PISA <> publication year" + ] + ], + "validated": true, + "empirical_context": "1. Change in Reading Performance over 2009 \u2013 2018 Source: OECD PISA 2009 and 2018 data.", + "type": "dataset", + "explanation": "In this context, 'PISA' refers to a dataset as it is explicitly mentioned as a source of data for reading performance analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is associated with specific years of data collection.", + "contextual_reason_agent": "In this context, 'PISA' refers to a dataset as it is explicitly mentioned as a source of data for reading performance analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 51, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 40 Indicator Name People within 200m ( 0. 2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural Indicator Name of which women Definition / Description Frequency Bi-annually. Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural.", + "ner_text": [ + [ + 477, + 495, + "named" + ], + [ + 4, + 14, + "Water Supply Atlas <> publisher" + ] + ], + "validated": true, + "empirical_context": "2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report.", + "type": "atlas", + "explanation": "It is indeed a dataset as it is mentioned as a primary source of data for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is listed among primary data sources.", + "contextual_reason_agent": "It is indeed a dataset as it is mentioned as a primary source of data for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 76, + "text": "It will also enable COVID-19 digital education solutions, including in RHDs and support the Ministry of Education \u2019 s digital agenda including its \u201c Can ' t Wait To Learn \u201d digital learning platforms. Integrated Water Management and Development Project ( P163782 ) Total 280 RSW / WHR 58 Improve sustainable provision of water supply and sanitation service delivery and support long-term investments in infrastructure development, in RHDs in the West Nile and Northern region. Locations targeted for solar based water pumping have already been identified in Arua, Yumbe, Moyo, Adjumani, Lamwo, and Kiryandongo Provide digital enabling environment for remove water monitoring and strengthen efficiencies and effectiveness of water management systems. Gender Based Violence and Violence Against Children Prevention and Response Services in Uganda \u2019 s Refugee - Hosting Districts Report Status: Analysis completed, Total 0. 5 RSW / WHR N / A To mitigate GBV and prevent violence against children through engagement in productive activities in 4 RHDs. Increased access to more affordable connectivity will also increase likelihood of GB online risks. Project will support the project objective indirectly by including awareness and mitigation measures in digital skills training. Digital connectivity will strengthen case management for GBV and violence against", + "ner_text": [ + [ + 20, + 28, + "named" + ] + ], + "validated": false, + "empirical_context": "It will also enable COVID-19 digital education solutions, including in RHDs and support the Ministry of Education \u2019 s digital agenda including its \u201c Can ' t Wait To Learn \u201d digital learning platforms. Integrated Water Management and Development Project ( P163782 ) Total 280 RSW / WHR 58 Improve sustainable provision of water supply and sanitation service delivery and support long-term investments in infrastructure development, in RHDs in the West Nile and Northern region.", + "type": "concept", + "explanation": "'COVID-19' is mentioned as a topic related to digital education solutions, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'COVID-19' is a dataset because it is often associated with data collection and reporting.", + "contextual_reason_agent": "'COVID-19' is mentioned as a topic related to digital education solutions, not as a structured collection of data.", + "contextual_signal": "mentioned only as a topic, not as a data source", + "tags": [] + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 89, + "text": "A baseline customer survey will be carried out during the first months of implementation of the project. Information on project beneficiaries will help to understand project impacts in a disaggregated manner. Data collection will support the establishment of a database for future financial and economic analysis. The PIT will be responsible for collecting and reporting data as agreed in the PM & E Section included in the PIM and reflected in the Results Framework. 182. The PIT will be responsible for implementing the Management Information System in cooperation with the respective regional / technical units in the selected regions. The data and information collection, processing and analysis system will be designed for a country-wide application. In response to the need for adequate information to develop a detailed economic and financial analysis of the project, data collection will also include: ( a ) registration cost of tenure documents ( costs of producing a land title and / or other land tenure records or rights documents ); ( b ) time required for a property transaction ( ownership transfer lease of public land, time to produce a land title ); ( c ) property valuation ( changes in market values of properties with different land tenure rights ); ( d ) difference in productivity of land with different types of tenure security attached to them ( farmland, customary, formalized tenure security, and urban and potentially commercial land ); ( e ) differences in investments in land with different types of security linked to them ( irrigation investment in titled land vs other, choice of land for commercial investments ); and ( f ) use of tenure documents to access credit and loans. 183. Implementation Support Plan ( Strategy and Approach for Implementation Support ) 184. The strategy for Project Implementation Support by the World Bank reflects the nature of the project and its risk profile. The strategy aims at making the Government more efficient while remaining focused on implementation of the risk mitigation measures identified. The strategy is also an indicative and flexible instrument which will be revisited during project implementation and as part of the Implementation Status and", + "ner_text": [ + [ + 2, + 26, + "named" + ], + [ + 120, + 141, + "baseline customer survey <> reference population" + ], + [ + 916, + 953, + "baseline customer survey <> data description" + ], + [ + 1052, + 1092, + "baseline customer survey <> data description" + ], + [ + 1174, + 1192, + "baseline customer survey <> data description" + ], + [ + 1856, + 1866, + "baseline customer survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "A baseline customer survey will be carried out during the first months of implementation of the project. Information on project beneficiaries will help to understand project impacts in a disaggregated manner.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data intended to assess project impacts.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects structured information from customers.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data intended to assess project impacts.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 15, + "text": "( b ) Beneficiaries of safety net programs ( Core Indicator ), tracking in particular the number of female beneficiaries and the total number of NPTP beneficiaries ( programs such as NPTP, which offer education credits, fees waivers and health subsidies, are defined in the core indicators as \" other social assistance programs \" ). ( c ) NPTP beneficiaries from extremely poor households as a share of total NPTP beneficiaries, as a measure of the targeting accuracy of the program. ( d ) Percentage of SDC beneficiaries reporting improved quality of services provided by SDCs, measuring the centers ' improved capacity. Information for this indicator will come from opinion polls, beneficiary surveys and social audits. ( e ) Percentage of citizens reporting MOSA ' s improved responsiveness in delivering social assistance and development grants, measuring MOSA ' s improved capacity. This indicator will be measured through opinion polls and / or beneficiary surveys. III. PROJECT DESCRIPTION A. Project Components 20. The SPPP will have the following four components: ( 1 ) Social Development Centers ( SDCs ); ( 2 ) Community Social Development Program ( CSD ); ( 3 ) National Poverty Targeting Program ( NPTP ); and ( 4 ) Project Management ( PM ).", + "ner_text": [ + [ + 668, + 681, + "named" + ] + ], + "validated": false, + "empirical_context": "( d ) Percentage of SDC beneficiaries reporting improved quality of services provided by SDCs, measuring the centers ' improved capacity. Information for this indicator will come from opinion polls, beneficiary surveys and social audits. ( e ) Percentage of citizens reporting MOSA ' s improved responsiveness in delivering social assistance and development grants, measuring MOSA ' s improved capacity.", + "type": "survey", + "explanation": "'Opinion polls' are mentioned as a source of information rather than a structured collection of data on their own.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'opinion polls' is a dataset because it involves collecting data from respondents.", + "contextual_reason_agent": "'Opinion polls' are mentioned as a source of information rather than a structured collection of data on their own.", + "contextual_signal": "mentioned as a source of information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 58, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLIX Against DLR 9. 2: US $ 1 million for each core micro-data set made available via an online portal to users in adherence with the access protocols for up to a total of US $ 8 million. Against DLR 9. 3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "ner_text": [ + [ + 885, + 904, + "named" + ] + ], + "validated": false, + "empirical_context": "This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "type": "data", + "explanation": "'Administrative data' is mentioned in a general context without being identified as a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'administrative data' refers to a dataset due to its structured nature and potential use in analysis.", + "contextual_reason_agent": "'Administrative data' is mentioned in a general context without being identified as a specific dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 138, + "text": "APPENDIX A TO ANNEX 11: Location I Area & * I Min Democratic Republic of Congo M a x FOREST AND NATURE CONSERVATION PROJECT Okoku / Obokola Ntufia-Lowa Total SITE DESCRIPTION FOR GEF INVESTMENTS: MAIKO NATIONAL PARK 375 13 1 131 125 44 44 1750 612 1478 Maiko National Park Ndjula Bitule-Okungu 29. Maiko National Park i s the largest lowland forest park in eastern DRC. Remote sensing data indicates that more than 80 % of the landscape i s primary or old secondary forest. High-profile species that can be found in the landscape include Okapi, Congo Peacock, Chimpanzee, Forest Elephant, and the majority of the range of Grauer \u2019 s Gorilla. A recent survey ( October 2006 ) of the more accessible Southern Sector of the Maiko NP resulted in the following estimates for Gorilla abundance and the following elephant observations and encounter rates ( see Tables 1 & 2 below ). 3 7 2 0. 34 1 1 3 0. 06 Ntufia-Lowa Maiko NP I 875 I 306 I 866 Mundo I375 I 131 I 131 1 1 I Total 14 I 8 12 14 I I 30.", + "ner_text": [ + [ + 370, + 389, + "named" + ], + [ + 50, + 78, + "Remote sensing data <> data geography" + ], + [ + 196, + 215, + "Remote sensing data <> data geography" + ], + [ + 298, + 317, + "Remote sensing data <> data geography" + ], + [ + 545, + 558, + "Remote sensing data <> reference population" + ], + [ + 622, + 640, + "Remote sensing data <> reference population" + ], + [ + 660, + 672, + "Remote sensing data <> reference year" + ], + [ + 1010, + 1028, + "Remote sensing data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Maiko National Park i s the largest lowland forest park in eastern DRC. Remote sensing data indicates that more than 80 % of the landscape i s primary or old secondary forest. High-profile species that can be found in the landscape include Okapi, Congo Peacock, Chimpanzee, Forest Elephant, and the majority of the range of Grauer \u2019 s Gorilla.", + "type": "data", + "explanation": "In this context, 'remote sensing data' is used to provide empirical evidence about the landscape of Maiko National Park, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'remote sensing data' suggests a collection of data obtained through remote sensing techniques.", + "contextual_reason_agent": "In this context, 'remote sensing data' is used to provide empirical evidence about the landscape of Maiko National Park, confirming its role as a data source.", + "contextual_signal": "indicates a collection of data used for analysis", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 64, + "text": "55 Source: World Development Indicators. - ga s denotes the probability that a child aged a, from population group g, is alive at age a. This probability stems from our own calculations and are based on data from Population and Housing Census 2015 and on a methodology given by the United Nations ( 2002 ) ( see Table A. 1 ). The probability of a child aged 5 surviving to the age of 18 is assumed to be one. - gkla \uf071 denotes the probability of an individual aged a, from population group g, and with completion level l, has k type of employment. - gkla y denotes the average real annual earnings received by an individual aged a, from population group g, with education level l, and employment type k. Given that these data are missing for unpaid workers, we artificially build this age and education level earnings profiles by decreasing the corresponding profiles of self \u2010 employed workers by 75 percent. - kd denotes the annualized growth rate of real annual earnings in k type of employment. This rate is estimated over the 2005 \u2010 2010 period and is assumed to be identical for all population groups and education levels.", + "ner_text": [ + [ + 213, + 247, + "named" + ] + ], + "validated": true, + "empirical_context": "- ga s denotes the probability that a child aged a, from population group g, is alive at age a. This probability stems from our own calculations and are based on data from Population and Housing Census 2015 and on a methodology given by the United Nations ( 2002 ) ( see Table A. 1 ).", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as the source of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a source of data for calculations.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as the source of data used for empirical analysis.", + "contextual_signal": "mentioned as a source of data for calculations", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 35, + "text": "This can also provide a platform for remote supervision and real-time risk monitoring of project activities. 89. Data verification will be built into the M & E arrangements. Data will be verified on a sample basis by a third - party monitoring ( TPM ) agent. The TPM will be recruited to monitor progress of project activities and verify project data on a sample basis throughout implementation. The TPM will report directly to the PSC. The information collected by the TPM will help triangulate the data submitted by the various implementing arms including NTTIs and universities. Spending reported by participants will be verified through an independent audit of MoGEI and MoHEST accounts. Indicators that are reported will be cross-referenced with EMIS data. Effective verification will depend on the quality of EMIS data available, which will be strengthened with support from the Project. 90. The Project will invest in an integrated system for the management and analysis of data collected through various channels for effective monitoring. The integrated monitoring system will be designed to capture the data needed to report across all the indicators outlined in the Results Framework. This system will incorporate capacities for data importing, either manually or directly via online survey and / or SMS survey platforms and EMIS data. Data collected on mobile devices by project monitoring officers or from the communities will also be uploaded to the system.", + "ner_text": [ + [ + 815, + 824, + "named" + ], + [ + 603, + 615, + "EMIS data <> reference population" + ], + [ + 1310, + 1330, + "EMIS data <> data type" + ] + ], + "validated": true, + "empirical_context": "Indicators that are reported will be cross-referenced with EMIS data. Effective verification will depend on the quality of EMIS data available, which will be strengthened with support from the Project. 90.", + "type": "data", + "explanation": "In the context, 'EMIS data' is explicitly mentioned as a source of information that will be cross-referenced and verified, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS data' is a dataset because it is referenced in the context of verification and quality assessment.", + "contextual_reason_agent": "In the context, 'EMIS data' is explicitly mentioned as a source of information that will be cross-referenced and verified, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 65, + "text": "These are key variables in computing estimated potential impacts on poverty and consumption. Therefore, if updated, nationally representative consumption data becomes available, it should be considered. 4. The benchmark benefit scenarios considered in this analysis are the following: ( a ) CfW pilot. XAF 1, 200 per day wage for 80 days of work implemented in N ' Djamena ( b ) CT pilot. XAF 15, 000 per month per household for a period of 24 months ( paid every two months ) implemented in one Sahel region and one Sudanian region 5. Based on the ECOSIT data, these benefit packages will cover at least 40 percent of the food poverty gap in the respective regions in which the pilots will be implemented. 6. Table 5. 1 summarizes the estimated impact on poverty and consumption of the benefits provided through the CfW pilot. This shows that the benchmark scenario of 80 days at XAF 1, 200 per day could cover 40 percent of the food poverty gap. Given an expected budget of US $ 2. 0 million for the implementation of this subcomponent, it is estimated that up to 9, 000 individuals could participate in the pilot.", + "ner_text": [ + [ + 549, + 560, + "named" + ], + [ + 496, + 508, + "ECOSIT data <> data geography" + ], + [ + 517, + 532, + "ECOSIT data <> data geography" + ], + [ + 1132, + 1150, + "ECOSIT data <> usage context" + ] + ], + "validated": true, + "empirical_context": "XAF 15, 000 per month per household for a period of 24 months ( paid every two months ) implemented in one Sahel region and one Sudanian region 5. Based on the ECOSIT data, these benefit packages will cover at least 40 percent of the food poverty gap in the respective regions in which the pilots will be implemented. 6.", + "type": "data", + "explanation": "In the context, 'ECOSIT data' is explicitly referenced as a source of information that informs the benefit packages, indicating it is indeed used as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'ECOSIT data' suggests a structured collection of information used for analysis.", + "contextual_reason_agent": "In the context, 'ECOSIT data' is explicitly referenced as a source of information that informs the benefit packages, indicating it is indeed used as a dataset.", + "contextual_signal": "follows 'based on' indicating it is a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 53, + "text": "The World Bank Burundi Integrated Community Development Project ( P169315 ) Page 48 of 86 involved in associations receiving grants under the project. Women - Beneficiaries with increased access to incremental economic opportunities Refugees - Beneficiaries with increased access to incremental economic opportunities Beneficiaries satisfied with services provided under the project Beneficiaries for this indicator is defined as direct beneficiaries under Turikumwe. Mid-term and end of project. Survey Survey PIU Women - Beneficiaries satisfied with services provided under the project Refugees - Beneficiaries satisfied with services provided under the project Number of learning events attended by government of Burundi to promote understanding of development responses to forced displacement This indicator will measure the number of learning events attended by civil servants to promote understanding of development responses to forced displacement. Quarterly Project M & E system Regular reports PIU Women beneficiaries reporting that they have a say in community decision-making We have no baseline for this indicator, but data will be collected during the baseline survey. The current target is to increase this amount by 20 percent.", + "ner_text": [ + [ + 497, + 510, + "named" + ], + [ + 4, + 14, + "Survey Survey <> publisher" + ], + [ + 15, + 22, + "Survey Survey <> data geography" + ], + [ + 151, + 172, + "Survey Survey <> reference population" + ], + [ + 716, + 723, + "Survey Survey <> data geography" + ], + [ + 1007, + 1026, + "Survey Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Mid-term and end of project. Survey Survey PIU Women - Beneficiaries satisfied with services provided under the project Refugees - Beneficiaries satisfied with services provided under the project Number of learning events attended by government of Burundi to promote understanding of development responses to forced displacement This indicator will measure the number of learning events attended by civil servants to promote understanding of development responses to forced displacement. Quarterly Project M & E system Regular reports PIU Women beneficiaries reporting that they have a say in community decision-making We have no baseline for this indicator, but data will be collected during the baseline survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a survey that collects data on beneficiaries' satisfaction and other indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey' which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a survey that collects data on beneficiaries' satisfaction and other indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "ner_text": [ + [ + 86, + 90, + "named" + ], + [ + 17, + 21, + "KDHS <> reference year" + ], + [ + 92, + 96, + "KDHS <> publication year" + ], + [ + 241, + 246, + "KDHS <> data geography" + ], + [ + 412, + 416, + "KDHS <> reference year" + ] + ], + "validated": true, + "empirical_context": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level.", + "type": "survey", + "explanation": "KDHS is indeed a dataset as it is explicitly mentioned as a survey providing data for empirical analysis regarding health and demographic trends.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed KDHS is a dataset because it is referenced alongside specific data points and percentages related to health and demographic information.", + "contextual_reason_agent": "KDHS is indeed a dataset as it is explicitly mentioned as a survey providing data for empirical analysis regarding health and demographic trends.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 14, + "text": "With respect to tuberculosis ( TB ), directly-observed therapy ( DOTS ) coverage in Botswana i s 100 percent. While the TB case detection rate remained high at 80 percent in 2006, it dropped from the 88 percent level achieved in 2000. TB treatment success has also declined from 77 percent in 2000 to 70 percent in 2005, raising concerns regarding TB treatment resistance. An estimated 54 percent o f new TB patients are HIV-positive and 38 percent o f A I D S deaths are due to TB. TB mortality has increased from 236. 2 ( per 100, 000 ) in 1990 to 670. 2 ( per 100, 000 ) in 2005. Compared to the national A I D S treatment program, and despite o f the high co-prevalence, the national health system has not been able to implement an equally strong response to TB. Implementation o f the joint TB / HIV / AIDS strategy needs to be strengthened, particularly at community level where TB and HIV / AIDS programs continue to operate largely along parallel lines. The proposed Project will address strategic gaps supporting joint TB and HIV prevention efforts. 18. The HIV / AIDS-related burden o f disease has had the result o f consigning most health care workers to focus on various medical and, to a lesser degree, preventive aspects o f the Makgabaneng Survey ( 2003 ). BAIS I1 ( 2004 ). 5", + "ner_text": [ + [ + 1244, + 1262, + "named" + ], + [ + 84, + 92, + "Makgabaneng Survey <> data geography" + ], + [ + 1265, + 1269, + "Makgabaneng Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "18. The HIV / AIDS-related burden o f disease has had the result o f consigning most health care workers to focus on various medical and, to a lesser degree, preventive aspects o f the Makgabaneng Survey ( 2003 ). BAIS I1 ( 2004 ).", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned as a survey that contributes to understanding the HIV/AIDS-related burden of disease.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned as a survey that contributes to understanding the HIV/AIDS-related burden of disease.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 57, + "text": "Given the large demand of the national statistical system, the choice is made to focus either on the areas where the World Bank clearly has a comparative advantage among the donors or areas not supported by the other donors, namely, household survey, population census, national account, and archiving and dissemination. At the core of all this are two aspects: data collection and capacity building. The philosophy underlying data collection is to improve the design of the surveys to take into account the most recent methodological approaches. As for capacity building, the preference is given either to train staff locally or to use the learning-by - doing approach. Three subcomponents are distinguished as follows: Subcomponent 4. 1. Improving poverty-related data 39. Objective. The objective of this subcomponent is to improve the poverty related data production and analysis. 40. Current status. INS has implemented living conditions surveys in 1996, 2001, 2007, and 2014. The last three surveys have used very close methodologies and poverty indicators are comparable over", + "ner_text": [ + [ + 233, + 249, + "named" + ], + [ + 905, + 908, + "household survey <> author" + ], + [ + 925, + 950, + "household survey <> data type" + ], + [ + 954, + 958, + "household survey <> reference year" + ], + [ + 960, + 964, + "household survey <> reference year" + ], + [ + 966, + 970, + "household survey <> reference year" + ], + [ + 976, + 980, + "household survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Given the large demand of the national statistical system, the choice is made to focus either on the areas where the World Bank clearly has a comparative advantage among the donors or areas not supported by the other donors, namely, household survey, population census, national account, and archiving and dissemination. At the core of all this are two aspects: data collection and capacity building.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is mentioned alongside other data collection efforts and is a recognized method for gathering empirical data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data collected from households.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is mentioned alongside other data collection efforts and is a recognized method for gathering empirical data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 30, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems. It will be used to gather and consolidate data across the components. It will allow the project to track and report on project-specific data requirements disaggregated by group and sub-group ( refugee, host community, women, children, etc. ). The P-MIS will be housed at the MINEMA SPIU and accessible via a web-based application in project districts and at BRD. The system will be used to ( a ) collect data and information and monitor progress of project activities; and ( b ) generate reports, figures and tables for project M & E. 78. Regular progress monitoring will be complemented by externally-administered surveys to measure progress and effectiveness of activities and to understand beneficiary perceptions of socio-economic inclusion. Periodic technical audits will be carried out to verify infrastructure quality. A baseline study will be conducted during the inception phase and followed up with annual surveys and other assessments, a midterm evaluation, and at project closing, to evaluate qualitative and quantitative aspects of project results.", + "ner_text": [ + [ + 485, + 490, + "named" + ] + ], + "validated": false, + "empirical_context": "It will allow the project to track and report on project-specific data requirements disaggregated by group and sub-group ( refugee, host community, women, children, etc. ). The P-MIS will be housed at the MINEMA SPIU and accessible via a web-based application in project districts and at BRD. The system will be used to ( a ) collect data and information and monitor progress of project activities; and ( b ) generate reports, figures and tables for project M & E.", + "type": "system", + "explanation": "However, P-MIS is described as a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed P-MIS is a dataset because it is associated with data collection and reporting.", + "contextual_reason_agent": "However, P-MIS is described as a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 30, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems. It will be used to gather and consolidate data across the components. It will allow the project to track and report on project-specific data requirements disaggregated by group and sub-group ( refugee, host community, women, children, etc. ). The P-MIS will be housed at the MINEMA SPIU and accessible via a web-based application in project districts and at BRD. The system will be used to ( a ) collect data and information and monitor progress of project activities; and ( b ) generate reports, figures and tables for project M & E. 78. Regular progress monitoring will be complemented by externally-administered surveys to measure progress and effectiveness of activities and to understand beneficiary perceptions of socio-economic inclusion. Periodic technical audits will be carried out to verify infrastructure quality. A baseline study will be conducted during the inception phase and followed up with annual surveys and other assessments, a midterm evaluation, and at project closing, to evaluate qualitative and quantitative aspects of project results.", + "ner_text": [ + [ + 142, + 147, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 26 of 82 77. Project-specific MIS ( P-MIS ). The P-MIS will be web-based and user-friendly and will not duplicate existing systems.", + "type": "system", + "explanation": "However, the context describes it as a project-specific system and does not indicate it functions as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often refers to Management Information Systems that handle data.", + "contextual_reason_agent": "However, the context describes it as a project-specific system and does not indicate it functions as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 11, + "text": "1 I. STRATEGIC CONTEXT A. Country Context 1. Burundi \u2019 s economic performance improved over the last decade but the gains are fragile and poverty and vulnerability remain widespread. After the Arusha breakthrough peace agreements in 2000 and the subsequent decline in violence by 2005, the Government of Burundi managed to stabilize the country ' s economy in a fragile environment. However, since early 2015, the political crisis has reversed some of these previous gains and triggered a severe economic crisis, which impacts the most vulnerable and their ability to meet basic needs1. The economy contracted by seven percent in 2015 and prospects for recovery are still uncertain. At the same time, several donors have suspended aid to the country, decreasing the overall contribution from 13 to 10. 3 percent of Gross Domestic Product ( GDP ) between 2014 and 2015. Furthermore, public debt is increasing. 2. The latest poverty data2 shows that nearly two-thirds of Burundians are poor and that the 2015 Millennium Development Goals ( MDG ) target on poverty reduction ( 17. 1 percent ) was not met. Per capita gross national income more than doubled between 2005 ( US $ 130 ) and 2013 ( US $ 280 ) in nominal terms, but fell to US $ 270 in 2015. The country is now the second poorest in Africa after Malawi.", + "ner_text": [ + [ + 923, + 936, + "named" + ], + [ + 45, + 52, + "poverty data2 <> data geography" + ], + [ + 404, + 408, + "poverty data2 <> publication year" + ], + [ + 863, + 867, + "poverty data2 <> reference year" + ], + [ + 969, + 979, + "poverty data2 <> reference population" + ], + [ + 1002, + 1006, + "poverty data2 <> publication year" + ], + [ + 1103, + 1135, + "poverty data2 <> data description" + ] + ], + "validated": true, + "empirical_context": "2. The latest poverty data2 shows that nearly two-thirds of Burundians are poor and that the 2015 Millennium Development Goals ( MDG ) target on poverty reduction ( 17. 1 percent ) was not met.", + "type": "data", + "explanation": "This is indeed a dataset as it provides empirical information on poverty levels in Burundi.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data on poverty levels.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical information on poverty levels in Burundi.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 28, + "text": "18 women \u2019 s rights in the workplace ( targeting both employers and employees, including refugees ); ( ii ) share best practices amongst private sector firms in retaining and attracting women; ( iii ) raise awareness on the economic impact of stronger participation of women in the labor market; ( iv ) address gender norms; and ( v ) address concerns and raise awareness on the economic impact of sexual harassment at the workplace25. b. Gender-focused household and employer surveys. Supported by local and international experts and in partnership with relevant stakeholders, eight household and employer levels surveys will be conducted to shed light on the challenges faced by women in the Lebanese labor market. Such gender-disaggregated data are missing today and are critical for the newly established Ministry for relevant and adequate evidence-based policy making on gender. c. Gender database with gender-disaggregated data. A database will be set up at the OMSWA to compile existing and collect new ( see previous bullet point ) gender-disaggregated data. The data will be publicly available. d. Childcare provision action plan and launch of a pilot project. High-quality childcare accessibility and affordability are widely accepted as necessary areas needing improvement to enhance women \u2019 s participation in the labor market in Lebanon.", + "ner_text": [ + [ + 439, + 484, + "named" + ], + [ + 578, + 621, + "Gender-focused household and employer surveys <> data description" + ], + [ + 681, + 686, + "Gender-focused household and employer surveys <> reference population" + ], + [ + 694, + 715, + "Gender-focused household and employer surveys <> data geography" + ], + [ + 722, + 747, + "Gender-focused household and employer surveys <> data type" + ], + [ + 968, + 973, + "Gender-focused household and employer surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "b. Gender-focused household and employer surveys. Supported by local and international experts and in partnership with relevant stakeholders, eight household and employer levels surveys will be conducted to shed light on the challenges faced by women in the Lebanese labor market.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves structured surveys designed to gather empirical data on challenges faced by women in the labor market.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to structured surveys aimed at collecting data on gender issues.", + "contextual_reason_agent": "This is indeed a dataset as it involves structured surveys designed to gather empirical data on challenges faced by women in the labor market.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "161_28046", + "page": 14, + "text": "The project \u2019 s Task Force, also under the supervision o f the Secretary General, would follow i t s technical implementation and would ensure objectives are reached. The project would finance the recurrent costs of this Task Force, its logistical support ( transport and computer system ), technical assistance, and study tours in other African countries. Institutional responsibilities for the Medical Waste Management Plan would rest ( a ) for the overall responsibility, with the MOH Directorate o f Equipment and Infrastructure ( DIEM ), and ( b ) for the decentralized levels, with the General Director o f Hospitals, the Head o f the Health Centers and o f the Health Posts. 3. Monitoring and evaluation of outcomes / results A yearly health expenditures tracking survey would be carried out, as well as a yearly client satisfaction survey focusing on quality o f care. Those two tools along with data from the health information system, would provide the data necessary to assess progress and identify bottlenecks. Guinea has a management information system quite sophisticated where a monitoring o f health centers activities and finances i s undertaken every six months, thus providing facility-based information. For evaluation purposes, a DHS was be carried out in 2004 which will serve as base-line.", + "ner_text": [ + [ + 735, + 777, + "named" + ], + [ + 1023, + 1029, + "yearly health expenditures tracking survey <> data geography" + ], + [ + 1277, + 1281, + "yearly health expenditures tracking survey <> publication year" + ], + [ + 1328, + 1346, + "yearly health expenditures tracking survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "3. Monitoring and evaluation of outcomes / results A yearly health expenditures tracking survey would be carried out, as well as a yearly client satisfaction survey focusing on quality o f care. Those two tools along with data from the health information system, would provide the data necessary to assess progress and identify bottlenecks.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that will provide data necessary for assessment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is described as a survey that collects data on health expenditures.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that will provide data necessary for assessment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 12, + "text": "The sub - component will also have a range of specific interventions including: ( i ) the design and delivery of initial teacher training and accreditation; ( ii ) refinement of the teacher ranking system linked to TPD; ( iii ) the design and delivery of school leadership programs; ( iv ) teacher and principal appraisal; ( v ) a Trainer Development Program; and ( vi ) the development of a QA system for ITT and TPD. 24. Subcomponent 3. 2: Curriculum, Assessment and Learning Resources will: ( i ) where necessary review and revise the curriculum, learning materials and teacher guides developed under ERfKE I; ( ii ) develop a framework for long \u2013 term, continuing curriculum review; ( iii ) support the development of tools and training for assessment activities at the classroom and national levels ( NAfKE, TIMSS and PISA will be covered under Sub-component 2. 1 ); ( iv ) introduce benchmarking and item banking and related training; ( v ) update ICT equipment for schools and support existing ICT infrastructure; ( vi ) ensure ICT connectivity available in all schools to permit blended e-learning; ( vii ) ensure equity in distribution of learning resources across Jordan ( Financing for the goods associated with this activity is reflected in Component 5. ); and finally ( viii ) conduct Tawjihi-ERfKE alignment activities.", + "ner_text": [ + [ + 813, + 818, + "named" + ], + [ + 1174, + 1180, + "TIMSS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Subcomponent 3. 2: Curriculum, Assessment and Learning Resources will: ( i ) where necessary review and revise the curriculum, learning materials and teacher guides developed under ERfKE I; ( ii ) develop a framework for long \u2013 term, continuing curriculum review; ( iii ) support the development of tools and training for assessment activities at the classroom and national levels ( NAfKE, TIMSS and PISA will be covered under Sub-component 2. 1 ); ( iv ) introduce benchmarking and item banking and related training; ( v ) update ICT equipment for schools and support existing ICT infrastructure; ( vi ) ensure ICT connectivity available in all schools to permit blended e-learning; ( vii ) ensure equity in distribution of learning resources across Jordan ( Financing for the goods associated with this activity is reflected in Component 5. )", + "type": "assessment", + "explanation": "TIMSS is indeed a dataset as it is a well-known international assessment that provides data on educational performance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed TIMSS is a dataset because it is mentioned in the context of assessment activities.", + "contextual_reason_agent": "TIMSS is indeed a dataset as it is a well-known international assessment that provides data on educational performance.", + "contextual_signal": "mentioned as part of assessment activities", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 57, + "text": "These interim financial reports ( IFRs ) will be produced with information extracted from the Bertha System and will consolidate the Project \u2019 s financial data for all components using the cash basis. 59. At the end of each fiscal year, the CIM-AMFRI will prepare the annual financial statements for the Project, which will be audited. The second semester IFRs with accompanying notes will serve as the Project \u2019 s annual financial statements to be audited. 60. The following biannual IFRs [ to be prepared in Reais ] will be prepared for Project monitoring and management purposes and submitted to the Bank: a. IFR 1-A \u2013 Sources and Uses of Funds by Disbursement Category ( period to date, year to date, Project to date ): Shows the budgeted amounts against the actual expenditures ( i. e., documented expenditures ), including a variance analysis b. IFR 1-B \u2013 Uses of Funds by Project Component ( period to date, year to date, Project to date ): Shows the budgeted amounts against the actual expenditures ( i. e., documented expenditures ), including a variance analysis c. IFR 1-C \u2013 DA bank reconciliation, and accompanying bank statements d. Cash flow for the following period 61.", + "ner_text": [ + [ + 6, + 31, + "named" + ] + ], + "validated": false, + "empirical_context": "These interim financial reports ( IFRs ) will be produced with information extracted from the Bertha System and will consolidate the Project \u2019 s financial data for all components using the cash basis. 59.", + "type": "document", + "explanation": "However, it is not a dataset but rather a document summarizing financial information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves financial data consolidation.", + "contextual_reason_agent": "However, it is not a dataset but rather a document summarizing financial information.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [] + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 82, + "text": "The World Bank Strengthening Public Sector Efficiency and Statistical Capacity Project ( P151155 ) Page 79 of 93 \uf0b7 Increased share of competitive procurement methods \uf0b7 Increase value for money for public contract \uf0b7 Operationalization of MOOC Subcomponent 3. 2: Enhancing the capacity of organizations in the procurement system to carry out their functions \uf0b7 Clarified procurement rules, guidelines, and procedures \uf0b7 Setting an e-tracking system for the procurement chain 0. 5m Subcomponent 3. 3: Streamlining the procurement regulatory framework \uf0b7 Revised procurement code and related instruments 0. 7m Subcomponent 3. 4: Improving the management and monitoring of procurement performance: \uf0b7 Piloting individual performance contract approach in the procurement system \uf0b7 RRI to support procurement process performance in the pilot 3. 3m Improved decision - making process based on reliable statistical data Component 4: Enhancing the use of statistics for policy making \uf0b7 Timely production of reliable statistical data \uf0b7 Statistics widely disseminated Subcomponent 4. 1: Improvement of poverty-related data \uf0b7 Production of a series of Poverty Notes ( based on ECAM 4 and high - frequency surveys ) \uf0b7 Production of ECAM 5 \uf0b7 Analysis of the population census \uf0b7 Production of the LFS 5. 4m Subcomponent 4. 2: Strengthening the national accounts production \uf0b7 Quarterly production of improved national accounts ( including revised methodology for cross-border trade statistics ) \uf0b7 Creation of an economic simulation tool for MINEPAT simulation 2", + "ner_text": [ + [ + 1238, + 1255, + "named" + ], + [ + 4, + 14, + "population census <> publisher" + ] + ], + "validated": true, + "empirical_context": "3m Improved decision - making process based on reliable statistical data Component 4: Enhancing the use of statistics for policy making \uf0b7 Timely production of reliable statistical data \uf0b7 Statistics widely disseminated Subcomponent 4. 1: Improvement of poverty-related data \uf0b7 Production of a series of Poverty Notes ( based on ECAM 4 and high - frequency surveys ) \uf0b7 Production of ECAM 5 \uf0b7 Analysis of the population census \uf0b7 Production of the LFS 5. 4m Subcomponent 4.", + "type": "census", + "explanation": "In this context, it is explicitly mentioned as part of the production of reliable statistical data, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population census' typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "In this context, it is explicitly mentioned as part of the production of reliable statistical data, indicating its use as a data source.", + "contextual_signal": "follows 'production of' indicating it is a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + }, + "term_stats": { + "total": 6, + "validated": 5, + "not_validated": 1 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 42, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 31 Indicator Name Percent of the service areas achieve cost recovery ratio of 1. 1 under the project Definition / Description Percent of service areas supported by the project that achieve cost recovery ratio of 1. 1. Cost recovery ratio is the revenues collected over operating expenses for selected service areas under the project. Baseline was derived from the existing service areas that have been gazetted: Gulu, Mbale, Adjumani, Busia, Namasale, Kyegegwa-Mpara-Ruyonza, Namungalwe-Kaliro and Budaka-Kadama-Tirinyi-Kibuku - Butaleja-Busolwe. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Accounting System ( SCALA ), Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural and Urban / NWSC", + "ner_text": [ + [ + 703, + 725, + "named" + ] + ], + "validated": false, + "empirical_context": "Baseline was derived from the existing service areas that have been gazetted: Gulu, Mbale, Adjumani, Busia, Namasale, Kyegegwa-Mpara-Ruyonza, Namungalwe-Kaliro and Budaka-Kadama-Tirinyi-Kibuku - Butaleja-Busolwe. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Accounting System ( SCALA ), Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, the context indicates it is referred to as a system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Accounting System' in its name, suggesting it could contain structured data.", + "contextual_reason_agent": "However, the context indicates it is referred to as a system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 58, + "text": "Typical grievances range from information requests, questions about exclusion and inclusion from the program by the community-based targeting or the proxy-means test, requests for correction of errors in registration ( identification, household composition, socio - economic status ), complaints about stakeholders \u2013 program officials, service providers including mobile providers and financial intermediaries and NGOs in charge of the promotion activities, local committees, authorities ). The grievance redress will be built to provide resolution as close to the point of occurrence as possible and to guarantee accessibility by beneficiaries. If resolution is not satisfactory to the plaintiff, voluntary mediation via traditional conflict resolution mechanism available at the commune level could provide an additional layer of redress before escalating to the provincial and central levels. 37. This sub-component will support: ( a ) The development of an overall Management Information System, with modules including beneficiary lists, payment-related information, basic monitoring of operational processes including the delivery of BCC activities and the participation of beneficiary households, grievance redress records. The MIS should also produce regular reports on program", + "ner_text": [ + [ + 969, + 998, + "named" + ] + ], + "validated": false, + "empirical_context": "37. This sub-component will support: ( a ) The development of an overall Management Information System, with modules including beneficiary lists, payment-related information, basic monitoring of operational processes including the delivery of BCC activities and the participation of beneficiary households, grievance redress records. The MIS should also produce regular reports on program", + "type": "system", + "explanation": "However, it is described as a system that manages information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes modules for managing data related to beneficiaries and payments.", + "contextual_reason_agent": "However, it is described as a system that manages information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 36, + "text": "M & E teams will be established as members of the PITs at both the national. They will be responsible for collecting and sharing information presented in the results framework in accordance with the procedures laid out in the M & E monitoring plan, and entering the data into the integrated data platform. Data from each implementing agency will be aggregated with the data of others and used as the basis of quarterly progress reports. 82. Data generation and reporting. The data to track the key performance indicators come from ( a ) project-specific surveys and questionnaires, ( b ) project service providers ( women \u2019 s entrepreneurship platform managers, trainers, PFIs, facility managers ); ( c ) local governments; ( d ) consultant reports; and ( e ) supervising engineers \u2019 reports on construction progress. The MGLSD PIT is responsible for preparing the quarterly financial and progress report, consolidating information from each and from the integrated data platform. It will submit the submit quarterly project progress reports to the World Bank, the PTC, the PSC, and to other the stakeholders within 45 days of the end of each quarter.", + "ner_text": [ + [ + 537, + 580, + "named" + ] + ], + "validated": true, + "empirical_context": "Data generation and reporting. The data to track the key performance indicators come from ( a ) project-specific surveys and questionnaires, ( b ) project service providers ( women \u2019 s entrepreneurship platform managers, trainers, PFIs, facility managers ); ( c ) local governments; ( d ) consultant reports; and ( e ) supervising engineers \u2019 reports on construction progress. The MGLSD PIT is responsible for preparing the quarterly financial and progress report, consolidating information from each and from the integrated data platform.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as a source of data for tracking key performance indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to structured tools used for data collection.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of data for tracking key performance indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 12, + "text": "The World Bank Uganda Climate Smart Agricultural Transformation Project ( P173296 ) Page 7 of 81 STRATEGIC CONTEXT A. Country Context 1. Uganda \u2019 s gross domestic product ( GDP ) growth has been declining and poverty has been increasing. GDP growth averaged close to 8 percent per year the decade before 2012 but has since slowed to around 5 percent and is further projected to decline partly because of the Corona Virus Disease 2019 ( COVID-19 ) crisis. The latest poverty data show that poverty has moderately increased since 2012 / 13. According to the Uganda National Household Survey ( UNHS ), between 2012 and 2016, Uganda \u2019 s poverty rate declined to 21. 4 percent, that resulted in around 1. 4 million Ugandans slipping into poverty. A sizable portion of Uganda \u2019 s population remains vulnerable to poverty and significant welfare setbacks in the wake of a shock. About 44 percent are considered vulnerable and susceptible to falling into poverty because of climate and other shocks. While 8. 4 percent of households moved out of poverty in 2021, 10. 2 percent slipped into poverty in response to shocks. 1 2.", + "ner_text": [ + [ + 466, + 478, + "named" + ], + [ + 4, + 14, + "poverty data <> publisher" + ], + [ + 15, + 21, + "poverty data <> data geography" + ], + [ + 137, + 143, + "poverty data <> data geography" + ], + [ + 528, + 537, + "poverty data <> reference year" + ], + [ + 616, + 620, + "poverty data <> reference year" + ], + [ + 710, + 718, + "poverty data <> reference population" + ], + [ + 1049, + 1053, + "poverty data <> publication year" + ] + ], + "validated": true, + "empirical_context": "GDP growth averaged close to 8 percent per year the decade before 2012 but has since slowed to around 5 percent and is further projected to decline partly because of the Corona Virus Disease 2019 ( COVID-19 ) crisis. The latest poverty data show that poverty has moderately increased since 2012 / 13. According to the Uganda National Household Survey ( UNHS ), between 2012 and 2016, Uganda \u2019 s poverty rate declined to 21.", + "type": "data", + "explanation": "In this context, 'poverty data' is indeed used as a source of information to support claims about poverty trends.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'poverty data' refers to a dataset because it is mentioned in the context of statistical analysis regarding poverty rates.", + "contextual_reason_agent": "In this context, 'poverty data' is indeed used as a source of information to support claims about poverty trends.", + "contextual_signal": "follows 'the latest poverty data show that'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "185_multi-page", + "page": 43, + "text": "44 Project Components: Inputs ( budget for each Project Reports: From Components to component ): Outputs: 1 ) Prevention US $ TBD National and / or sub-regional HIV / AlDS progress reports 2 ) Care and support US $.. TBD Financial data from MOF and line ministries 3 ) Capacity building US $. TBD Project financial data 4 ) Partnerships US $.. TBD Survey data", + "ner_text": [ + [ + 348, + 359, + "named" + ] + ], + "validated": true, + "empirical_context": ". TBD Survey data", + "type": "survey", + "explanation": "In this context, 'Survey data' is explicitly mentioned, indicating it is used as a data source for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Survey data' implies a structured collection of data collected from a survey.", + "contextual_reason_agent": "In this context, 'Survey data' is explicitly mentioned, indicating it is used as a data source for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 13, + "text": "A Kebele ID is often required to access public and private services, obtain other IDs such as driver \u2019 s licenses and passports, and formal procedures like proving land ownership. 14 The features of the Kebele ID vary by kebele, but they generally display handwritten demographic information and address and include a stapled photo. 13. According to the 2017 ID4D-Findex Survey, 36 percent of the population aged 18 and older lack a Kebele ID, with a significant gender gap of 46 percent of women lacking one compared to 25 percent of men, creating barriers for a large portion of people to access services and economic opportunities. Kebele ID coverage reaches 70 percent for adults older than 25 and 80 percent for the highest income quintile. Obtaining a Kebele ID often requires residing in a location for a minimum period ( for example, six months ), which leads to exclusion of internal migrants and refugees. Being tied to residence also means that a Kebele ID cannot serve as a continuous identification throughout the life of an individual, as one may move. Most Kebele IDs display holder \u2019 s ethnicity, which is a potential source of discrimination. The lack of uniformity among the forms of Kebele ID cards and the ease of forgery add substantial identity risks for service providers.", + "ner_text": [ + [ + 359, + 377, + "named" + ], + [ + 354, + 358, + "ID4D-Findex Survey <> publication year" + ], + [ + 397, + 425, + "ID4D-Findex Survey <> reference population" + ], + [ + 677, + 697, + "ID4D-Findex Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "13. According to the 2017 ID4D-Findex Survey, 36 percent of the population aged 18 and older lack a Kebele ID, with a significant gender gap of 46 percent of women lacking one compared to 25 percent of men, creating barriers for a large portion of people to access services and economic opportunities. Kebele ID coverage reaches 70 percent for adults older than 25 and 80 percent for the highest income quintile.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to present empirical data regarding the population's access to Kebele IDs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on Kebele ID coverage.", + "contextual_reason_agent": "This is indeed a dataset as it is used to present empirical data regarding the population's access to Kebele IDs.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 93, + "text": "Unless counseling and testing i s closely linked with interventions to change sexual behavior ( e. g., reduction in the number o f sexual partners ), VCT and RHT will not realize their potential to avert HIV infections. 12. The country ' s HIV / MDS treatment response suflers from technical eficiency issues that are reflected in higher costs - Botswana ' s annual per-patient cost o f the first-line antiretroFira1 drugs i s relatively high. De Korte, Mazonde and Darkoh ( 2004 ) estimated the per-patient cost in the early phase o f the treatment program to be around US $ 690. On the other hand, using the spending figures from the NASA exercise and the actual number o f patients on ART for the year, the average per patient cost i s about US $ 760 per year. Back-of-the-envelop estimates made by Mapiki ( 2007 ) based on September 2006 patient and drug use data o f PEPFAR show that the cost-per-patient o f antiretroviral drugs alone i s US $ 839. Compared to these three cost estimates, the Thai MOPH antiretroviral cost for first line drugs i s about US $ 502, or about two-thirds o f the Botswana cost. ( Note, however, that Thailand ' s GPO produces a first-line fixed dose combination o f AZT + 3TC + NVP, which could explain the lower cost ). Certain drug All countries in southern Africa suffer from high Gini coefficients, indicating high income inequality. Botswana has succeeded in reducing poverty, although the poverty decline has been uneven across regions. Income inequality remains one o f the highest in the world, however. 15 84", + "ner_text": [ + [ + 827, + 867, + "named" + ] + ], + "validated": true, + "empirical_context": "On the other hand, using the spending figures from the NASA exercise and the actual number o f patients on ART for the year, the average per patient cost i s about US $ 760 per year. Back-of-the-envelop estimates made by Mapiki ( 2007 ) based on September 2006 patient and drug use data o f PEPFAR show that the cost-per-patient o f antiretroviral drugs alone i s US $ 839. Compared to these three cost estimates, the Thai MOPH antiretroviral cost for first line drugs i s about US $ 502, or about two-thirds o f the Botswana cost.", + "type": "data", + "explanation": "This is indeed a dataset as it provides specific data used for empirical analysis regarding patient and drug use.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific patient and drug use data from a particular time period.", + "contextual_reason_agent": "This is indeed a dataset as it provides specific data used for empirical analysis regarding patient and drug use.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 53, + "text": "This will enable management decisions to be made based on an assessment of whether the program is moving towards its objectives. A periodic financial audit will be conducted which will also help to identify and mitigate any potential sign of fraud and governance issues. The program monitoring system will rely on regular and accurate data collection and analysis to identify the timely implementation of activities, the achievement of intended results, and positive and negative unintended effects. i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii. Sources of Data: ( i ) the routine data collected through the claims processing system; ( ii ) data routinely collected at health facility level; ( iii ) additional data collected by MoPH such as from enrollment registers, spot checks and ad-hoc surveys; ( iv ) information collected during the verification processes; and ( v ) data from the MoPH rapid facility assessment.", + "ner_text": [ + [ + 821, + 824, + "named" + ] + ], + "validated": false, + "empirical_context": "i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii.", + "type": "system", + "explanation": "'HIS' is described as a monitoring system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HIS' is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "'HIS' is described as a monitoring system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 38, + "text": "Reformed Student Assessment and Certification System \uf0b7 First phase of Tawjihi reform completed and action plan for reform rollout is produced ( DLR # 7 ) \uf0b7 Reform of the Tawjihi is fully implemented so that its secondary graduation and certification function is separated from its function as a screening mechanism for university entrance ( DLR # 7 ) \uf0b7 Timely and adequate resources and tools are available to the Program Strengthened Management Education System ( DLI # 8 ) \uf0b7 Geographical Information System ( GIS ) operational and updated with latest data ( DLR # 8 ) \uf0b7 Approved annual budget increased to meet PforR Program needs ( DLR # 8 ) 2. Improved Teaching and Learning Conditions ( continued ) Improve School Climate \uf0b7 Develop and program interventions for improving socioemotional learning in schools with high proportion of Syrian refugees \uf0b7 Program for improving socio \u2010 emotional learning completed ( DLR # 5 ) \uf0b7 Proportion of schools with high number of Syrian refugees implementing the socioemotional learning program ( DLR # 5 ) \uf0b7 Reduction in number of reports of school disruption in targeted schools Improved school climate ( DLI # 5 ) Common Focus on refugees across Result Areas", + "ner_text": [ + [ + 477, + 508, + "named" + ] + ], + "validated": false, + "empirical_context": "Reformed Student Assessment and Certification System \uf0b7 First phase of Tawjihi reform completed and action plan for reform rollout is produced ( DLR # 7 ) \uf0b7 Reform of the Tawjihi is fully implemented so that its secondary graduation and certification function is separated from its function as a screening mechanism for university entrance ( DLR # 7 ) \uf0b7 Timely and adequate resources and tools are available to the Program Strengthened Management Education System ( DLI # 8 ) \uf0b7 Geographical Information System ( GIS ) operational and updated with latest data ( DLR # 8 ) \uf0b7 Approved annual budget increased to meet PforR Program needs ( DLR # 8 ) 2. Improved Teaching and Learning Conditions ( continued ) Improve School Climate \uf0b7 Develop and program interventions for improving socioemotional learning in schools with high proportion of Syrian refugees \uf0b7 Program for improving socio \u2010 emotional learning completed ( DLR # 5 ) \uf0b7 Proportion of schools with high number of Syrian refugees implementing the socioemotional learning program ( DLR # 5 ) \uf0b7 Reduction in number of reports of school disruption in targeted schools Improved school climate ( DLI # 5 ) Common Focus on refugees across Result Areas", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Geographical Information System' suggests a structured collection of geographic data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 30, + "text": "MINEMA will have overall responsibility for M & E, including the preparation of consolidated quarterly progress reports and an Annual Report to the World Bank. MINEMA, BRD, RTDA and BDF will have M & E specialists responsible for collecting and collating data on project outputs and outcomes. Reporting templates will be reviewed for Phase II and included in an updated M & E section in the PIM. The existing Project MIS will continue to be utilized. 55. Project results will be captured through baseline, annual and endline surveys. This will include surveys of users of project-financed infrastructure ( schools, health facilities, water systems, markets, etc. ) to directly track satisfaction with Jya Mbere II investments. Regular phone surveys will also be conducted for beneficiaries under component two to gather data on business sustainability and profitability to allow for real-time learning and course correction as necessary.", + "ner_text": [ + [ + 409, + 420, + "named" + ] + ], + "validated": false, + "empirical_context": "Reporting templates will be reviewed for Phase II and included in an updated M & E section in the PIM. The existing Project MIS will continue to be utilized. 55.", + "type": "system", + "explanation": "However, 'Project MIS' is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Project MIS' is a dataset because it includes 'MIS' which often refers to Management Information Systems that handle data.", + "contextual_reason_agent": "However, 'Project MIS' is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 1289, + 1294, + "named" + ], + [ + 4, + 14, + "NEMIS <> publisher" + ], + [ + 578, + 581, + "NEMIS <> author" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to education, specifically for refugee learners.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for managing primary education.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to education, specifically for refugee learners.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 73, + "text": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 70 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure list, management ( a school management council is one of the criteria of creation ) \u2022 Community pre-school Centers ( CPC ): These are pre-schools set up by the communities, managed by the community, with trained animators as teachers, using an adapted curriculum, and supervised by MINEDUB Years 2, 3, 4 and 5 \u2022 Pupils enrolled in CPC: number of pupils enrolled in rural CPC respecting standards ( see cumulative numbers in DLI table ) \u2022 Enrollment data to be provided by community pre-school centers ( CPCs ) and validated by IAEB ( Inspection d \u2019 Arondissement de l \u2019 Education de Base ) \u2022 Budget approved for subsequent years for CPC activities: Grant to schools, advocacy; sensitization; training; monitoring and final evaluation in year 5 ) MINEDUB submit documentation to IVA through the PCMU IVA agency verifies and sends the report to the Bank Disbursement for each DLI every year will be contingent upon verified achievement of every respective DLI result of that year Year 5 \u2022 Final evaluation of intervention on access to pre-school in rural areas completed and publicly disclosed \u2022 Public disclosure of final evaluation means availability of evaluation report on MINEDUB website \u2022 Budget", + "ner_text": [ + [ + 677, + 692, + "named" + ], + [ + 4, + 14, + "Enrollment data <> publisher" + ], + [ + 325, + 353, + "Enrollment data <> reference population" + ], + [ + 711, + 739, + "Enrollment data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 70 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure list, management ( a school management council is one of the criteria of creation ) \u2022 Community pre-school Centers ( CPC ): These are pre-schools set up by the communities, managed by the community, with trained animators as teachers, using an adapted curriculum, and supervised by MINEDUB Years 2, 3, 4 and 5 \u2022 Pupils enrolled in CPC: number of pupils enrolled in rural CPC respecting standards ( see cumulative numbers in DLI table ) \u2022 Enrollment data to be provided by community pre-school centers ( CPCs ) and validated by IAEB ( Inspection d \u2019 Arondissement de l \u2019 Education de Base ) \u2022 Budget approved for subsequent years for CPC activities: Grant to schools, advocacy; sensitization; training; monitoring and final evaluation in year 5 ) MINEDUB submit documentation to IVA through the PCMU IVA agency verifies and sends the report to the Bank Disbursement for each DLI every year will be contingent upon verified achievement of every respective DLI result of that year Year 5 \u2022 Final evaluation of intervention on access to pre-school in rural areas completed and publicly disclosed \u2022 Public disclosure of final evaluation means availability of evaluation report on MINEDUB website \u2022 Budget", + "type": "data", + "explanation": "'Enrollment data' is indeed a dataset as it is explicitly mentioned to be provided by community pre-school centers and is used for verification purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Enrollment data' is a dataset because it refers to specific information about pupil enrollment numbers.", + "contextual_reason_agent": "'Enrollment data' is indeed a dataset as it is explicitly mentioned to be provided by community pre-school centers and is used for verification purposes.", + "contextual_signal": "follows 'data to be provided by community pre-school centers'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 92, + "text": "According to the 2008 General Population and Housing Census, 80. 2 percent of the population owns land of which 62. 5 percent are men and 17. 7 percent are women. 55 In the absence of a formal inheritance law, women often are subject to discrimination enshrined in customary norms, in which the daughter \u201c comes in fifth place in the order of succession. \u201d Women also face challenges in accessing formal lines of credit due to a lack of collateral. USAID ( 2018 ) reports qualitative research that although Burundian women traditionally have more access to informal credit through community-based savings and lending groups, they rarely control the use of the credit at the household level. 8. Female entrepreneurs may also need additional training that allows them to overcome social gender roles and an aversion to risk and develop an \u2018 entrepreneurial mindset. \u2019 For example, a study in Togo found that female entrepreneurs who received \u2018 personal initiative training \u2019 \u2014 a proactive, self-starting approach that taught participants to look for ways to differentiate their business from others, anticipate problems, overcome setbacks, and foster better planning skills to create opportunities \u2014 saw their profits increase by 40 percent ( compared to a 5 percent increase for those who received traditional business training ).", + "ner_text": [ + [ + 22, + 59, + "named" + ], + [ + 17, + 21, + "General Population and Housing Census <> publication year" + ] + ], + "validated": true, + "empirical_context": "According to the 2008 General Population and Housing Census, 80. 2 percent of the population owns land of which 62.", + "type": "census", + "explanation": "The context confirms it is a dataset as it provides empirical data about land ownership from a census.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides empirical data about land ownership from a census.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 14, + "text": "When measured against OGP criteria on fiscal transparency, ATI, public official asset disclosures, and civil liberties, Djibouti has a score of only 17 percent, whereas the threshold for eligibility to OGP is set at 75 percent. Djibouti \u2019 s OGP eligibility score would be higher if recent reforms in the areas of asset disclosure, fiscal transparency, and ATI are taken into account. The Government is committed to substantially improving its score and eventually becoming an OGP member. Regarding the strengthening of assets declaration, in future, it will be critical to review and verify the truthfulness of declarations made. Currently, there are few methods and procedures for implementing verification. In addition, there is an absence of cooperation across public institutions. Setting up a Unique ID System to Improve Public Administration 11. Djibouti has several functional ( or sector-specific ) citizen registry systems with corresponding ID numbers that are not currently integrated; as such, there are inefficiencies in ensuring eligible citizens have access to services through better targeting, planning, and monitoring. Safety net programs for poor families \u2014 or for access to schools, health centers, and financial services \u2014 are hindered by the lack of a reliable foundational ID system.", + "ner_text": [ + [ + 907, + 931, + "named" + ] + ], + "validated": false, + "empirical_context": "Setting up a Unique ID System to Improve Public Administration 11. Djibouti has several functional ( or sector-specific ) citizen registry systems with corresponding ID numbers that are not currently integrated; as such, there are inefficiencies in ensuring eligible citizens have access to services through better targeting, planning, and monitoring. Safety net programs for poor families \u2014 or for access to schools, health centers, and financial services \u2014 are hindered by the lack of a reliable foundational ID system.", + "type": "system", + "explanation": "However, it is described as a system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'citizen registry systems' implies a collection of data related to citizens.", + "contextual_reason_agent": "However, it is described as a system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 86, + "text": "Expected Outputs or Outcomes from Component 3 ( i ) ( ii ) ( iii ) ( iv ) The administrative and financial management of ICCN and key sites are improved; Data on the 16 priority conservation sites are regularly updated and readily available at both the central level and on the ground through SyGIAP; The Conservation Law i s adopted and largely disseminated; and Maiko National Park i s effectively protected ( number of guards trained and equipped and seizures and infractions noted ) and managed ( infrastructure, and scientific studies realized ) with the participation of the local population ( community development initiatives implemented ). Stable trend in abundance of key bio-indicator species ( v ) Training offered through all project components 64. Training will be offered in all project components and by a wide array of sources: forest schools, training consultants, and training units in charge of discrete components of larger MOD contracts. The training program supported by the Project will consist of a variety of training modules and refresher courses. They can be divided into: i ) training in specific management systems, ii ) courses aimed at creating basic forest and nature conservation skills. Training; in specialized management systems 65.", + "ner_text": [ + [ + 293, + 299, + "named" + ] + ], + "validated": false, + "empirical_context": "Expected Outputs or Outcomes from Component 3 ( i ) ( ii ) ( iii ) ( iv ) The administrative and financial management of ICCN and key sites are improved; Data on the 16 priority conservation sites are regularly updated and readily available at both the central level and on the ground through SyGIAP; The Conservation Law i s adopted and largely disseminated; and Maiko National Park i s effectively protected ( number of guards trained and equipped and seizures and infractions noted ) and managed ( infrastructure, and scientific studies realized ) with the participation of the local population ( community development initiatives implemented ). Stable trend in abundance of key bio-indicator species ( v ) Training offered through all project components 64.", + "type": "system", + "explanation": "However, SyGIAP is described as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of data availability.", + "contextual_reason_agent": "However, SyGIAP is described as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 18, + "text": "The updated methodology will reflect the main improvements achieved during the first phase ( locality based, participative process, grievance mechanism, and so on ). 25. Households from Hodh Chargui, including those in the Mbera Refugee Camp that are considered poor, will be included in the Social Registry. This inclusion will be a positive step towards better coordination between humanitarian actors and the Government and between emergency and development support. The methodology to identify poor refugee households for inclusion in the Social Registry will be based on the profiling work carried out in 2018 by the UNHCR and WFP. It identified six different household categories: ( a ) deprived; ( b ) precarious; ( c ) unstable; ( d ) fragile ( e ) emerging; and ( f ) catalyst ( see figure 3 ). Each category gradually receives ( or will receive ) differentiated assistance based on its needs. This profiling work was carried out with the participation of camp and Government authorities. Based on preliminary data analysis, categories 1, 2 and 3 ( \u201c deprived \u201d, \u201c precarious \u201d, and \u201c unstable \u201d ) which include approximately 7, 500 households will enter the Social Registry. 26. This sub-component will finance: ( a ) the costs related to the community targeting ( Social Registry enrolment officers and related logistics and material ); ( b ) the socio-economic data collection costs for up to 350, 000 households ( 70, 000 households per year ); and ( c ) the costs related to quality insurance ( grievance mechanism, toll-free number, spot-check surveys, process evaluation, and so on ). Figure 3: Refugee categorization in Mauritania Source: UNHCR", + "ner_text": [ + [ + 1275, + 1290, + "named" + ], + [ + 223, + 241, + "Social Registry <> data geography" + ], + [ + 498, + 521, + "Social Registry <> reference population" + ], + [ + 610, + 614, + "Social Registry <> publication year" + ], + [ + 622, + 627, + "Social Registry <> publisher" + ], + [ + 1637, + 1647, + "Social Registry <> data geography" + ], + [ + 1656, + 1661, + "Social Registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "26. This sub-component will finance: ( a ) the costs related to the community targeting ( Social Registry enrolment officers and related logistics and material ); ( b ) the socio-economic data collection costs for up to 350, 000 households ( 70, 000 households per year ); and ( c ) the costs related to quality insurance ( grievance mechanism, toll-free number, spot-check surveys, process evaluation, and so on ). Figure 3: Refugee categorization in Mauritania Source: UNHCR", + "type": "registry", + "explanation": "In the context, it is associated with socio-economic data collection for households, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of socio-economic data.", + "contextual_reason_agent": "In the context, it is associated with socio-economic data collection for households, indicating it functions as a data source.", + "contextual_signal": "mentioned as a source for socio-economic data collection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 95, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 88 of 174 Generation capacity of renewable energy constructed or rehabilitated The indicator captures the total solar PV capacity that has been installed via mini grids or rehabilitated through solar PV hybridization. Semi - annually. NIGELEC and ANPER project database. Determined based on the reporting of the contractors responsible installing / hybridizing mini grids. NIGELEC / ANPER. Net greenhouse gas ( GHG ) emissions Project net greenhouse gas ( GHG ) emissions are calculated as an annual average of the difference between project gross ( absolute ) emissions aggregated over the economic lifetime of the project and the emissions of a baseline ( counterfactual ) scenario aggregated over the same time horizon. They are reported in metric tons of carbon dioxide equivalent per year. Annually NIGELEC ' s commercial department / ANPER and ANERSOL project databases. Based on data provided in the Owner ' s engineer report. 5. 73 million tCO2e are expected to be reduced during economic life of the project for an approximate annual reduction of 779, 215 tCO2e. MTR targets are the same as the end targeted because the indicator measures the annual average reductions over the entire economic analysis period.", + "ner_text": [ + [ + 938, + 945, + "named" + ] + ], + "validated": false, + "empirical_context": "They are reported in metric tons of carbon dioxide equivalent per year. Annually NIGELEC ' s commercial department / ANPER and ANERSOL project databases. Based on data provided in the Owner ' s engineer report.", + "type": "project", + "explanation": "'ANERSOL' is mentioned as a project, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ANERSOL' is a dataset because it is mentioned alongside databases.", + "contextual_reason_agent": "'ANERSOL' is mentioned as a project, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 81, + "text": "It revealed several weaknesses in the PFM system attributed to years of civil war, including budget formulation and execution, financial reporting and oversight systems as well as weak linkages between agreed policies for budgeting planning and execution. However since then, significant progress has been made in all areas. 2. The introduction and now fully operational of an interim Financial Management Information System ( FMIS ) which generates standard quarterly budget execution reports and reports on poverty-reducing expenditure and / or HIPC expenditure execution, 3. The adoption and implementation of a new unified functional and economic budget classification system and a double-entry accounting system has served to improve budget monitoring while also addressing weakened treasury controls. As a result, the closure of the Government \u2019 s extra-budgetary accounts is successfully on track. The Audit Court ( Cour des Comptes ) established in 2004 has been an important step towards the strengthening of jurisdictional control over public finance management", + "ner_text": [ + [ + 38, + 48, + "named" + ] + ], + "validated": false, + "empirical_context": "It revealed several weaknesses in the PFM system attributed to years of civil war, including budget formulation and execution, financial reporting and oversight systems as well as weak linkages between agreed policies for budgeting planning and execution. However since then, significant progress has been made in all areas.", + "type": "system", + "explanation": "However, the context indicates that it refers to a system rather than a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PFM system' sounds like a structured collection of data.", + "contextual_reason_agent": "However, the context indicates that it refers to a system rather than a data source.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 55, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 51 of 74 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Beneficiaries of social safety net programs This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider will document payment data and share with the implementing partner Implementing Partner Beneficiaries of social safety net programs - Female This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider and implementing agency will document payment data Implementing Partner Beneficiary households of social safety net programs - Refugees The number of total beneficiaries HHs that are This indicator will be SNSOP MIS which hosts The implementing partner will collect Implementing Partner", + "ner_text": [ + [ + 1237, + 1246, + "named" + ], + [ + 4, + 14, + "SNSOP MIS <> publisher" + ], + [ + 306, + 349, + "SNSOP MIS <> reference population" + ], + [ + 459, + 500, + "SNSOP MIS <> data description" + ], + [ + 1158, + 1166, + "SNSOP MIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "The payment service provider will document payment data and share with the implementing partner Implementing Partner Beneficiaries of social safety net programs - Female This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider and implementing agency will document payment data Implementing Partner Beneficiary households of social safety net programs - Refugees The number of total beneficiaries HHs that are This indicator will be SNSOP MIS which hosts The implementing partner will collect Implementing Partner", + "type": "management information system", + "explanation": "It is indeed a dataset as it is explicitly mentioned to host data related to beneficiaries and payments.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that hosts beneficiary registration and payment data.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly mentioned to host data related to beneficiaries and payments.", + "contextual_signal": "mentioned as a data source that hosts beneficiary registration and payment data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 19, + "text": "The proposed project will also support the Social Registry Directorate to scale-up its communication and outreach, while strengthening data protection. Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9. In addition, knowledge about the objectives, modalities and strengths of the Social Registry is limited, within the government and among partners. These gaps will be addressed by intensifying the Social Registry \u2019 s communication and outreach efforts. User agreements will specify authorized uses of the data and protection of personal data. 30. The project will also encourage the use of the Social Registry as a tool for program coordination. The objective of this activity is to increase the SCAPP Directorate, Taazour and sectorial ministries \u2019 ownership of the Social Registry, by fostering its use for integrated service provision to poor and vulnerable households, and for better planning of investments according to needs. 31. This sub-component will finance the costs associated with: ( a ) the pilot Social Registry card and its evaluation, potential scale-up to 200, 000 households; ( b ) the development of the outreach strategy and related tools and communication campaign; and ( c ) workshops, analytical and coordination capacity building.", + "ner_text": [ + [ + 841, + 856, + "named" + ], + [ + 915, + 945, + "Social Registry <> reference population" + ], + [ + 1345, + 1363, + "Social Registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "The project will also encourage the use of the Social Registry as a tool for program coordination. The objective of this activity is to increase the SCAPP Directorate, Taazour and sectorial ministries \u2019 ownership of the Social Registry, by fostering its use for integrated service provision to poor and vulnerable households, and for better planning of investments according to needs. 31.", + "type": "registry", + "explanation": "The context indicates that the Social Registry is used for program coordination and integrated service provision, suggesting it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Social Registry' which implies a structured collection of data.", + "contextual_reason_agent": "The context indicates that the Social Registry is used for program coordination and integrated service provision, suggesting it functions as a data source.", + "contextual_signal": "mentioned as a tool for program coordination", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 17, + "text": "The overall lower secondary level repetition rates are higher for male students than female students. Drop-out rates rose sharply from 3. 7 percent to 7 percent between 2013 and 2015 and are higher for female students. In terms of learning outcomes, the 2018 Early Grade Mathematics Assessment ( EGMA ) test results showed that on average, grade two students were only able to solve 1 out of 6 mathematics problems, and almost 60 percent had zero scores. Although they represent slight improvements over the 2017 EGMA scores, weak performance in mathematics remains a major concern. 23. Assessment systems are not fully utilized for learning and policy decision-making and need to be redesigned to capture student performance against learning standards. Fifth grade OTI results are managed at the national level while the second-grade results are managed at the school level. Currently, only basic analysis is conducted for the fifth grade OTI and BEF exam results. Strengthened MENFOP capacity to analyze and use assessment data would optimize the feedback systems to inform teaching practices. 24. A process of curriculum modernization has begun. The existing curriculum was introduced in 2011, and a revision is currently underway. The revision aims to make appropriate adjustments, additions and improvements to introduce innovative pedagogical practices, focus on mathematics, languages and science, integrate Information, Communication and Technology ( ICT ) into curricula, and develop digital and 21st century skills. Revisions have started, and work related to materials development, trialing and production will begin in September 2019.", + "ner_text": [ + [ + 259, + 293, + "named" + ] + ], + "validated": false, + "empirical_context": "7 percent to 7 percent between 2013 and 2015 and are higher for female students. In terms of learning outcomes, the 2018 Early Grade Mathematics Assessment ( EGMA ) test results showed that on average, grade two students were only able to solve 1 out of 6 mathematics problems, and almost 60 percent had zero scores. Although they represent slight improvements over the 2017 EGMA scores, weak performance in mathematics remains a major concern.", + "type": "assessment", + "explanation": "However, it is mentioned as an assessment tool rather than a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Assessment' in the title, suggesting it could contain data on student performance.", + "contextual_reason_agent": "However, it is mentioned as an assessment tool rather than a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 394, + 398, + "named" + ] + ], + "validated": false, + "empirical_context": "The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it involves the collection of educational data.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 77, + "text": "A statistical analysis of Chad \u2019 s Household Consumption and Informal Sector Surveys by the World Bank identified a number of factors influencing household poverty, including some that disproportionally affect female-headed households. For example, employment of heads of households in the public service or in nonagricultural activities ( which is less common among female heads ) is correlated with lower poverty incidence. Livestock ownership ( which is also less common among female-headed households ) is also correlated with lower poverty rates. 48 Women \u2019 s vulnerability within households is due to their lack of financial autonomy. ECOSIT 3 found that only 23 percent of women were responsible for deciding 44 United Nations fertility data ( 2017 ). https: / / www. un. org / en / development / desa / population / publications / dataset / fertility / wfd2017. asp 45 World Bank. 2015. Systematic Country Diagnostic, Chad. Report No. 96537-TD. Washington, DC: World Bank. 46 https: / / repository. uneca. org / ds2 / stream /? # / documents / b47f8dc9-8948-5d3e-bae5-bc8ff64fa7ba / page / 3 47 World Bank. 2013. Chad - Poverty Note: Dynamics of Poverty and Inequality Following the Rise of the Oil Sector. http: / / documents. worldbank. org / curated / en / 201821468015589462 / Chad-Poverty-note-dynamics-of-poverty-and-inequality - following-the-rise-of-the-oil-sector 48 World Bank. 2015. Systematic Country Diagnostic, Chad. Report No. 96537-TD. Washington, DC: World Bank.", + "ner_text": [ + [ + 35, + 84, + "named" + ], + [ + 26, + 30, + "Household Consumption and Informal Sector Surveys <> data geography" + ], + [ + 92, + 102, + "Household Consumption and Informal Sector Surveys <> publisher" + ], + [ + 210, + 234, + "Household Consumption and Informal Sector Surveys <> reference population" + ], + [ + 249, + 337, + "Household Consumption and Informal Sector Surveys <> data description" + ], + [ + 426, + 445, + "Household Consumption and Informal Sector Surveys <> data description" + ], + [ + 877, + 887, + "Household Consumption and Informal Sector Surveys <> publisher" + ], + [ + 889, + 893, + "Household Consumption and Informal Sector Surveys <> publication year" + ], + [ + 926, + 930, + "Household Consumption and Informal Sector Surveys <> data geography" + ], + [ + 969, + 979, + "Household Consumption and Informal Sector Surveys <> publisher" + ], + [ + 1103, + 1113, + "Household Consumption and Informal Sector Surveys <> publisher" + ], + [ + 1115, + 1119, + "Household Consumption and Informal Sector Surveys <> publication year" + ], + [ + 1121, + 1125, + "Household Consumption and Informal Sector Surveys <> data geography" + ], + [ + 1384, + 1394, + "Household Consumption and Informal Sector Surveys <> publisher" + ], + [ + 1433, + 1437, + "Household Consumption and Informal Sector Surveys <> data geography" + ], + [ + 1476, + 1486, + "Household Consumption and Informal Sector Surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "A statistical analysis of Chad \u2019 s Household Consumption and Informal Sector Surveys by the World Bank identified a number of factors influencing household poverty, including some that disproportionally affect female-headed households. For example, employment of heads of households in the public service or in nonagricultural activities ( which is less common among female heads ) is correlated with lower poverty incidence.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as a source of data for statistical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to surveys that collect data on household consumption and the informal sector.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as a source of data for statistical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 43, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 38 of 61 Beneficiaries of social safety net programs Semester Programs MIS Request based on the programs MIS - assumptions: ( i ) 6 individuals per household and ( ii ) 70 % of Elmaouna beneficiaries are also benefitting from Tekavoul and should not be counted twice. Monitoring and evaluation team Beneficiaries of social safety net programs - Female Semester Programs MIS Request based on the programs MIS Monitoring and evaluation team Beneficiaries of social safety net programs - Refugees Number of indivuduals supported by safety net programs supported by the project ( with a ratio of 4 individuals per refugee households ) Semester Programs MIS Request based on the programs MIS Monitoring and evaluation team ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Households with complete information in Social Registry Number of households registred within the Social Registry database and for which the socio-economic data has been collected and recorded.", + "ner_text": [ + [ + 1046, + 1061, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 564, + 572, + "Social Registry <> reference population" + ], + [ + 1006, + 1016, + "Social Registry <> reference population" + ], + [ + 1147, + 1166, + "Social Registry <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 38 of 61 Beneficiaries of social safety net programs Semester Programs MIS Request based on the programs MIS - assumptions: ( i ) 6 individuals per household and ( ii ) 70 % of Elmaouna beneficiaries are also benefitting from Tekavoul and should not be counted twice. Monitoring and evaluation team Beneficiaries of social safety net programs - Female Semester Programs MIS Request based on the programs MIS Monitoring and evaluation team Beneficiaries of social safety net programs - Refugees Number of indivuduals supported by safety net programs supported by the project ( with a ratio of 4 individuals per refugee households ) Semester Programs MIS Request based on the programs MIS Monitoring and evaluation team ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Households with complete information in Social Registry Number of households registred within the Social Registry database and for which the socio-economic data has been collected and recorded.", + "type": "database", + "explanation": "In the context, it is explicitly described as a database that contains registered households and their socio-economic data, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of socio-economic data.", + "contextual_reason_agent": "In the context, it is explicitly described as a database that contains registered households and their socio-economic data, confirming its role as a data source.", + "contextual_signal": "described as a database that contains socio-economic data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 14, + "text": "percent in 2002 to 14. 7 percent in 2007. There was also a noted increased in the percentage o f young people 15-19 who had sex with more than one partner during the last 12 months - from 0. 3 percent in 200 1 to 17. 1 percent in 2005. 14. Recent analyses regarding major drivers o f the Botswana epidemic particularly implicate the role o f multiple concurrent sexual partners in increasing risk o f HIV infection and transmission. Multiple, concurrent partnerships accelerate HIV transmission because during the early, acute stage o f infection the virus may be passed on to several people over a short period o f time. Whde limited, data on partnership patterns in Botswana raise concern. In 2003, 24 percent o f sexually active men ( age 15-24 ) reported having sex with someone outside their primary relationship in the last yea?. Moreover, acceptance o f multiple partnerships appeared widespread from results o f the BAIS I1 survey, with 38 percent of respondents disagreeing with the statement \u201c Most people you care about stay faithful to a single partner at a time3. \u201d 15. With respect to clinical management o f STIs, in 2005, an estimated 85 percent o f STIs were properly diagnosed, counseled, and treated at health care facilities.", + "ner_text": [ + [ + 924, + 938, + "named" + ], + [ + 97, + 115, + "BAIS I1 survey <> reference population" + ], + [ + 230, + 234, + "BAIS I1 survey <> publication year" + ], + [ + 288, + 296, + "BAIS I1 survey <> data geography" + ], + [ + 668, + 676, + "BAIS I1 survey <> data geography" + ], + [ + 695, + 699, + "BAIS I1 survey <> reference year" + ], + [ + 716, + 735, + "BAIS I1 survey <> reference population" + ], + [ + 1132, + 1136, + "BAIS I1 survey <> publication year" + ], + [ + 1261, + 1279, + "BAIS I1 survey <> usage context" + ] + ], + "validated": true, + "empirical_context": ". Moreover, acceptance o f multiple partnerships appeared widespread from results o f the BAIS I1 survey, with 38 percent of respondents disagreeing with the statement \u201c Most people you care about stay faithful to a single partner at a time3. \u201d 15.", + "type": "survey", + "explanation": "The BAIS II survey is explicitly mentioned as a source of results, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects responses from participants.", + "contextual_reason_agent": "The BAIS II survey is explicitly mentioned as a source of results, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 24, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 20 of 68 school examination are slightly better than boys. Notwithstanding these facts, girls still experience a higher dropout rate in the fifth grade, and which partially accounts for the disparities in enrollment in lower secondary. Also, only 38 percent of women report having completed at least primary education compared to about 57 percent of the men. Nearly 46 percent of girls are estimated to be out of school compared to about 39 percent of boys. There are also significant differences in primary school enrollment with only about 60 percent of girls enrolled compared to about 69 percent of boys, even while the gender gap in literacy rates for cohorts born between the mid-50s and mid-70s was around 24 percentage points and has now narrowed to about 2 percentage points for cohorts between the ages of 10-14 years. According to the initial results of a survey of out of school children in 2019, the average percentage of out-of-school girls in lower secondary is 18 percent while it stands at 13 percent for boys. The lower rates of enrollment for girls at the lower secondary level are particularly prevalent in rural areas of the country, with the average percentage of out-of-school girls in rural areas at the lower secondary level being 47 percent compared to 31 percent for boys.", + "ner_text": [ + [ + 950, + 982, + "named" + ], + [ + 4, + 14, + "survey of out of school children <> publisher" + ], + [ + 171, + 176, + "survey of out of school children <> reference population" + ], + [ + 986, + 990, + "survey of out of school children <> publication year" + ], + [ + 996, + 1056, + "survey of out of school children <> data description" + ] + ], + "validated": true, + "empirical_context": "There are also significant differences in primary school enrollment with only about 60 percent of girls enrolled compared to about 69 percent of boys, even while the gender gap in literacy rates for cohorts born between the mid-50s and mid-70s was around 24 percentage points and has now narrowed to about 2 percentage points for cohorts between the ages of 10-14 years. According to the initial results of a survey of out of school children in 2019, the average percentage of out-of-school girls in lower secondary is 18 percent while it stands at 13 percent for boys. The lower rates of enrollment for girls at the lower secondary level are particularly prevalent in rural areas of the country, with the average percentage of out-of-school girls in rural areas at the lower secondary level being 47 percent compared to 31 percent for boys.", + "type": "survey", + "explanation": "This is a dataset as it provides empirical data on enrollment rates and out-of-school children, used for analysis in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on out-of-school children.", + "contextual_reason_agent": "This is a dataset as it provides empirical data on enrollment rates and out-of-school children, used for analysis in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "Annual DHIS ( 2 ) Administrative data Health Department, HMIS, PMU", + "ner_text": [ + [ + 7, + 11, + "named" + ] + ], + "validated": false, + "empirical_context": "Annual DHIS ( 2 ) Administrative data Health Department, HMIS, PMU", + "type": "system", + "explanation": "'DHIS' is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is associated with administrative data in health contexts.", + "contextual_reason_agent": "'DHIS' is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 35, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 26 of 68 project manager, FM specialist, PS, M & E specialist, environmental specialist and social development specialist with experience in GBV prevention and response, civil works engineers, and other technical specialists to ensure timely, quality, transparent, and effective implementation of the civil works activities. Further details will be provided in the POM. The fiduciary assessment of the PMT of the MoER and NORLD has been completed and is reflected in the fiduciary sections. B. Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "ner_text": [ + [ + 791, + 795, + "named" + ], + [ + 742, + 777, + "EMIS <> reference population" + ], + [ + 949, + 1063, + "EMIS <> data description" + ], + [ + 1071, + 1090, + "EMIS <> data type" + ], + [ + 1148, + 1177, + "EMIS <> data type" + ] + ], + "validated": true, + "empirical_context": "Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "type": "system", + "explanation": "EMIS is indeed a data source as it generates data that is used for monitoring and evaluation of educational indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of generating data on education institutions and students.", + "contextual_reason_agent": "EMIS is indeed a data source as it generates data that is used for monitoring and evaluation of educational indicators.", + "contextual_signal": "mentioned as a source generating data on education institutions and students", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "046_Cameroon-COVID-19-Preparedness-and-Response-Project", + "page": 35, + "text": "The project is likely to be carried out in areas where Indigenous Peoples are present. 89. GBV / SEA / SH Risks: The project has been given a preliminary rating of substantial risk for GBV / SEA / SH, based upon the country context and project-specific indicators. This risk rating is subject to validation following project approval, and GBV / SEA / SH risks will be further assessed and addressed during the implementation phase, which will include a review of the preliminary screening exercise and establishment of the corresponding measures to prevent and mitigate identified risks. The preliminary risk rating is presently supported by risk factors related to both country and project indicators. Despite the recognition of the importance of gender equality in the constitution and national development strategies, the situation of women and girls in Cameroon is alarming. The prevalence of physical and sexual violence committed by a husband or partner is the highest in Sub Saharan Africa ( SSA ) at 51 percent. Similarly, rates of sexual violence are higher than those in the SSA region at 29 percent. Acceptance of the use of violence by husbands / partners is also quite high in Cameroon, particularly by women. According to the Demographic Health Survey 2011, almost half of women ( 47 percent ) reported that men are justified for beating their wives, 38 percent of men share those views.", + "ner_text": [ + [ + 1240, + 1270, + "named" + ] + ], + "validated": true, + "empirical_context": "Acceptance of the use of violence by husbands / partners is also quite high in Cameroon, particularly by women. According to the Demographic Health Survey 2011, almost half of women ( 47 percent ) reported that men are justified for beating their wives, 38 percent of men share those views.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey providing empirical data used in the analysis of societal attitudes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on attitudes towards violence.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey providing empirical data used in the analysis of societal attitudes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 11, + "text": "With only half of the population receiving health insurance, out-of-pocket expenditures ( OOPs ) represent a large source of health financing particularly for the poor households. The burden of household out-of-pocket spending is 37. 34 percent in 2012. 8 Lower income groups spent a higher percentage of their income ( 14 percent ) on health than those with higher income ( 4. 2 percent ). 9 The obligation to pay directly for services, is subjecting a large proportion of the population to financial hardship, even impoverishment. 3 A 2009 study shows that the prevalence of maternal deaths in the Bekaa is 21. 3 percent, more than double of the national average of 10. 7 percent ( IGSPS et al, 2012 ). 4 For example, Mount Lebanon has the highest density of physicians and nurses ( 42 percent and 33 percent ), compared to Beka \u2019 a, with only 7. 8 percent of Lebanon \u2019 s physicians and 19 percent of nurses ( Ammar, 2009 ). 5 WHO, NCD Country Profile, 2011. 6 MoPH, Vital Health Statistics, 2013. 7 National Health Accounts ( NHA ) data listed in World Bank, World Development Indicators, 2013. 8 MoPH website, National Health Accounts Summary Table, 2012. 9 NHA, 2005.", + "ner_text": [ + [ + 1002, + 1026, + "named" + ], + [ + 61, + 87, + "National Health Accounts <> data description" + ], + [ + 248, + 252, + "National Health Accounts <> publication year" + ], + [ + 600, + 605, + "National Health Accounts <> data geography" + ], + [ + 684, + 695, + "National Health Accounts <> author" + ], + [ + 720, + 733, + "National Health Accounts <> data geography" + ], + [ + 912, + 917, + "National Health Accounts <> author" + ], + [ + 994, + 998, + "National Health Accounts <> publication year" + ], + [ + 1029, + 1032, + "National Health Accounts <> acronym" + ], + [ + 1050, + 1060, + "National Health Accounts <> publisher" + ], + [ + 1092, + 1096, + "National Health Accounts <> publication year" + ], + [ + 1154, + 1158, + "National Health Accounts <> reference year" + ], + [ + 1167, + 1171, + "National Health Accounts <> publication year" + ] + ], + "validated": true, + "empirical_context": "6 MoPH, Vital Health Statistics, 2013. 7 National Health Accounts ( NHA ) data listed in World Bank, World Development Indicators, 2013. 8 MoPH website, National Health Accounts Summary Table, 2012.", + "type": "data", + "explanation": "In the context, it is mentioned as part of the National Health Accounts data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced alongside other data sources and indicators.", + "contextual_reason_agent": "In the context, it is mentioned as part of the National Health Accounts data, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 121, + "text": "CCAP will explore with the Afghanistan Information and Communications Technology Sector Development Project the possibility of using mobile technology for community scorecard reporting. With 85 percent of the country currently having mobile access, the Citizens \u2019 Charter can use that available mobile application technology to expedite citizens \u2019 reporting and grievance redress. Performance indicators for gender, including gender-disaggregated data and citizen engagement, have been incorporated into the project \u2019 s Results Framework ( see Section VII ). 94. CCAP will also take advantage of the third party monitoring arrangements under the ARTF. The third party monitors will provide critical data and a level of additional evidence from the field to complement the government monitoring systems and Bank missions. CCAP will make use of their reviews of infrastructure quality as well as their satellite imagery data in order to verify infrastructure assets and gaps based upon the initial needs assessment. The third party monitors will also review the achievement of the service standards, social inclusion dimensions, and CDC organizational maturity. 95. Evaluation and Studies. The Citizens \u2019 Charter provides a rich environment for testing various hypotheses important for development effectiveness in Afghanistan as well as other fragile and conflict situations. The Government together with the World Bank has discussed eight studies, some to be funded through CCAP and others through additional government resources. Due to the purposive selection methodology by which locations were chosen for CCAP, a randomized impact evaluation may not be possible. However, several evaluations are planned and there is room to potentially undertake some nested experimental studies on citizens \u2019 monitoring and reporting.", + "ner_text": [ + [ + 900, + 922, + "named" + ], + [ + 27, + 38, + "satellite imagery data <> data geography" + ], + [ + 1313, + 1324, + "satellite imagery data <> data geography" + ], + [ + 1856, + 1874, + "satellite imagery data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The third party monitors will provide critical data and a level of additional evidence from the field to complement the government monitoring systems and Bank missions. CCAP will make use of their reviews of infrastructure quality as well as their satellite imagery data in order to verify infrastructure assets and gaps based upon the initial needs assessment. The third party monitors will also review the achievement of the service standards, social inclusion dimensions, and CDC organizational maturity.", + "type": "data", + "explanation": "In this context, it is used as a source of information to verify infrastructure assets, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'satellite imagery data' suggests a collection of data points related to imagery.", + "contextual_reason_agent": "In this context, it is used as a source of information to verify infrastructure assets, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of information to verify infrastructure assets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 17, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 11 of 101 marriage in the world, with 77 percent of the girls being married before the age of 18 ( Demographic and Health Survey 2012 ) and 28 percent before the age of 15. 12. Niger \u2019 s security situation further complicates the delivery of basic education services in certain areas. The regions of Diffa, Tahoua, and Tillaberi, already characterized by high levels of poverty, are particularly affected by attacks by armed groups and extremists resulting in forced displacement of populations and school closures. For the academic year 2018 \u2013 2019, more than a hundred schools have been closed due terrorist threats on teachers, population, and security forces and burning of classrooms. 9 Since October 2018, 30 schools have been closed in the Diffa region due to insecurity. With the recent deterioration of the situation along the border with Burkina Faso, 128 primary schools and 17 secondary schools have suspended their activities in the Tillaberi region. In response, since March 2017, the Government of Niger declared a state of emergency in 12 departments in the regions of Tillaberi and Tahoua along the border of Burkina Faso and Mali.", + "ner_text": [ + [ + 191, + 220, + "named" + ], + [ + 15, + 20, + "Demographic and Health Survey <> data geography" + ], + [ + 148, + 153, + "Demographic and Health Survey <> reference population" + ], + [ + 221, + 225, + "Demographic and Health Survey <> publication year" + ], + [ + 269, + 274, + "Demographic and Health Survey <> data geography" + ], + [ + 1038, + 1047, + "Demographic and Health Survey <> data geography" + ], + [ + 1191, + 1197, + "Demographic and Health Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 11 of 101 marriage in the world, with 77 percent of the girls being married before the age of 18 ( Demographic and Health Survey 2012 ) and 28 percent before the age of 15. 12.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on marriage rates.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing empirical data used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 71, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project ( P164130 ) Page 67 of 82 Procurement Specialist; and ( vi ) a TA specializing in Procurement and contract management to be hired under the project finance. New procurement staff at MINEMA-SPIU, dedicated to the project, will be hired to support procurement functions. The Procurement Risk Assessment and Management System ( PRAMS ) will be finalized in due course. 8. General Project Procurement Profile. Procurement activities envisaged through the project are broadly grouped into: ( i ) construction works of schools, roads, health centers, market facilities, environmental rehabilitation and access to water supply, implemented by the MINEMA-SPIU and districts; ( ii ) capacity-building training and mentoring related to economic opportunity activities, implemented by BRD; ( iii ) procurement of goods for capacity building, implemented by BRD; and ( iv ) capacity-building for implementing agencies, hiring of staff, communication strategy, refugee integration, grievance redress mechanisms, etc., implemented by MINEMA. All these activities are packaged duly considering factors like geographic spread ( regions ) and similarity of requirements, and capacity of potential contractors / market players.", + "ner_text": [ + [ + 362, + 411, + "named" + ] + ], + "validated": false, + "empirical_context": "New procurement staff at MINEMA-SPIU, dedicated to the project, will be hired to support procurement functions. The Procurement Risk Assessment and Management System ( PRAMS ) will be finalized in due course. 8.", + "type": "system", + "explanation": "However, it is mentioned as a system for risk assessment and management, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'System' which can imply data handling.", + "contextual_reason_agent": "However, it is mentioned as a system for risk assessment and management, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 15, + "text": "It is, however, critical that the regional M & E system ( 3rd of the Three Ones at regional level ) is developed to track the achievement of the HIV goals and objectives set out in the regional HIV strategy. In turn, the regional HIV strategy can only be developed once more information about the drivers of transmission are known, as it is \u201c essential that the regional HIV strategy is based on a rigorous understanding of HIV transmission dynamics. It is vital to understand the proportion of HIV infections that may be attributed to each major source of transmission. Armed with an understanding of national and regional transmission dynamics and sources of infection, national and regional strategies and programs must be designed to address the major drivers of transmission \u201d ( Wilson, 2007 ). 46. The sequence of development is as follows: existing HIV-related epidemiological data is collected as part of the mapping assessment. This data will be updated in 2007 and then analyzed to determine the transmission drivers of the epidemic and the proportion of infection. With this information in hand, the regional HIV strategy will be developed, which will then inform the development of the regional HIV M & E system. Therefore, this project will be catalytic in ensuring that IGAD collects data to inform the development of its regional HIV strategy \u2013 the 2nd of the Three Ones. 11", + "ner_text": [ + [ + 856, + 888, + "named" + ], + [ + 784, + 790, + "HIV-related epidemiological data <> author" + ], + [ + 792, + 796, + "HIV-related epidemiological data <> publication year" + ], + [ + 966, + 970, + "HIV-related epidemiological data <> publication year" + ], + [ + 1006, + 1042, + "HIV-related epidemiological data <> data description" + ], + [ + 1051, + 1074, + "HIV-related epidemiological data <> data description" + ] + ], + "validated": true, + "empirical_context": "46. The sequence of development is as follows: existing HIV-related epidemiological data is collected as part of the mapping assessment. This data will be updated in 2007 and then analyzed to determine the transmission drivers of the epidemic and the proportion of infection.", + "type": "data", + "explanation": "This is indeed a dataset as it is described as collected data that will be analyzed for empirical research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific collection of data related to HIV epidemiology.", + "contextual_reason_agent": "This is indeed a dataset as it is described as collected data that will be analyzed for empirical research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 70, + "text": "The World Bank Senegal, Casamance Economic Development Project ( P175325 ) Page 66 of 72 Annex 6: CEPD \u2019 s Gender-sensitive Approach Gap: Women are constrained from using health services due to lack of time and distance 1. Analysis: Although this project has many activities to address gender gaps further detailed in this annex, for the gender tag, the project \u2019 s theory of change addresses the issues of women being constrained from using health services due to a lack of time and distance. To illustrate, a 2011 Senegal Demographic and Health and Multiple Indicator Cluster Survey found that survey only 45 percent of births in Casamance are attended by trained personnel, leaving women more exposed to maternal mortality or birth complications. The survey also found that the primary barriers for accessing health services cited by women was distance of health facility ( 62 percent and 59 percent of women in Sedhiou and Kolda respectively ) and expenses ( 62 percent and 58 percent in Sedhiou and Ziguinchor respectively ) this is followed closely by social norms where women require permission to access health services or fear going alone. 29 These issues have continued with the COVID-19 pandemic and subsequent lockdowns.", + "ner_text": [ + [ + 516, + 584, + "named" + ], + [ + 24, + 33, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 138, + 143, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 407, + 412, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 511, + 515, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> publication year" + ], + [ + 632, + 641, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 685, + 690, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 837, + 842, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 906, + 911, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ], + [ + 915, + 922, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 927, + 932, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 992, + 999, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 1004, + 1014, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> data geography" + ], + [ + 1077, + 1082, + "Senegal Demographic and Health and Multiple Indicator Cluster Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Analysis: Although this project has many activities to address gender gaps further detailed in this annex, for the gender tag, the project \u2019 s theory of change addresses the issues of women being constrained from using health services due to a lack of time and distance. To illustrate, a 2011 Senegal Demographic and Health and Multiple Indicator Cluster Survey found that survey only 45 percent of births in Casamance are attended by trained personnel, leaving women more exposed to maternal mortality or birth complications. The survey also found that the primary barriers for accessing health services cited by women was distance of health facility ( 62 percent and 59 percent of women in Sedhiou and Kolda respectively ) and expenses ( 62 percent and 58 percent in Sedhiou and Ziguinchor respectively ) this is followed closely by social norms where women require permission to access health services or fear going alone.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly referenced in the context as a survey providing empirical data on health service access.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that collects demographic and health data.", + "contextual_reason_agent": "This is a dataset as it is explicitly referenced in the context as a survey providing empirical data on health service access.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 35, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 26 of 68 project manager, FM specialist, PS, M & E specialist, environmental specialist and social development specialist with experience in GBV prevention and response, civil works engineers, and other technical specialists to ensure timely, quality, transparent, and effective implementation of the civil works activities. Further details will be provided in the POM. The fiduciary assessment of the PMT of the MoER and NORLD has been completed and is reflected in the fiduciary sections. B. Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "ner_text": [ + [ + 1131, + 1136, + "named" + ] + ], + "validated": false, + "empirical_context": "Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "type": "organization", + "explanation": "However, NORLD is referenced as an entity involved in the monitoring and evaluation process, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "However, NORLD is referenced as an entity involved in the monitoring and evaluation process, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 58, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XLIX Against DLR 9. 2: US $ 1 million for each core micro-data set made available via an online portal to users in adherence with the access protocols for up to a total of US $ 8 million. Against DLR 9. 3: An additional US $ 400, 000 for every core micro-data set made available with indicators for gender, disability and residency status, for up to 5 micro data sets and in the limit of US $ 2 million. Description The DLI supports the operationalization of an interactive statistical interface by DOS to allow users to query key statistical indicators ( such as unemployment, labor force participation, and so on ). This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "ner_text": [ + [ + 848, + 851, + "named" + ] + ], + "validated": false, + "empirical_context": "This can be done according to user-defined cross tabulations, including gender, age, education-level, nationality and governorate. The DLI also supports the integration of administrative data on the platform from key line ministries to allow for a \u2018 one-stop shop \u2019, with incentives for each additional line Ministry that is integrated.", + "type": "program", + "explanation": "However, 'DLI' is described as a platform or program rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DLI' is a dataset because it is mentioned in the context of data integration and user-defined cross tabulations.", + "contextual_reason_agent": "However, 'DLI' is described as a platform or program rather than a structured collection of data.", + "contextual_signal": "mentioned only as a program, not as a data source", + "tags": [] + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 24, + "text": "The World Bank project has been focusing on capacity building, trainings and operational manuals. The envisaged OBA pilot intends to build on this initiative. Complementary technical assistance activities, such as assistance in developing a MIS, evaluating fee collection mechanisms and conducting awareness raising workshops and education campaigns will be undertaken to increase the chances of success. A key component of this assistance will include the development of a MIS to track performance across municipalities, thereby providing data to enable better management. Proper financial management, including revenue mobilization, planned expenditure and maintenance of financial discipline, is critical to effective delivery of urban services, including SWM. Therefore, the MIS will track OBA Targets and indicators, as well as other data useful to system managers in understanding the nature of continuing areas for improvement. The scope of work and costs of the technical assistance are provided in Annex 11. \uf0b7 Stakeholder buy-in is key to successful project implementation. Several preparatory activities have been carried out to gauge stakeholders \u2019 concerns and ensure their early engagement. Separate focus group meetings were held with both end-users and municipal SWM staff to discuss SWM issues and concerns. Community meetings were held in rural and urban areas within both Hebron and Bethlehem governorates ( five in Hebron and four in Bethlehem ), during which discussions were held and participants were asked to fill out questionnaires. The questionnaires generally covered demographics, waste management services, and waste management fees. Moreover, stakeholder meetings held during project preparation revealed that users \u2019 reluctance to pay stems from dissatisfaction with the current service level, and indicated a higher WTP for satisfactory level of service. Targets for fee increase were estimated taking into account this information. Consultations with key regional SWM management ( JSC-H & B, JSCs ) were also held to understand practical concerns and challenges in implementing the Project. A", + "ner_text": [ + [ + 241, + 244, + "named" + ] + ], + "validated": false, + "empirical_context": "The envisaged OBA pilot intends to build on this initiative. Complementary technical assistance activities, such as assistance in developing a MIS, evaluating fee collection mechanisms and conducting awareness raising workshops and education campaigns will be undertaken to increase the chances of success. A key component of this assistance will include the development of a MIS to track performance across municipalities, thereby providing data to enable better management.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "006_BOSIB-2c22668f-f4ba-42f5-a0d1-7949b7b8fe34", + "page": 44, + "text": "The World Bank Zambia Refugee and Host Communities Project ( P503941 ) Page 41 9. External audit. The project audit will be carried out by the Office of the Auditor General who is mandated by law to do so. The implementing agencies will prepare annual financial statements for their operations, including for the project at the end of each financial year. The audit TOR will be prepared by the implementing agencies and cleared by the World Bank before each year \u2019 s audit. The auditors will produce an audit report and management letter highlighting areas / issues which will require addressing. The MoHAIS will be required to submit the audit report together with the management letter to the World Bank not later than six months after the end of the financial year. 10. FM supervision and implementation support. FM supervision support will be conducted on a semiannual basis and will include interviews and desk reviews such as the review of the IFRs, statements of expenditure, and audit reports. PROCUREMENT 11. Standard Procurement Documents ( SPD ). The World Bank \u2019 s SPDs will be used for procurement of goods, works, and non-consulting services under the open international competitive procurement approach. Similarly, selection of consultant firms will use the World Bank \u2019 s SPDs, in line with procedures described in the Procurement Regulations.", + "ner_text": [ + [ + 1018, + 1048, + "named" + ] + ], + "validated": false, + "empirical_context": "PROCUREMENT 11. Standard Procurement Documents ( SPD ). The World Bank \u2019 s SPDs will be used for procurement of goods, works, and non-consulting services under the open international competitive procurement approach.", + "type": "document", + "explanation": "However, it is not a dataset as it refers to procurement documents rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'documents' which could imply structured information.", + "contextual_reason_agent": "However, it is not a dataset as it refers to procurement documents rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 25, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 21 of 86 ensure transparency, accountability, and digital inclusion in the delivery. Beneficiaries will be selected through a combination of targeting instruments. First, the poorest communes in the country will be selected making use of existing poverty maps, hazard exposure maps and climate change vulnerability maps. The collines29 within these communes, given that their poverty profiles are very similar, will be selected randomly through a lottery. This will be done to avoid any political interference in the selection of the collines. Secondly, all households in the selected collines will be registered in the country \u2019 s social registry and a PMT score will be calculated for all of them. Beneficiary quota will be established for each colline and the project will select the poorest households in each colline according to the PMT until reaching the proposed quota. Finally, a community validation process will be carried out to allow communities to correct inclusion and exclusion errors that might occur in the targeting process. The targeting process might be adapted in urban and refugee areas if necessary. Beneficiaries will receive Burundi Francs ( BIF ) 36, 00030 per month ( approx. US $ 18 ). This amount is equivalent to 20 percent of the household consumption of an average poor household, which is aligned to international standards. 49.", + "ner_text": [ + [ + 340, + 373, + "named" + ], + [ + 229, + 245, + "climate change vulnerability maps <> reference population" + ], + [ + 612, + 622, + "climate change vulnerability maps <> reference population" + ], + [ + 1205, + 1212, + "climate change vulnerability maps <> data geography" + ] + ], + "validated": true, + "empirical_context": "Beneficiaries will be selected through a combination of targeting instruments. First, the poorest communes in the country will be selected making use of existing poverty maps, hazard exposure maps and climate change vulnerability maps. The collines29 within these communes, given that their poverty profiles are very similar, will be selected randomly through a lottery.", + "type": "map", + "explanation": "In this context, it is used as a source of information to select beneficiaries based on vulnerability, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to maps that likely contain structured data on climate change vulnerability.", + "contextual_reason_agent": "In this context, it is used as a source of information to select beneficiaries based on vulnerability, indicating it functions as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 4, + "validated": 3, + "not_validated": 1 + } + }, + { + "filename": "184_multi-page", + "page": 19, + "text": "Djibouti ' s main resource base is its population and in order to achieve sustained development, the country needs to improve the quality of its human resource base. Quality starts with improved basic education and school enrollments. In addition, the issue of equity arises. According to the household expenditure survey data, in urban areas, the net enrollment rate ( NER ) at the primary level in the survey year ( 1996 ) was 50 % greater for the highest expenditure quintile compared to the lowest expenditure quintile. The inequity is even more pronounced in secondary education ( lower secondary education is part of basic education but the survey data does not separate the two ), where the NER of the highest quintile was more than 420 % higher than the NER of the lowest quintile. The income differences in enrollment are significantly higher than other countries in Africa. The problem in urban areas is access - demand exists among all groups but the rationing of school intake ends up benefiting the better off. Any further expansion of places will help the poorer segments of the population. Thus, public expenditure in basic education is justified both on the public good rationale and also on the equity rationale. The ten-year program proposed by the Government will also result in efficiency gains through lowered repetition and drop out rates.", + "ner_text": [ + [ + 293, + 326, + "named" + ], + [ + 0, + 8, + "household expenditure survey data <> data geography" + ], + [ + 331, + 342, + "household expenditure survey data <> reference population" + ], + [ + 348, + 396, + "household expenditure survey data <> data description" + ], + [ + 418, + 422, + "household expenditure survey data <> publication year" + ], + [ + 1377, + 1395, + "household expenditure survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "In addition, the issue of equity arises. According to the household expenditure survey data, in urban areas, the net enrollment rate ( NER ) at the primary level in the survey year ( 1996 ) was 50 % greater for the highest expenditure quintile compared to the lowest expenditure quintile. The inequity is even more pronounced in secondary education ( lower secondary education is part of basic education but the survey data does not separate the two ), where the NER of the highest quintile was more than 420 % higher than the NER of the lowest quintile.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on household expenditures relevant to the analysis of enrollment rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific survey data used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on household expenditures relevant to the analysis of enrollment rates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 19, + "text": "The States with support from the Project Coordination Unit ( PCU ) will be responsible for capacity building at the community level ( empowerment, inclusion, gender sensitivity, school safety ) \u2013 train the localities and prepare them to perform their role in overseeing implementation of the school grants. The PCU will assess capacity of localities and schools in in participatory planning and monitoring of school results. 37. Key activities will include: \u2022 Assessing capacity of localities and schools in in participatory planning and monitoring of school results; \u2022 Training of school heads and PTAs in participatory planning and monitoring of school results, including learning; \u2022 Training of locality supervisors to provide support to schools as needed; and \u2022 Providing grants to schools to improve learning environments. 38. Selection of intervention schools: The project will target all public primary schools in Sudan. Rich school - level data obtained from the School Census in 2015-2019 with support from the BERP will be used for the targeting of project beneficiaries ( figure 4 ).", + "ner_text": [ + [ + 971, + 984, + "named" + ], + [ + 33, + 58, + "School Census <> author" + ], + [ + 895, + 917, + "School Census <> reference population" + ], + [ + 921, + 926, + "School Census <> data geography" + ], + [ + 933, + 952, + "School Census <> data type" + ], + [ + 988, + 997, + "School Census <> publication year" + ], + [ + 1020, + 1024, + "School Census <> publisher" + ] + ], + "validated": true, + "empirical_context": "Selection of intervention schools: The project will target all public primary schools in Sudan. Rich school - level data obtained from the School Census in 2015-2019 with support from the BERP will be used for the targeting of project beneficiaries ( figure 4 ).", + "type": "census", + "explanation": "In the context, it is explicitly mentioned that the 'School Census' data is used for targeting project beneficiaries, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'School Census' implies a structured collection of data collected from schools.", + "contextual_reason_agent": "In the context, it is explicitly mentioned that the 'School Census' data is used for targeting project beneficiaries, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 33, + "text": "acceptance of ex-combatants ( disaggregated by age and gender ) % 0 40 % 50 % 60 % Annual Beneficiary survey, Annual evaluation / beneficiary assessment, External impact PIU", + "ner_text": [ + [ + 83, + 108, + "named" + ] + ], + "validated": true, + "empirical_context": "acceptance of ex-combatants ( disaggregated by age and gender ) % 0 40 % 50 % 60 % Annual Beneficiary survey, Annual evaluation / beneficiary assessment, External impact PIU", + "type": "survey", + "explanation": "In this context, it is explicitly mentioned as a survey used for evaluation, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often implies a structured collection of data.", + "contextual_reason_agent": "In this context, it is explicitly mentioned as a survey used for evaluation, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 15, + "text": "Digital solutions can support delivery of essential services for firms ( for example, utility and tax payments, access to markets via digital platforms and e-commerce, and digital small - and medium - sized enterprises [ SMEs ] finance ); consumers ( for example, mobile money, remittances, and e-commerce ); and the most vulnerable ( for example, expanded social safety nets or humanitarian payment schemes to refugees ). Digital skills development can drive medium - and longer-term job creation. Introduction of innovative digital health solutions offers the opportunity to undertake holistic disease surveillance and monitoring ( for example, through geo-tracking applications ); leverage digital data and analytics from public systems to strategically allocate resources and preempt outbreaks; disseminate public health messages and cautionary guidelines ( for example, through SMSs in a low-tech environment like Uganda ); and improve inventory management of medical supplies. 20 The latter is especially relevant as regional and global production and supply chains have faced sustained interruptions and there will be further strain due to upcoming COVID-19 vaccine distribution. Enabling agile digital solutions for the education sector severely affected by pandemics further adds to the cross-sector potential to recover from COVID-19 with the help of digital solutions.", + "ner_text": [ + [ + 655, + 680, + "named" + ] + ], + "validated": false, + "empirical_context": "Digital skills development can drive medium - and longer-term job creation. Introduction of innovative digital health solutions offers the opportunity to undertake holistic disease surveillance and monitoring ( for example, through geo-tracking applications ); leverage digital data and analytics from public systems to strategically allocate resources and preempt outbreaks; disseminate public health messages and cautionary guidelines ( for example, through SMSs in a low-tech environment like Uganda ); and improve inventory management of medical supplies. 20 The latter is especially relevant as regional and global production and supply chains have faced sustained interruptions and there will be further strain due to upcoming COVID-19 vaccine distribution.", + "type": "application", + "explanation": "However, 'geo-tracking applications' are described as tools or methods rather than structured collections of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'geo-tracking applications' could imply the use of data for tracking purposes.", + "contextual_reason_agent": "However, 'geo-tracking applications' are described as tools or methods rather than structured collections of data.", + "contextual_signal": "mentioned only as an application, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 43, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0. 00 0. 00 5. 00 10. 00 15. 00 20. 00 20. 00 Rapid student assessment mechanism developed and piloted ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Tutoring / accelerated learning program for disadvantaged students developed and implemented ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Students benefiting from direct interventions to 0. 00 0. 00", + "ner_text": [ + [ + 212, + 216, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0.", + "type": "system", + "explanation": "However, EMIS is described as a system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data generation and management.", + "contextual_reason_agent": "However, EMIS is described as a system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "161_28046", + "page": 19, + "text": "Table: Classification of Regions by Poverty Levels Lower Central Higher Forest Total Urban Rural Guinea Guinea Guinea Guinea Conakry Percent urban Dooulation 30 100 0 23 9 16 22 100 Population density 29 n. a. n. a 31 31 14 34 2429 Access to safe water 52 49 45 43 45 47 48 80 Percent poor 40 n. a. n. a 24 36 40 25 11 Percent with less than U S 3 0 0 40 24 52 42 51 62 33 7 Depth o f poverty ( gap between 13 7 18 14 17 23 9 1 Share o f National Poverty ) ( % ) 100 9 88 22 28 32 15 3 Per capita expenditure ( \u2018 000 469. 5 n. a. n. a 401. 3 377. 6 316. 0 484. 4 n. a. Share of food in total spending 50 43 61 5 1 61 55 57 36 Source: UNPOP 1998, RGPH, General Population and Housing Census 1996, Comprehensive Consumption Budget Survey 199411995, Household Consumption Survey. Taken from the \u2018 Guinea Country Status Report \u201d, In the same vein, health indicators also indicate inequity problems as the table below shows: Table: Household Health Behaviors: Comparison by Place of Residence and by Region Lower Central Higher Forest Indicators Country Urban Rural Guinea Guinea Guinea Guinea Conakry Child health % of newborns < 6 months old on exclusive Median duration of breast feeding 22 4 - 224 242 1 9 9 22 1 22 1 breastfeeding 95. 2 - - 987 9 3 6 9 6 0 9 2 6 95 9 14", + "ner_text": [ + [ + 652, + 694, + "named" + ] + ], + "validated": true, + "empirical_context": "a. Share of food in total spending 50 43 61 5 1 61 55 57 36 Source: UNPOP 1998, RGPH, General Population and Housing Census 1996, Comprehensive Consumption Budget Survey 199411995, Household Consumption Survey. Taken from the \u2018 Guinea Country Status Report \u201d, In the same vein, health indicators also indicate inequity problems as the table below shows: Table: Household Health Behaviors: Comparison by Place of Residence and by Region Lower Central Higher Forest Indicators Country Urban Rural Guinea Guinea Guinea Guinea Conakry Child health % of newborns < 6 months old on exclusive Median duration of breast feeding 22 4 - 224 242 1 9 9 22 1 22 1 breastfeeding 95.", + "type": "census", + "explanation": "This is indeed a dataset as it is a structured collection of data used for empirical analysis in the context of population and housing.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data in the context.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used for empirical analysis in the context of population and housing.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 83, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 79 of 103 the lack of logistics, getting real-time data on service delivery indicators such as functionality will continue to be a challenge. In response to this challenge, the CWA will provide support to explore options for community-based WASH data collection. The support will include: ( i ) assessment of the different community-based data collection tools that could complement and integrate with the WASH MIS; ( ii ) designing ( including the selection of frequently needed indicators ), piloting, and rolling out the selected community-based data collection tool; ( iii ) institutionalizing the data collection responsibility at the WASHCOM level ( inclusion of this responsibility as part of the WASHCOM legalization document ); and ( iv ) continuous capacity building of WASHCOM members for regular reporting. Information collected from community-level monitoring will be integrated into sector MIS. c. Introducing the rural water and sanitation information system ( SIASAR ) in Ethiopia: The WASH sector in Ethiopia can draw lessons from the SIASAR model implemented in different countries of Latin America, Africa, and Central Asia. The Project will build on the ongoing WASH MIS initiative for Ethiopia and will link to SIASAR to allow a compressive and broader range of data analysis for decision making.", + "ner_text": [ + [ + 1361, + 1367, + "named" + ] + ], + "validated": false, + "empirical_context": "Introducing the rural water and sanitation information system ( SIASAR ) in Ethiopia: The WASH sector in Ethiopia can draw lessons from the SIASAR model implemented in different countries of Latin America, Africa, and Central Asia. The Project will build on the ongoing WASH MIS initiative for Ethiopia and will link to SIASAR to allow a compressive and broader range of data analysis for decision making.", + "type": "system", + "explanation": "However, SIASAR is described as a model and system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SIASAR is a dataset because it is mentioned in the context of data analysis.", + "contextual_reason_agent": "However, SIASAR is described as a model and system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 47, + "text": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 42 of 54 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Percentage of specific priority populations fully vaccinated The indicator will track the number of the eligible people as defined being among a specific set of priority groups in the National Deployment and Vaccination Plan ( NVDP ) / government prioritization list who are fully vaccinated from COVID-19 using vaccines that meet Bank ' s vaccine approval criteria. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Percentage of fully vaccinated priority groups who are female The denominator is the number of people who were in the target groups and were fully vaccinated with 2 doses, and the numerator will be the number of women vaccinated with 2 doses in the target groups. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Number of project-supported COVID-19 vaccinations sites with adequate health care waste management for vaccination The project will invest in providing adequate waste management equipment at the facility level.", + "ner_text": [ + [ + 675, + 716, + "named" + ], + [ + 15, + 19, + "national paper-based vaccination registry <> data geography" + ], + [ + 263, + 323, + "national paper-based vaccination registry <> data description" + ], + [ + 717, + 736, + "national paper-based vaccination registry <> data type" + ], + [ + 748, + 809, + "national paper-based vaccination registry <> data description" + ], + [ + 1356, + 1374, + "national paper-based vaccination registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "The World Bank Iraq COVID-19 Vaccination Project ( P177038 ) Page 42 of 54 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Percentage of specific priority populations fully vaccinated The indicator will track the number of the eligible people as defined being among a specific set of priority groups in the National Deployment and Vaccination Plan ( NVDP ) / government prioritization list who are fully vaccinated from COVID-19 using vaccines that meet Bank ' s vaccine approval criteria. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Percentage of fully vaccinated priority groups who are female The denominator is the number of people who were in the target groups and were fully vaccinated with 2 doses, and the numerator will be the number of women vaccinated with 2 doses in the target groups. 3 months NDVP, digital vaccination registry, national paper-based vaccination registry Administrative data PMU / MOHE Number of project-supported COVID-19 vaccinations sites with adequate health care waste management for vaccination The project will invest in providing adequate waste management equipment at the facility level.", + "type": "registry", + "explanation": "This is a dataset as it is explicitly mentioned as a source of administrative data for tracking vaccination statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of vaccination records.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a source of administrative data for tracking vaccination statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 16, + "text": "7 22. Moreover, investments in strengthening data systems ( both technical and financial ) need to continue to deepen the use of data for evidence \u2010 based decision making in the sector and further improve resource allocation. The MOE has successfully deployed an education management information system ( EMIS ), which is now hosting data on all schools and students in the system. Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country. Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize. In addition, the collection, analysis, and use of student learning data and disaggregated and gender \u2010 sensitive data are essential for monitoring, targeting pedagogical interventions, and improving teacher practices in the classroom. C. Relationship to the Country Partnership Framework and Rationale for Use of Instrument 23. Relationship to the CPF. The proposed operation is fully aligned with the Jordan Country Partnership Framework ( CPF ) discussed by the World Bank Group Board on July 14, 2016. The CPF covers the period FY17 \u2013 22 and highlights the economic, geopolitical, and social challenges that Jordan has been facing, particularly with the Syrian refugee crisis.", + "ner_text": [ + [ + 597, + 601, + "named" + ], + [ + 230, + 233, + "EMIS <> publisher" + ], + [ + 346, + 353, + "EMIS <> reference population" + ], + [ + 358, + 366, + "EMIS <> reference population" + ], + [ + 477, + 480, + "EMIS <> publisher" + ], + [ + 665, + 668, + "EMIS <> publisher" + ], + [ + 808, + 829, + "EMIS <> data description" + ], + [ + 834, + 875, + "EMIS <> data description" + ], + [ + 1160, + 1166, + "EMIS <> data geography" + ], + [ + 1257, + 1261, + "EMIS <> publication year" + ], + [ + 1369, + 1375, + "EMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country. Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize. In addition, the collection, analysis, and use of student learning data and disaggregated and gender \u2010 sensitive data are essential for monitoring, targeting pedagogical interventions, and improving teacher practices in the classroom.", + "type": "system", + "explanation": "EMIS is indeed a data source as it provides data that is utilized for planning and decision making in the education sector.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of leveraging data for decision making.", + "contextual_reason_agent": "EMIS is indeed a data source as it provides data that is utilized for planning and decision making in the education sector.", + "contextual_signal": "mentioned as a data source for decision making", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 13, + "text": "These infrastructure issues, combined with institutional capacity constraints, resulted in a stagnation, or decline, in water service quality acutely affecting rural areas and district towns, where most of the population resides. Outdated pumps and distribution networks with excessive energy consumption and high non - revenue water ( NRW ) losses affect the financial performance of WSS service providers as energy costs account for around 30 \u2013 40 percent of their minimal budgets for operational expenses. While reduction of energy consumption by water utilities may not lead to significant reduction in GHG emissions, as the country is reliant on hydropower for almost 95 percent of electricity generation, optimizing energy use for water service provision will simultaneously improve financial performance of utilities and overcome seasonal constraints in availability of electricity which affects operation of the systems. 11. The burden of deficient water supply is especially affecting the poor. In rural areas, house connections are available to 34 percent of the poorest households compared to 80 percent in urban areas, which demonstrates that the gap in services is largely correlated with location rather than income of households. However, the gap between rich and poor in drinking water service provision is much less pronounced than is commonly seen in other low-income countries. Most of the gap is the result of location, as most poor people reside in rural areas. In urban areas, about 80 percent of the poorest population use house connections ( compared to 99 percent of the richest households ). Data from the 20 TajStat. 2020. Population Census Data.", + "ner_text": [ + [ + 1650, + 1672, + "named" + ], + [ + 160, + 171, + "Population Census Data <> data geography" + ], + [ + 176, + 190, + "Population Census Data <> data geography" + ], + [ + 1073, + 1091, + "Population Census Data <> reference population" + ], + [ + 1118, + 1129, + "Population Census Data <> data geography" + ], + [ + 1644, + 1648, + "Population Census Data <> publication year" + ] + ], + "validated": true, + "empirical_context": "2020. Population Census Data.", + "type": "census", + "explanation": "This is indeed a dataset as it refers to structured data collected during a population census.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Data' and refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected during a population census.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 55, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 51 of 74 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Beneficiaries of social safety net programs This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider will document payment data and share with the implementing partner Implementing Partner Beneficiaries of social safety net programs - Female This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider and implementing agency will document payment data Implementing Partner Beneficiary households of social safety net programs - Refugees The number of total beneficiaries HHs that are This indicator will be SNSOP MIS which hosts The implementing partner will collect Implementing Partner", + "ner_text": [ + [ + 848, + 857, + "named" + ], + [ + 4, + 14, + "SNSOP MIS <> publisher" + ], + [ + 306, + 349, + "SNSOP MIS <> reference population" + ], + [ + 459, + 500, + "SNSOP MIS <> data description" + ], + [ + 1158, + 1166, + "SNSOP MIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 51 of 74 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Beneficiaries of social safety net programs This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider will document payment data and share with the implementing partner Implementing Partner Beneficiaries of social safety net programs - Female This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider and implementing agency will document payment data Implementing Partner Beneficiary households of social safety net programs - Refugees The number of total beneficiaries HHs that are This indicator will be SNSOP MIS which hosts The implementing partner will collect Implementing Partner", + "type": "system", + "explanation": "In the context, SNSOP MIS is explicitly mentioned as hosting data related to beneficiaries, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that hosts beneficiary registration and payment data.", + "contextual_reason_agent": "In the context, SNSOP MIS is explicitly mentioned as hosting data related to beneficiaries, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source that hosts beneficiary registration and payment data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 55, + "text": "The targeting committee will include representatives from the colline development committee ( president and female member ), representatives from religious groups in the colline ( Caritas or priest, pastor ), representatives from the Child Protection Committee, the imboneza if present ( volunteer women in charge of addressing domestic violence and children issues, with support from MDPHSAG ), community health workers and the Red Cross, the community leader ( bachingonazi ) or customary leader ( Abagobo - 24 The poverty analysis on the household survey data ( ECVMB 2014 ) for the poverty map will provide the PMT coefficients and inform the design of a questionnaire to collect data on the variables associated with extreme poverty at the household-level.", + "ner_text": [ + [ + 565, + 575, + "named" + ] + ], + "validated": true, + "empirical_context": "The targeting committee will include representatives from the colline development committee ( president and female member ), representatives from religious groups in the colline ( Caritas or priest, pastor ), representatives from the Child Protection Committee, the imboneza if present ( volunteer women in charge of addressing domestic violence and children issues, with support from MDPHSAG ), community health workers and the Red Cross, the community leader ( bachingonazi ) or customary leader ( Abagobo - 24 The poverty analysis on the household survey data ( ECVMB 2014 ) for the poverty map will provide the PMT coefficients and inform the design of a questionnaire to collect data on the variables associated with extreme poverty at the household-level.", + "type": "survey", + "explanation": "It is indeed a dataset as it provides data used for empirical analysis related to poverty.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced in relation to a household survey data for poverty analysis.", + "contextual_reason_agent": "It is indeed a dataset as it provides data used for empirical analysis related to poverty.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 45, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 30 Monitoring & Evaluation Plan: PDO Indicators by PDO Outcomes Improved access to basic services Beneficiaries with improved access to social and economic infrastructure and services ( Number ) Description Quantitative indicator counting number of beneficiaries provided with improved access to services and economic opportunties. This covers all beneficiaires for components 1 and 2. The data is then disaggregated by gender and by status ( refugee / host community ). Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation. MIS database with population statistics for project sites. Data collected by MINEMA for refugee and host community populations in host districts ( camp and non-camp residents ). Beneficiary numbers should be included in detailed feasibility studies for each subproject. Responsibility for Data Collection MINEMA, supported by districts. Improved economic opportunities People benefiting from actions to expand and enable economic opportunity ( Number ) Description Quantitative indicator counting number of beneficiaries benefiting from activities under component 2, including capacity-building, matching grant, credit guarantee scheme and graduation programming ( this counts HH beneficiaries x 4, being the average HH size in the refugee camps ). The data is then disaggregated by gender and by status ( refugee / host community ).", + "ner_text": [ + [ + 697, + 709, + "named" + ], + [ + 4, + 14, + "MIS database <> publisher" + ], + [ + 74, + 80, + "MIS database <> data geography" + ], + [ + 322, + 344, + "MIS database <> data type" + ], + [ + 715, + 736, + "MIS database <> data description" + ], + [ + 785, + 823, + "MIS database <> reference population" + ] + ], + "validated": true, + "empirical_context": "Frequency Quarterly Data source Project MIS Methodology for Data Collection Monitoring project implementation. MIS database with population statistics for project sites. Data collected by MINEMA for refugee and host community populations in host districts ( camp and non-camp residents ).", + "type": "database", + "explanation": "The context confirms it is a dataset as it is described as a database containing population statistics used for monitoring project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS database' suggests a structured collection of data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is described as a database containing population statistics used for monitoring project implementation.", + "contextual_signal": "described as a management information system that stores records", + "tags": [] + }, + { + "filename": "059_Uganda-Investing-in-Forests-and-Protected-Areas-for-Climate-Smart-Development-Project", + "page": 48, + "text": "It comprises a detailed Data for this indicator will be collected twice during project lifetime: at mid term review and at end of project. METT scoring matrices combined by the Uganda Wildlife Authority. METT scoring matrix. Uganda Wildlife Authority", + "ner_text": [ + [ + 139, + 160, + "named" + ] + ], + "validated": false, + "empirical_context": "It comprises a detailed Data for this indicator will be collected twice during project lifetime: at mid term review and at end of project. METT scoring matrices combined by the Uganda Wildlife Authority. METT scoring matrix.", + "type": "matrix", + "explanation": "However, the context indicates that it is a scoring tool rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'scoring matrices' which suggests a structured format for data.", + "contextual_reason_agent": "However, the context indicates that it is a scoring tool rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a scoring matrix, not as a data source", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "Bell, and C. H. Bullough. 2001. Can Skilled Attendance at Delivery Reduce Maternal Mortality in Developing Countries? Safe Motherhood Strategies: A Review of the Evidence. 72 Bhutta, Z. A., J. K. Das, R. Bahl, et al. 2014. \" Can Available Interventions End Preventable Deaths in Mothers, Newborn Babies, and Stillbirths, and At What Cost? \" The Lancet 384 ( 9940 ): 347 \u2013 370. 73 McGovern, M. E., and D. Canning. 2015. \u201c Vaccination and All \u2010 cause Child Mortality from 1985 to 2011: Global Evidence from the Demographic and Health Surveys. \u201d American Journal of Epidemiology 182 ( 9 ): 791 \u2013 798. doi: 10. 1093 / aje / kwv125.", + "ner_text": [ + [ + 509, + 539, + "named" + ], + [ + 380, + 395, + "Demographic and Health Surveys <> author" + ], + [ + 401, + 411, + "Demographic and Health Surveys <> author" + ], + [ + 413, + 417, + "Demographic and Health Surveys <> publication year" + ], + [ + 470, + 482, + "Demographic and Health Surveys <> reference year" + ] + ], + "validated": true, + "empirical_context": "2015. \u201c Vaccination and All \u2010 cause Child Mortality from 1985 to 2011: Global Evidence from the Demographic and Health Surveys. \u201d American Journal of Epidemiology 182 ( 9 ): 791 \u2013 798.", + "type": "survey", + "explanation": "It is indeed a dataset as it provides structured data used for empirical analysis in the context of child mortality and vaccination.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Demographic and Health Surveys' is a well-known source of empirical data on health indicators.", + "contextual_reason_agent": "It is indeed a dataset as it provides structured data used for empirical analysis in the context of child mortality and vaccination.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 11, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 9 of 34 vulnerable due to their long-standing situation of displacement and who may not have an opportunity to return to their places of origin for some time. 9. Since November 2021, the World Bank has been providing analytical and technical assistance to the Government through a State and Peacebuilding Fund-financed, Bank-executed grant called Support for Peacebuilding and Recovery in Azerbaijan. This grant has financed analytical work that aims to inform a policy dialogue between the Bank and SCRI, including a survey of IDPs which focused on understanding the current livelihoods, service delivery, and social inclusion, and future aspirations; and a lessons learned paper on livelihood activities for IDPs. As IDPs are just beginning to return to liberated areas, it is a critical time for the Bank to engage with GoA on a policy dialogue as the steps that are taken now will have long-lasting effects. The Bank will mobilize its growing knowledge and experience in addressing the challenges of forced displacement to support the Government.", + "ner_text": [ + [ + 623, + 637, + "named" + ], + [ + 4, + 14, + "survey of IDPs <> publisher" + ], + [ + 45, + 73, + "survey of IDPs <> reference population" + ], + [ + 77, + 87, + "survey of IDPs <> data geography" + ], + [ + 273, + 286, + "survey of IDPs <> publication year" + ], + [ + 292, + 302, + "survey of IDPs <> publisher" + ], + [ + 1171, + 1189, + "survey of IDPs <> usage context" + ] + ], + "validated": true, + "empirical_context": "Since November 2021, the World Bank has been providing analytical and technical assistance to the Government through a State and Peacebuilding Fund-financed, Bank-executed grant called Support for Peacebuilding and Recovery in Azerbaijan. This grant has financed analytical work that aims to inform a policy dialogue between the Bank and SCRI, including a survey of IDPs which focused on understanding the current livelihoods, service delivery, and social inclusion, and future aspirations; and a lessons learned paper on livelihood activities for IDPs. As IDPs are just beginning to return to liberated areas, it is a critical time for the Bank to engage with GoA on a policy dialogue as the steps that are taken now will have long-lasting effects.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that collects data on IDPs' livelihoods and social inclusion.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that collects data on IDPs' livelihoods and social inclusion.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 27, + "text": "Financing under this component will provide support for the establishment of the WSS unit within the ministry, creation of the WSS MIS, development and approval of the necessary reporting protocols, and support for the preparation of the WSS sector assessment report to be published annually after the Year 3 of the project. The WSS unit will be tasked with ensuring coordination with the stakeholder agencies in designing climate-smart WSW interventions and their implementation at the utility and community levels. The MIS will ensure availability of the WSS sector data and monitor gender disaggregation of the utility workforce so that gender gaps in economic opportunities can be measured. The MIS data will lay the basis for development of national sectoral policies ( sectoral policies and monitoring reports content analysis ) and climate adaptation strategies. The component includes support to the Department of Geology in digitalization of the registry of wells used for water supply purposes as part of the water cadaster, with particular focus on the Khatlon region. Climate change is expected to lead to diminished groundwater recharge in some areas because of reduced precipitation and decreased runoff. Monitoring data for aquifer water level, changes in chemistry, and detection of", + "ner_text": [ + [ + 127, + 134, + "named" + ] + ], + "validated": false, + "empirical_context": "Financing under this component will provide support for the establishment of the WSS unit within the ministry, creation of the WSS MIS, development and approval of the necessary reporting protocols, and support for the preparation of the WSS sector assessment report to be published annually after the Year 3 of the project. The WSS unit will be tasked with ensuring coordination with the stakeholder agencies in designing climate-smart WSW interventions and their implementation at the utility and community levels.", + "type": "system", + "explanation": "However, the context indicates it is a system for management and coordination, not explicitly a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a Management Information System that could store data.", + "contextual_reason_agent": "However, the context indicates it is a system for management and coordination, not explicitly a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 52, + "text": "29. Advocacy and communications about this new HIV ME system is essential. For this reason, communications plan will be included as part of the annual costed M & E work plan and will involve the PFO and M & E champions in each IGAD country. 30. Surveys and surveillance data will be used to track PDO-level project results. There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations. The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations.", + "ner_text": [ + [ + 555, + 580, + "named" + ], + [ + 770, + 778, + "sentinel HIV surveillance <> reference population" + ], + [ + 780, + 789, + "sentinel HIV surveillance <> reference population" + ], + [ + 791, + 795, + "sentinel HIV surveillance <> reference population" + ] + ], + "validated": true, + "empirical_context": "Surveys and surveillance data will be used to track PDO-level project results. There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. a ) BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees, returnees, IDPs, and surrounding populations.", + "type": "survey", + "explanation": "In the context, 'sentinel HIV surveillance' is mentioned as part of the surveys and surveillance data used to track project results, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'sentinel HIV surveillance' refers to a specific type of data collection method.", + "contextual_reason_agent": "In the context, 'sentinel HIV surveillance' is mentioned as part of the surveys and surveillance data used to track project results, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "185_multi-page", + "page": 42, + "text": "43 Percent increase in number of facilities implementing and evaluating STI / TB diagnosis and treatment activities 4 ) Partnerships established Increase in number of public Survey data with key implementers in all sector institutions sectors and at all levels and implementing a strategy to increased financial and human address the impact of resources available to support HIV / AIDS on their sector HIV / AIDS activities Number of private firms implementing a strategy to cope with the HIV / AIDS impact on their firm Number of public and private sector organizations that implement HIV / AIDS strategies in the public interest ( i. e., beyond the scope of their immediate institutional and corporate interests ) Number of community-based organizations inplementing programs for orphans and vulnerable children Increased activity of the UN Theme Groups on HIV / AIDS Percent increase in total expenditures per individual I infected with HIV / AIDS,", + "ner_text": [ + [ + 174, + 185, + "named" + ] + ], + "validated": true, + "empirical_context": "43 Percent increase in number of facilities implementing and evaluating STI / TB diagnosis and treatment activities 4 ) Partnerships established Increase in number of public Survey data with key implementers in all sector institutions sectors and at all levels and implementing a strategy to increased financial and human address the impact of resources available to support HIV / AIDS on their sector HIV / AIDS activities Number of private firms implementing a strategy to cope with the HIV / AIDS impact on their firm Number of public and private sector organizations that implement HIV / AIDS strategies in the public interest ( i. e.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to data collected from surveys used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey data' typically refers to collected information from surveys.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data collected from surveys used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "The increase in service utilization is translated into deaths averted using published empirical studies; mortality rates for Balochistan were taken from the 2012 and 2017 \u2013 18 PDHS. 70 Effect sizes for the impacts of an additional skilled birth on maternal and neonatal mortality rates come from Graham, Bell, and Bullough ( 2001 ) 71 and Bhutta et al. ( 2014 ), 72 respectively. The effect size used to estimate the impact of measles vaccination on mortality of children under five is based on McGovern and Canning ( 2015 ). 73 Because their effect size is for full child vaccination, an adjustment factor of 0. 89 is applied to the share of children with 69 Pakistan Bureau of Statistics. Population Census 2017. http: / / www. pbs. gov. pk / content / block \u2010 wise \u2010 provisional \u2010 summary \u2010 results \u2010 6th \u2010 population \u2010 housing \u2010 census \u2010 2017 \u2010 january \u2010 03 \u2010 2018; for data from the 1998 and earlier censuses, see http: / / www. pbs. gov. pk / sites / default / files / population_census / Administrative % 20Units. pdf. 70 Maternal mortality rates from PDHS 2012 \u2013 13 are used since no maternal mortality data is available from PDHS 2017 \u2013 18. 71 Graham, W. J., J. S.", + "ner_text": [ + [ + 1059, + 1063, + "named" + ], + [ + 105, + 120, + "PDHS <> data description" + ], + [ + 125, + 136, + "PDHS <> data geography" + ], + [ + 157, + 161, + "PDHS <> reference year" + ], + [ + 166, + 175, + "PDHS <> reference year" + ], + [ + 296, + 322, + "PDHS <> author" + ], + [ + 339, + 352, + "PDHS <> author" + ], + [ + 463, + 482, + "PDHS <> reference population" + ], + [ + 495, + 515, + "PDHS <> author" + ], + [ + 660, + 689, + "PDHS <> publisher" + ], + [ + 709, + 713, + "PDHS <> publication year" + ], + [ + 1064, + 1073, + "PDHS <> reference year" + ], + [ + 1139, + 1148, + "PDHS <> publication year" + ], + [ + 1189, + 1207, + "PDHS <> usage context" + ] + ], + "validated": true, + "empirical_context": "pdf. 70 Maternal mortality rates from PDHS 2012 \u2013 13 are used since no maternal mortality data is available from PDHS 2017 \u2013 18. 71 Graham, W.", + "type": "survey", + "explanation": "PDHS is explicitly mentioned as a source of maternal mortality rates, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PDHS is referenced in relation to maternal mortality rates, suggesting it contains relevant data.", + "contextual_reason_agent": "PDHS is explicitly mentioned as a source of maternal mortality rates, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 42, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 37 of 61 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 ( Yes / No ) The common financial vehicle has been established ( Yes / No ) No No Yes Yes Yes Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Programs using the Social Registry to select their beneficiaries Number of user agreements signed by the Social Registry Semester Project Semester report Supervision Social Registry Directorate SSN programs ' beneficiary households avoiding negative coping strategies ( national / refugees ) This indicators tracks the percentage of households benefiting from the shock - response program which avoid negative coping strategies as a result of the program. Negative Coping strategies refer to those included in the Coping Strategy Index ( CSI ): reduce number of meals eaten in a day, consume seed stock held for next season, and so on.", + "ner_text": [ + [ + 643, + 658, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 686, + 708, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 37 of 61 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 2 3 4 ( Yes / No ) The common financial vehicle has been established ( Yes / No ) No No Yes Yes Yes Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Programs using the Social Registry to select their beneficiaries Number of user agreements signed by the Social Registry Semester Project Semester report Supervision Social Registry Directorate SSN programs ' beneficiary households avoiding negative coping strategies ( national / refugees ) This indicators tracks the percentage of households benefiting from the shock - response program which avoid negative coping strategies as a result of the program. Negative Coping strategies refer to those included in the Coping Strategy Index ( CSI ): reduce number of meals eaten in a day, consume seed stock held for next season, and so on.", + "type": "registry", + "explanation": "In this context, it is indeed a dataset as it is used to select beneficiaries for programs and is referenced as a source of data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to beneficiaries.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is used to select beneficiaries for programs and is referenced as a source of data collection.", + "contextual_signal": "mentioned as a data source for selecting beneficiaries", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 47, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 32 Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA Responsibility for Data Collection BRD and MINEMA. Matching grants issued to businesses in low-carbon industries ( Number ) Description Quantitative indicator counting number of matching grants made to businesses in low carbon industries. The low carbon industries will be identified in the Project Implementation Manual. Frequency Quarterly. Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA. Beneficiaries of partial credit guarantee scheme that are still operational one year after intervention ( Percentage ) Description Quantitative indicator counting number of beneficiaires of the credit guarantee scheme that are operational 1 year after the intervention. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiaries. BDF data fed to MINEMA. Responsibility for Data Collection BDF and MINEMA Beneficiaries of matching grant that are still operational one year after intervention ( Percentage ) Description Quantitative indicator counting number of beneficiaires that received matching grants and that are operational 1 year after the intervention.", + "ner_text": [ + [ + 289, + 297, + "named" + ], + [ + 74, + 80, + "BRD data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA Responsibility for Data Collection BRD and MINEMA. Matching grants issued to businesses in low-carbon industries ( Number ) Description Quantitative indicator counting number of matching grants made to businesses in low carbon industries.", + "type": "data", + "explanation": "In the context, 'BRD data' is explicitly referenced as part of the data collection process, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BRD data' is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "In the context, 'BRD data' is explicitly referenced as part of the data collection process, indicating it functions as a data source.", + "contextual_signal": "mentioned as part of data collection and reporting", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "173_multi0page", + "page": 18, + "text": "They need support to cope with the IH1V / AIDS epidemic. They are able to express their requirements. Appropriate mechanisms must be put in place to facilitate the provision of support. * MAP projects tended to lose momentum after Board approval. Funds available under a PHRD grant are being used to sensitize representatives of public sector and civil society organizations, to establish program coordination and implementation mechanisms and to train community development agents. A PPF was requested by Govemment and approved by IDA in April 2002. * Importance of partnerships: some MAPs have been prepared very fast and other development partners were not involved enough. In Guinea the MAP worked through the UNAIDS group which was extended to other non-UN partners. * The tendency for some National AIDS Commission to become bureaucracies: this was taken into account in Guinea by limiting the CNLS to six professionals, and by not creating decentralized Units. * Importance of monitoring and evaluation. The design of the monitoring and evaluation system should focus on who will use the indicators and how they will influence the decision-making process. Baseline data are essential for proper monitoring. 4. Indications of recipient commitment and ownership The Recipient ' s previous strategy to fight IIV / AIDS was revised using a multi-sectoral approach, which the Recipient recognizes as the only viable approach. Some important initiatives have already been taken both by various ministries and by development partners, civil society, NGOs, and within the private sector. Government created the National Committee for the Fight against IV / AIDS in March 2002, and selected the Executive Secretary of its Secretariat in May 2002. Concrete evidence of Recipient commitment will be manifested by the rate at which implementation will take place and by the adequate monitoring of the situation at the highest government levels. - 15 -", + "ner_text": [ + [ + 1163, + 1176, + "named" + ], + [ + 545, + 549, + "Baseline data <> publication year" + ], + [ + 1670, + 1674, + "Baseline data <> publication year" + ] + ], + "validated": true, + "empirical_context": "The design of the monitoring and evaluation system should focus on who will use the indicators and how they will influence the decision-making process. Baseline data are essential for proper monitoring. 4.", + "type": "data", + "explanation": "In this context, 'baseline data' is explicitly mentioned as essential for proper monitoring, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' refers to a structured collection of data used for monitoring and evaluation.", + "contextual_reason_agent": "In this context, 'baseline data' is explicitly mentioned as essential for proper monitoring, indicating it is used as a data source.", + "contextual_signal": "mentioned as essential for proper monitoring", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 115, + "text": "MOF will coordinate with all agencies to receive accurate and timely data and will be providing the World Bank with quarterly reports on the progress. 45. Strengthening Transparency: All Program documents will be uploaded into the Program website to be created in MOF. The Program website will have up-to-date information on the operational and financial aspects of the Program and will be accessible to the general public. As part of the outreach activities of the Program, information will be shared with the National Network for the Right of Access to Information, which is a is a multi-sectoral group formed upon the initiative of the Lebanese Parliamentarians against Corruption ( LebPAC ), the Lebanese Transparency Association52 ( LTA ) and Association pour la D\u00e9fense des Droits et des Libert\u00e9s ( ADDL ), in collaboration with the American Bar Association ( ABA ) Rule of Law Initiative in Lebanon. The e - Portal maintained by the Central Tender Board ( CTB ) will upload all the documents relating to public procurement and contract management relating to the Program. MOF and Central Inspection ( housing CTB ) will collaborate in case of any complaint related to public procurement and will inform the World Bank accordingly. The Program will 52 The Lebanese Transparency Association is a non-profit organization aimed at promoting transparency and deterring corruption in the public and private sector. http: / / www. transparency-lebanon. org /", + "ner_text": [ + [ + 911, + 921, + "named" + ] + ], + "validated": false, + "empirical_context": "As part of the outreach activities of the Program, information will be shared with the National Network for the Right of Access to Information, which is a is a multi-sectoral group formed upon the initiative of the Lebanese Parliamentarians against Corruption ( LebPAC ), the Lebanese Transparency Association52 ( LTA ) and Association pour la D\u00e9fense des Droits et des Libert\u00e9s ( ADDL ), in collaboration with the American Bar Association ( ABA ) Rule of Law Initiative in Lebanon. The e - Portal maintained by the Central Tender Board ( CTB ) will upload all the documents relating to public procurement and contract management relating to the Program. MOF and Central Inspection ( housing CTB ) will collaborate in case of any complaint related to public procurement and will inform the World Bank accordingly.", + "type": "system", + "explanation": "However, the e-Portal is described as a system for uploading documents, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves the uploading of documents related to public procurement.", + "contextual_reason_agent": "However, the e-Portal is described as a system for uploading documents, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 42, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 38 of 40 ( figure A3-1 ). Probability of being poor \u2013 in the bottom 40 percent \u2013 is 61 percent lower for people with basic education compared to people without education. Figure A3-1: Distribution of population in Sudan by education attainment and wealth quintiles Source: estimations based on Sudan MICS 2014 / 15 data. 17. Educational attainment is highly associated with literacy rates. Even among those that never completed basic education, the share of literate people is above 80 percent after completing at least six grades compared to only 15 percent of people that attended only first grade of basic education ( figure A3-2 ). Figure A3-2: Women ' s literacy rates in Sudan, 2014 Source: estimations based on Sudan MICS, 2014 / 15 data. Note: a woman is literate if she is able to read parts of sentence or able to read whole sentence Impact on Internal Efficiency Estimates and Cost Savings 18.", + "ner_text": [ + [ + 798, + 808, + "named" + ], + [ + 4, + 14, + "Sudan MICS <> publisher" + ], + [ + 15, + 20, + "Sudan MICS <> data geography" + ], + [ + 185, + 212, + "Sudan MICS <> reference population" + ], + [ + 294, + 299, + "Sudan MICS <> data geography" + ], + [ + 374, + 379, + "Sudan MICS <> data geography" + ], + [ + 385, + 394, + "Sudan MICS <> reference year" + ], + [ + 757, + 762, + "Sudan MICS <> data geography" + ], + [ + 798, + 803, + "Sudan MICS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Even among those that never completed basic education, the share of literate people is above 80 percent after completing at least six grades compared to only 15 percent of people that attended only first grade of basic education ( figure A3-2 ). Figure A3-2: Women ' s literacy rates in Sudan, 2014 Source: estimations based on Sudan MICS, 2014 / 15 data. Note: a woman is literate if she is able to read parts of sentence or able to read whole sentence Impact on Internal Efficiency Estimates and Cost Savings 18.", + "type": "survey", + "explanation": "It is indeed a dataset as it is explicitly mentioned as the source of data used for empirical analysis regarding women's literacy rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Sudan MICS' is referenced in relation to literacy rates and is associated with data estimations.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly mentioned as the source of data used for empirical analysis regarding women's literacy rates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 28, + "text": "Results Monitoring and Evaluation 55. Monitoring and evaluation is a key element of the Project, because it provides the regular assessment of the Project \u2019 s performance to the Government, the World Bank and other development partners. In this Project, a core focus of both Component 1 and Component 2 is to build the capacity of the Government of Burundi to monitor its own programs in the Social Protection ( SP ) sector. The proposed Project includes a series of monitoring and evaluation activities for the cash transfer program. These include: process evaluations; regular spot checks and beneficiary surveys to evaluate the quality of implementation, the efficiency of the targeting and payment processes, and the overall satisfaction with the program; impact evaluation for the cash transfers and parts of the behavior change communication components. The key delivery mechanisms put in place for the CT program, such as the MIS, will eventually enable the implementing institutions to manage and monitor the implementation of their programs. 56. The Project Implementation Unit will organize annual financial audits for the Project, annual reviews of progress, and a mid-term review to guide the Project implementation after the first eight communes. The mid-term review will involve Project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements and outcomes. It will confirm the plans and processes for expansion of the CT", + "ner_text": [ + [ + 933, + 936, + "named" + ] + ], + "validated": false, + "empirical_context": "These include: process evaluations; regular spot checks and beneficiary surveys to evaluate the quality of implementation, the efficiency of the targeting and payment processes, and the overall satisfaction with the program; impact evaluation for the cash transfers and parts of the behavior change communication components. The key delivery mechanisms put in place for the CT program, such as the MIS, will eventually enable the implementing institutions to manage and monitor the implementation of their programs. 56.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system but not as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured system for managing information.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system but not as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 57, + "text": "Data on payment of the livelihood grant will be collected through the SNSOP MIS that will be linked with SNSOP payment data The Implementing Partner responsible for Component 2 will be responsible for data collection Eligible beneficiary households with functional income-generating investments The total number of households with functional This indicator will be SNSOP Management Data will be collected through routine M & E Implementing Partner", + "ner_text": [ + [ + 105, + 123, + "named" + ], + [ + 217, + 248, + "SNSOP payment data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Data on payment of the livelihood grant will be collected through the SNSOP MIS that will be linked with SNSOP payment data The Implementing Partner responsible for Component 2 will be responsible for data collection Eligible beneficiary households with functional income-generating investments The total number of households with functional This indicator will be SNSOP Management Data will be collected through routine M & E Implementing Partner", + "type": "dataset", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as data collected for monitoring and evaluation purposes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific payment data related to a livelihood grant program.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as data collected for monitoring and evaluation purposes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 34, + "text": "This will include support for setting up an M & E system and establishing a comprehensive CRM system, as well as relevant staff training on different aspects of M & E. The project evaluation system will help determine whether the training provided under the project translates into income opportunities. In addition to the outcome of obtaining a job or income opportunity, the project will include results indicators on skills development ( such as increased confidence, or other personal or social outcomes ). The project monitoring reports will contain, at a minimum, summary data on overall performance against project targets, implementation challenges experienced, and feedback received from project beneficiaries. 38 70. The World Bank will review the Results Framework submitted by the PMU as part of implementation support. The World Bank experts will discuss the progress and deviations with the PMU to identify any areas where additional help from the World Bank is needed. The PMU and the World Bank will also use results data to build awareness of project results among key beneficiaries and counterparts. Beneficiary feedback will also feed into regular monitoring. The M & E framework will leverage data collection activities undertaken by the independent verification agency ( IVA ) for DLIs. 71. The project will conduct rigorous impact evaluations to identify the absolute impact of the interventions prescribed by the project design on key outcomes and provide feedback on the relative efficacy of alternate design modalities to enable course corrections. 39 A consultation process to prioritize and select the questions and methodologies for the impact evaluations will take place between country stakeholders and researchers when project activities are being further defined. The impact evaluations would focus on the following knowledge gaps: \uf0b7 For Sub-component 1. 1, it will be important to assess the impact of skills trainings activities on the command 38 To the extent possible, any data gathered on Syrian refugees will be disaggregated by sex to identify specific gender gaps that need to be addressed. 39 Rigorous impact evaluations assess causality by identifying a counterfactual with experimental or quasi-experimental methodologies.", + "ner_text": [ + [ + 172, + 197, + "named" + ] + ], + "validated": false, + "empirical_context": "This will include support for setting up an M & E system and establishing a comprehensive CRM system, as well as relevant staff training on different aspects of M & E. The project evaluation system will help determine whether the training provided under the project translates into income opportunities. In addition to the outcome of obtaining a job or income opportunity, the project will include results indicators on skills development ( such as increased confidence, or other personal or social outcomes ).", + "type": "system", + "explanation": "However, it is described as a project evaluation system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data collection.", + "contextual_reason_agent": "However, it is described as a project evaluation system, not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 39, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 35 of 74 Figure 3: Implementation Arrangements B. Results Monitoring and Evaluation Arrangements 73. The proposed SNSOP will develop a comprehensive M & E framework and plan, building on the existing ones under the SSSNP. The SNSOP will employ an innovative M & E system that relies primarily on electronic data collection to be stored and managed in the MIS, building on the M & E system using the Geo-Enabling Initiative for Monitoring and Supervision ( GEMS ) developed under SSSNP to allow for real time data collection and analysis, thus improving the efficiency and reducing cost of M & E. M & E activities will also be embedded in project activities where possible to minimize the burden on field-based staff. These flexible, remote arrangements allow the M & E system to adapt to various circumstances in South Sudan \u2019 s FCV context. Key M & E activities will include Registration Lessons Learned surveys that will assess the effectiveness of targeting and registration and identify areas for improvement. These surveys will provide baseline information on key demographics and socioeconomic indicators that will be tracked over the course of the project. There will also be Post Distribution Monitoring to monitor project implementation, mainly on payments under components 1 and 2.", + "ner_text": [ + [ + 970, + 1006, + "named" + ], + [ + 4, + 14, + "Registration Lessons Learned surveys <> publisher" + ], + [ + 907, + 918, + "Registration Lessons Learned surveys <> data geography" + ], + [ + 1135, + 1155, + "Registration Lessons Learned surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "These flexible, remote arrangements allow the M & E system to adapt to various circumstances in South Sudan \u2019 s FCV context. Key M & E activities will include Registration Lessons Learned surveys that will assess the effectiveness of targeting and registration and identify areas for improvement. These surveys will provide baseline information on key demographics and socioeconomic indicators that will be tracked over the course of the project.", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as providing baseline information and assessing effectiveness, indicating they function as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to surveys that collect data on demographics and socioeconomic indicators.", + "contextual_reason_agent": "These surveys are explicitly mentioned as providing baseline information and assessing effectiveness, indicating they function as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 812, + 817, + "named" + ], + [ + 578, + 581, + "NEMIS <> author" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to education, specifically for refugee learners.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for managing primary education.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to education, specifically for refugee learners.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "103_Kenya-Hon-Africa-PAD-04072017", + "page": 93, + "text": "The World Bank Development Response to Displacement Impacts Project ( DRDIP ) in the Horn of Africa ( P161067 ) Page 91 of 120 geotagged photographs, biometric record of participation in physical labor ), and periodic audits through Independent Integrated Fiduciary and Accountability Review Agency will be required to enhance the efficiency and transparency during implementation. The PPSD further advises the establishment of a database of subprojects, micro projects, and income generation activities for the benefit of communities and knowledge sharing. The Intergovernmental Authority on Development ( IGAD ) in Eastern Africa has established a procurement unit at FDMM Regional Secretariat, Nairobi. The Procurement Unit, will be responsible for the implementation of activities that support the IGAD component. The procurement activities envisaged includes low value, low risk contracts for Goods and Consultant Services and Technical Assistance all of which would be sourced from the domestic market. Although IGAD has experience in implementing World Bank funded projects before, to augment the procurement capacity there will be need to hire an additional procurement staff at the secretariat. 47. Systematic Tracking of Exchanges in Procurement ( STEP ): The project will use STEP, a planning and tracking system, which would provide data on procurement activities, establish benchmarks, monitor delays and measure procurement performance. 48.", + "ner_text": [ + [ + 1287, + 1291, + "named" + ] + ], + "validated": false, + "empirical_context": "47. Systematic Tracking of Exchanges in Procurement ( STEP ): The project will use STEP, a planning and tracking system, which would provide data on procurement activities, establish benchmarks, monitor delays and measure procurement performance. 48.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it is mentioned in the context of providing data on procurement activities.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 15, + "text": "Washington, DC: World Bank and World Food Programme. 8 Enterprise Surveys ( database ), International Finance Corporation and World Bank, Washington, DC, https: / / www. enterprisesurveys. org /. 9 Enterprise Surveys ( database ), International Finance Corporation and World Bank, Washington, DC., https: / / www. enterprisesurveys. org /. 10 Ayyagari, M., A. Demirg\u00fc\u00e7-Kunt, and V. Maksimovic. 2011. \u201c Small vs. Young Firms Across the World: Contribution to Employment, Job Creation, and Growth. \u201d Policy Research Working Paper 5631, World Bank, Washington, DC. 11 World Bank. 2014. Turkey \u2019 s Transitions: Integration, Inclusion, Institutions. Report 90509-TR. Washington, DC: World Bank. 12 World Bank 2014 and 2018 data of the Survey on the Access to Finance of Enterprises ( database ), European Central Bank, Frankfurt, https: / / www. ecb. europa. eu / stats / ecb_surveys / safe / html / index. en. html.", + "ner_text": [ + [ + 730, + 776, + "named" + ], + [ + 16, + 26, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 126, + 136, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 269, + 279, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 534, + 544, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 565, + 575, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 577, + 581, + "Survey on the Access to Finance of Enterprises <> publication year" + ], + [ + 583, + 589, + "Survey on the Access to Finance of Enterprises <> data geography" + ], + [ + 678, + 688, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 693, + 703, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 704, + 708, + "Survey on the Access to Finance of Enterprises <> publication year" + ], + [ + 713, + 717, + "Survey on the Access to Finance of Enterprises <> publication year" + ], + [ + 791, + 812, + "Survey on the Access to Finance of Enterprises <> publisher" + ], + [ + 814, + 823, + "Survey on the Access to Finance of Enterprises <> data geography" + ] + ], + "validated": true, + "empirical_context": "Washington, DC: World Bank. 12 World Bank 2014 and 2018 data of the Survey on the Access to Finance of Enterprises ( database ), European Central Bank, Frankfurt, https: / / www. ecb.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as part of the data used by the World Bank.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as part of the data used by the World Bank.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 1089, + 1094, + "named" + ], + [ + 578, + 581, + "NEMIS <> author" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 49, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually. Data source MOE, Education Management Information System ( EMIS ) Methodology for Data Collection Review of EMIS-generated report. Responsibility for Data Collection MOE. Strengthened digital skills in civil service Description Number of senior officials and applicants to jobs in the civil service with digital skills certified as a result of training. Frequency Once, by Program closure. Data source Roster of digitally skilled trained and certified senior officials and applicants.", + "ner_text": [ + [ + 515, + 554, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XL Direct project beneficiaries \u2013 Refugees and Host Communities Description Number of direct beneficiaries of digital identification, EMR and Tawjihi digitalization who are refugees or living in host communities with a high concentration of refugees ( > 10 percent of the population ), disaggregated by gender Frequency Annual Data source Reports on digital ID implementation by MODEE, administrative data from the EHS, Education Management Information System ( EMIS ) Methodology for Data Collection Data collected and compiled from administrative sources by MOPIC to measure the number of refugee beneficiaries and host communities Responsibility for Data Collection MOPIC Result Area 2 on enhanced government effectiveness through digitalization Increased access to reliable and efficient digital student assessment ( DLI ) Description Percentage of students sitting for the digital Tawjihi exam, including refugees. Frequency Annually.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 65, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 61 of 86 RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target Upgraded MIS to scale up interventions at national level ( Yes / No ) No Yes Beneficiaries receiving social protection transfers through digital payments ( Percentage ) 0. 00 90. 00 Integration of refugee and host communities into national social protection systems Approval of a Social Protection Strategy integrating refugees and host communities as target groups ( Yes / No ) No Yes Project management and implementation GRM in place and being used to monitor feedback trends, including with mobile phones provided by the project, with claims addressed and closed and monthly reports being prepared ( Yes / No ) No Yes Process evaluation carried out ( Number ) 0. 00 4. 00 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Households in targeted areas included in the National Social Registry Number of households that are registered in the National Social Registry in project implementation areas Annual National Social Registry The Project Implementation Unit will obtain the data from the agency handling the National Social Registry Project Implementation Unit Households in targeted areas included Number of refugee Annual National Socio-economic data Agency handling the", + "ner_text": [ + [ + 1046, + 1070, + "named" + ], + [ + 4, + 14, + "National Social Registry <> publisher" + ], + [ + 1001, + 1011, + "National Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 4. 00 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Households in targeted areas included in the National Social Registry Number of households that are registered in the National Social Registry in project implementation areas Annual National Social Registry The Project Implementation Unit will obtain the data from the agency handling the National Social Registry Project Implementation Unit Households in targeted areas included Number of refugee Annual National Socio-economic data Agency handling the", + "type": "registry", + "explanation": "It is indeed a dataset as it is described as a registry that collects and provides data for project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data for households in targeted areas.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a registry that collects and provides data for project implementation.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "This subcomponent will support improved planning, management, and evaluation of education reforms in the following areas: ( a ) improvement of the existing EMIS and making better use of data to support management decisions at all levels, ( b ) national and international student assessments, and ( c ) TA to support implementation of Components 1 and 2 of the project. 41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "ner_text": [ + [ + 473, + 477, + "named" + ] + ], + "validated": false, + "empirical_context": "41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data management and collection.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 32, + "text": "These assessments will capture social issues in areas where the project will be implemented. Other safeguards 64. Gender: As previously mentioned, women comprise a substantial share of the recent returnees. Returnee women are even less economically active than Afghan women overall. According to recent survey data, only 13 percent of documented Afghan refugee women are employed, with an even lower share for recently returned refugee women at 10 percent ( whereas female labor force participation overall in the country is approximately 19 percent ). 32 Afghan women ' s labor force participation is as low as 19 percent and most of them are engaged in the informal sector. Women \u2019 s access to markets \u2014 both customers and business owners \u2014 is constrained by both social norms as well as poor economic infrastructure. Women mostly rely on their male family members to make household purchases. Only 5 percent of all registered business belong to women. In addition, there is a significant gender gap in access to identity. Though reliable data are lacking, it is well \u2010 established that the vast majority of 32 World Bank. \u201c Living conditions and settlement decisions of recent Afghan returnees: Findings from a 2018 World Bank Phone Survey of Afghan Returnees \u201d, forthcoming.", + "ner_text": [ + [ + 303, + 314, + "named" + ], + [ + 346, + 366, + "survey data <> reference population" + ], + [ + 410, + 441, + "survey data <> reference population" + ], + [ + 1113, + 1123, + "survey data <> publisher" + ], + [ + 1214, + 1218, + "survey data <> publication year" + ], + [ + 1219, + 1229, + "survey data <> publisher" + ], + [ + 1294, + 1312, + "survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Returnee women are even less economically active than Afghan women overall. According to recent survey data, only 13 percent of documented Afghan refugee women are employed, with an even lower share for recently returned refugee women at 10 percent ( whereas female labor force participation overall in the country is approximately 19 percent ). 32 Afghan women ' s labor force participation is as low as 19 percent and most of them are engaged in the informal sector.", + "type": "survey", + "explanation": "In this context, 'survey data' is indeed used as a data source to provide insights into the employment status of Afghan refugee women.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'survey data' is a dataset because it refers to collected information from a survey.", + "contextual_reason_agent": "In this context, 'survey data' is indeed used as a data source to provide insights into the employment status of Afghan refugee women.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 41, + "text": "Biannual analysis of project management information by the PIU will inform project processes, allowing for timely actions and adjustments ( including learning how to better support females and minority groups ). Regular follow-ups by the frontline implementing agencies ( that is, sectoral hubs, apprenticeship CEMs, and entrepreneurship implementing agencies ) in the form of brief, agile tracer studies will be conducted, with on-the-ground support by a third party. Beneficiary surveys will be conducted by a third party annually to further measure achievement of results and inform adjustments that may be needed in project design and implementation arrangements. Periodic representative, sample-based, and unannounced monitoring visits are also expected to be conducted by third parties over the project period, to supplement internal monitoring and reporting. A midterm review will involve the project \u2019 s stakeholders to collectively review project results and implementation arrangements. 71. At the PIU, an M & E system will be set up to host and manage operational data for project indicators. The PIU will work with the general secretary in charge of technical and vocational education and trades and the Bureau of Planning and Statistics under the Permanent Secretary at MNESR to attend to links and ultimate integration between the project \u2019 s M & E system and the MNESR Education Management Information System. The M & E system will be supported by the World Bank \u2019 s Geo-Enabling for Monitoring and Supervision ( GEMS ) initiative, which leverages the use of portable ICT tools", + "ner_text": [ + [ + 1378, + 1423, + "named" + ] + ], + "validated": false, + "empirical_context": "At the PIU, an M & E system will be set up to host and manage operational data for project indicators. The PIU will work with the general secretary in charge of technical and vocational education and trades and the Bureau of Planning and Statistics under the Permanent Secretary at MNESR to attend to links and ultimate integration between the project \u2019 s M & E system and the MNESR Education Management Information System. The M & E system will be supported by the World Bank \u2019 s Geo-Enabling for Monitoring and Supervision ( GEMS ) initiative, which leverages the use of portable ICT tools", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 45, + "text": "By having a database with larger geographical coverage, when crises / shocks happen, humanitarian organizations and government agencies will be able to respond faster and in a more coordinated way. The social registry and business functions for the two safety net pilots will be supported by the MIS developed under this Component 2 during the project. As the social registry \u2019 s use is expanded in the future, it can potentially de-linked from this MIS and have its own information system to facilitate flow of information between the social registry and other programs. 30. Payment system. The project will support the development of a payment system for Government safety nets. The payment system would enable Government to distribute the correct amount of benefits to the right people, at the right time, and with the right frequency, while minimizing transaction costs for both the program and the beneficiaries and allowing increased transparency and accountability of financial transactions. The project will use a small number of payment agencies to provide payments to beneficiaries and the selection of payment agencies will be supported by existing ( or new ) information outlining the various agencies and resources available, their pros and cons in the project areas and humanitarian and UN agencies experience for paying cash benefits in Chad. Payment agencies may be selected in each region based on the", + "ner_text": [ + [ + 202, + 217, + "named" + ], + [ + 12, + 20, + "social registry <> data type" + ] + ], + "validated": true, + "empirical_context": "By having a database with larger geographical coverage, when crises / shocks happen, humanitarian organizations and government agencies will be able to respond faster and in a more coordinated way. The social registry and business functions for the two safety net pilots will be supported by the MIS developed under this Component 2 during the project. As the social registry \u2019 s use is expanded in the future, it can potentially de-linked from this MIS and have its own information system to facilitate flow of information between the social registry and other programs.", + "type": "registry", + "explanation": "In the context, the 'social registry' is described as a system that supports information flow, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'social registry' implies a structured collection of data related to social services.", + "contextual_reason_agent": "In the context, the 'social registry' is described as a system that supports information flow, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source supporting business functions", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 88, + "text": "The World Bank Chad Agribusiness and Rural Transformation Project ( P179238 ) Page 84 of 88 Figure A8. 3. Barriers to account ownership, 2017 ( percent respondents without a financial institution account ) Source: Global Findex Database, 2017. 7. Similarly, the banking sector is exposed to vulnerabilities stemming from weaknesses in the credit reporting framework. Chad is a member of the Central Africa \u2019 s currency union ( CEMAC ), with monetary policy and financial sector regulatory and supervisory functions set at the regional level. The regional credit reporting system suffers from several weaknesses which impact the quality and availability of information about borrowers \u2019 behaviors. The regional Central Bank - Bank of Central African States ( Banque des Etats d \u2019 Afrique Centrale, BEAC ) has a credit risk registry ( Centrale des Risques ), but its effectiveness is hampered by the considerable delay in updating the information collected from the banks and the non-inclusion of data from microfinance institutions which account for a large number of loans. The efficiency of the supervisory framework also suffers from limited independence of the supervisory authority - Central Africa Banking Commission ( Commission Bancaire de l \u2019 Afrique Centrale, COBAC ), the need to better align prudential norms with best practices, and inadequate resources allocated to COBAC. 8.", + "ner_text": [ + [ + 810, + 830, + "named" + ], + [ + 15, + 19, + "credit risk registry <> data geography" + ], + [ + 137, + 141, + "credit risk registry <> publication year" + ], + [ + 238, + 242, + "credit risk registry <> publication year" + ], + [ + 367, + 371, + "credit risk registry <> data geography" + ] + ], + "validated": true, + "empirical_context": "The regional credit reporting system suffers from several weaknesses which impact the quality and availability of information about borrowers \u2019 behaviors. The regional Central Bank - Bank of Central African States ( Banque des Etats d \u2019 Afrique Centrale, BEAC ) has a credit risk registry ( Centrale des Risques ), but its effectiveness is hampered by the considerable delay in updating the information collected from the banks and the non-inclusion of data from microfinance institutions which account for a large number of loans. The efficiency of the supervisory framework also suffers from limited independence of the supervisory authority - Central Africa Banking Commission ( Commission Bancaire de l \u2019 Afrique Centrale, COBAC ), the need to better align prudential norms with best practices, and inadequate resources allocated to COBAC.", + "type": "registry", + "explanation": "This is indeed a dataset as it is described as a registry that collects information about borrowers' behaviors.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a credit risk registry, which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a registry that collects information about borrowers' behaviors.", + "contextual_signal": "mentioned as a registry that collects information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "172_multi0page", + "page": 46, + "text": "grant program and oversight WMA A Finance Directorate able to Adequate procedures in manage all project funds and compliance with IDA financial treasury and to comply with management and accounting accounting, budgeting and rules. financial reporting requirements. Internal Audit unit able to Report on budget execution per eview all MEST operations quarter and annual budget forecast based on assessment of needs and pnorities. EMIS established with data on EMIS network infrastructure four school annual surveys and set up with procedures in place gradually integrating and for users. linking data on finance ( i. e. udget execution ); personnel Four annual school surveys ( payroll and deployment published. ecords ); and indicators on OL standards. EMIS data are effectively used EMIS reports. for planning and prospective work on the education sector. Improved personnel Audit report on personnel management system with management contracted out. effective procedures enforced for recruitment, deployment Records on MEST ' s staff on and payment of MEST ' s staff the payroll. and teachers on the payroll. 80 trainers trained in first year List of trainers published and of Project to serve as trainers evaluation report on training for in-service teacher training received. activities to be implemented via school grants. 600 SMCs established with Reports on school tnspections prior mobilization from the from Inspectorate. community and with capacity built ( training when required ) to monitor school grant", + "ner_text": [ + [ + 429, + 433, + "named" + ] + ], + "validated": false, + "empirical_context": "Internal Audit unit able to Report on budget execution per eview all MEST operations quarter and annual budget forecast based on assessment of needs and pnorities. EMIS established with data on EMIS network infrastructure four school annual surveys and set up with procedures in place gradually integrating and for users. linking data on finance ( i.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data on network infrastructure and surveys.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 30, + "text": "25 78. Survey and public consultations. The project is basing the design of the water service expansion and standpost management components on the results of a socio-economic study and household survey \u2013 commissioned specifically for this project - of household demand and willingness and ability to pay for different modes of service ( standposts vs. private connections ) in the peri-urban neighborhoods of Bujumbura. In addition, as part of the project preparation, multiple consultations were held between REGIDESO staff, members and local leaders of 26 neighborhoods, and the Bank project team, with the facilitation of local consultants. A participatory workshop was organized to discuss the design of the proposed program and to disseminate the findings of the initial focus group interviews, the household survey, and the experience of similar endeavors in other countries. Representatives of 26 peri - urban neighborhoods and their local leaders expressed strong support for the proposed program. They provided key input into the design of the program during break-out sessions where the details of the program were discussed, such as the mode of selection and terms of reference for standpost operators, and the respective roles of the project partners ( REGIDESO, user committees, local administrators, and operators ). 79. Willingness and ability to pay.", + "ner_text": [ + [ + 160, + 180, + "named" + ], + [ + 185, + 201, + "socio-economic study <> data type" + ], + [ + 409, + 418, + "socio-economic study <> data geography" + ], + [ + 804, + 820, + "socio-economic study <> data type" + ] + ], + "validated": true, + "empirical_context": "Survey and public consultations. The project is basing the design of the water service expansion and standpost management components on the results of a socio-economic study and household survey \u2013 commissioned specifically for this project - of household demand and willingness and ability to pay for different modes of service ( standposts vs. private connections ) in the peri-urban neighborhoods of Bujumbura.", + "type": "study", + "explanation": "This is indeed a dataset as it is specifically commissioned for the project to gather empirical data on household demand and payment capabilities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a socio-economic study that likely contains structured data on household demand and payment willingness.", + "contextual_reason_agent": "This is indeed a dataset as it is specifically commissioned for the project to gather empirical data on household demand and payment capabilities.", + "contextual_signal": "mentioned as a study commissioned specifically for this project", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 23, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 19 of 74 protective support to HHs and investment in resilience building community assets will help sustain livelihoods, strengthen resilience, and prevent the most vulnerable from falling into destitution or being forcibly displaced. It will also directly support the Government \u2019 s Community Empowerment and Socioeconomic Development Strategy for Refugee Hosting Areas in South Sudan, with cash transfers promoting section 4. 6 of the strategy on creation of livelihood and income generating opportunities given the lack of employment prospects in refugee-hosting environments. 37. In the absence of an enabling environment for widescale mobile payment systems, beneficiaries will receive physical cash at the time of payment, except for Juba where mobile money payment will be piloted. A financial service provider ( i. e., paying agent ), which will be competitively selected by the MAFS, will deliver cash to beneficiaries. The MAFS will provide the recipient list and amount of money to the financial service provider, and the list of beneficiaries will be generated from the MIS. The MIS will capture beneficiaries ' biometric data, which will be used to ensure that only the eligible individuals will receive the cash transfer. The financial service provider pays beneficiaries verifying them biometrically.", + "ner_text": [ + [ + 1176, + 1179, + "named" + ] + ], + "validated": false, + "empirical_context": ", paying agent ), which will be competitively selected by the MAFS, will deliver cash to beneficiaries. The MAFS will provide the recipient list and amount of money to the financial service provider, and the list of beneficiaries will be generated from the MIS. The MIS will capture beneficiaries ' biometric data, which will be used to ensure that only the eligible individuals will receive the cash transfer.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system but not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves capturing data.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system but not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "182_multi0page", + "page": 66, + "text": "Procurement information will be recorded by the PCU and submitted to the Bank as part of the quarterly and annual progress reports. This information will include: revised cost estimates for the different contracts; revised timing of procurement actions, including advertising, bidding, contract award, and completion time for individual contracts; as well as compliance with aggregate limits ( within 15 % ) on specific methods of procurement. A Management Information System ( MIS ), with a procurement module will help the PCU monitor all procurement information. Co-financing: Yes - - Department for International Development, the Government of UK. Indicate the name of Procurement staff or Bank ' s part of the Task Team responsible for the procurement in the Project: Albania: Social Services Delivery Project Name: Seyoum Solomon ( ECSHD ) Ext: 32393 - 63 -", + "ner_text": [ + [ + 446, + 475, + "named" + ] + ], + "validated": false, + "empirical_context": "This information will include: revised cost estimates for the different contracts; revised timing of procurement actions, including advertising, bidding, contract award, and completion time for individual contracts; as well as compliance with aggregate limits ( within 15 % ) on specific methods of procurement. A Management Information System ( MIS ), with a procurement module will help the PCU monitor all procurement information. Co-financing: Yes - - Department for International Development, the Government of UK.", + "type": "system", + "explanation": "However, it is described as a system that helps monitor procurement information, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is described as a system that helps monitor procurement information, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 23, + "text": "There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. \u0083 BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees and surrounding populations ( IDPs and returnees if possible ). The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations. UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. \u0083 Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and among the surrounding population. Initially, only those sites supported by this project will be added. Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. 19", + "ner_text": [ + [ + 258, + 280, + "named" + ], + [ + 444, + 452, + "health facility survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. \u0083 BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees and surrounding populations ( IDPs and returnees if possible ).", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a type of survey that will be conducted to gather data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of survey that collects data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a type of survey that will be conducted to gather data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 997, + 1002, + "named" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for managing primary education and is utilized for various educational allocations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is explicitly mentioned as a source of data for managing primary education and is utilized for various educational allocations.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 91, + "text": "Available at: https: / / www. files. ethz. ch / isn / 187409 / 09012015103629. pdf. 115 Ibid. 116 Ibid. 117 Erdo\u011fan, M. 2014. Syrians in Turkey: Social Acceptance and Integration Research. In World Food Programme. 2017. Social Cohesion in Turkey: Refugee and Host Community Online Survey. Available at: https: / / reliefweb. int / report / turkey / social-cohesion-turkey-refugee-and - host-community-online-survey-round-1-december-2017. 118 German Marshall Fund. 2015. Turkish Perceptions Survey. In World Food Programme. 2017. Social Cohesion in Turkey: Refugee and Host Community Online Survey. Available at: https: / / reliefweb. int / report / turkey / social-cohesion-turkey-refugee-and-host-community-online - survey-round-1-december-2017. 119 Center for Middle Eastern Strategic Studies. 2015. Effects of the Syrian Refugees on Turkey. Report No. 195. Ankara, Turkey. Available at: https: / / www. files. ethz. ch / isn / 187409 / 09012015103629. pdf", + "ner_text": [ + [ + 470, + 496, + "named" + ], + [ + 120, + 124, + "Turkish Perceptions Survey <> reference year" + ], + [ + 137, + 143, + "Turkish Perceptions Survey <> data geography" + ], + [ + 192, + 212, + "Turkish Perceptions Survey <> publisher" + ], + [ + 464, + 468, + "Turkish Perceptions Survey <> publication year" + ], + [ + 796, + 800, + "Turkish Perceptions Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2015. Turkish Perceptions Survey. In World Food Programme.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of data gathered through a survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is labeled as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data gathered through a survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 897, + 901, + "named" + ], + [ + 186, + 189, + "EMIS <> publisher" + ], + [ + 764, + 782, + "EMIS <> data type" + ], + [ + 856, + 859, + "EMIS <> publisher" + ], + [ + 970, + 973, + "EMIS <> publisher" + ] + ], + "validated": true, + "empirical_context": "The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs.", + "type": "system", + "explanation": "EMIS is indeed a dataset as it is described as a system that collects varying aspects of data, functioning as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of collecting data at the school level.", + "contextual_reason_agent": "EMIS is indeed a dataset as it is described as a system that collects varying aspects of data, functioning as a data source.", + "contextual_signal": "mentioned as a data source that collects varying aspects of data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 80, + "text": "This review will validate the proposed reinsertion process or modify it if necessary. A final implementation report will be prepared within six months after the end of the project and include the contribution of the Government and donors. 81. Furthermore, M & E will include a MIS combining several robust databases to: ( i ) monitor each demobilized ex-combatant from demobilization through reinsertion, ( ii ) follow the implementing partners providing reinsertion support, and ( iii ) monitor the financial management. The various regional offices and the cantonment camps will be connected to the MIS via a wireless connection and data will be transmitted on a regular basis. The data will be centralized in Bamako and synchronized at each office to minimize any possible duplication.", + "ner_text": [ + [ + 277, + 280, + "named" + ] + ], + "validated": false, + "empirical_context": "81. Furthermore, M & E will include a MIS combining several robust databases to: ( i ) monitor each demobilized ex-combatant from demobilization through reinsertion, ( ii ) follow the implementing partners providing reinsertion support, and ( iii ) monitor the financial management. The various regional offices and the cantonment camps will be connected to the MIS via a wireless connection and data will be transmitted on a regular basis.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 14, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 10 of 40 Figure 2: Enrollment pyramid and share of illiterate pupils Source: Education Sector Analysis, 2018. Source: http: / / www. earlygradereadingbarometer. org / Low and stagnant learning outcomes 16. Learning outcomes in Sudan schools are generally low. According to the National Learning Assessment ( NLA ) conducted in 2015 for Grade 3 pupils, the results were low in all domains of the assessment: reading, writing, and numeracy. For example, only 5 percent of pupils could read fluently ( more than 60 words per minute ) in Arabic, and 40 percent were not able to read at all. Furthermore, the assessment of reading speed among third graders indicated an average speed of 15 words per minute, which is far below the estimated minimum reading speed of 40 words per minute thought to be necessary to gain understanding of and meaning from the text. However, Sudan \u2019 s third graders did better in listening and comprehension compared to pupils from other Arabic Countries. 17. There is sign of slight improvements in learning outcomes at the national level.", + "ner_text": [ + [ + 357, + 385, + "named" + ], + [ + 15, + 20, + "National Learning Assessment <> data geography" + ], + [ + 307, + 312, + "National Learning Assessment <> data geography" + ], + [ + 388, + 391, + "National Learning Assessment <> acronym" + ], + [ + 407, + 411, + "National Learning Assessment <> publication year" + ], + [ + 416, + 430, + "National Learning Assessment <> reference population" + ], + [ + 684, + 731, + "National Learning Assessment <> data description" + ], + [ + 946, + 951, + "National Learning Assessment <> data geography" + ] + ], + "validated": true, + "empirical_context": "Learning outcomes in Sudan schools are generally low. According to the National Learning Assessment ( NLA ) conducted in 2015 for Grade 3 pupils, the results were low in all domains of the assessment: reading, writing, and numeracy. For example, only 5 percent of pupils could read fluently ( more than 60 words per minute ) in Arabic, and 40 percent were not able to read at all.", + "type": "assessment", + "explanation": "The National Learning Assessment is indeed a dataset as it provides structured results on learning outcomes for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to an assessment that provides measurable results.", + "contextual_reason_agent": "The National Learning Assessment is indeed a dataset as it provides structured results on learning outcomes for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 59, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 53 of 102 revised programs in collaboration with the private sector. These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Female The indicator measures the cumulated number of female youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector. These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Annual Enrolment records at sector training hubs Administrative data ( registry of sectoral hubs ) M & E specialist within the PIU Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Refugees The indicator measures the cumulated number of refugee youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector.", + "ner_text": [ + [ + 718, + 742, + "named" + ], + [ + 15, + 22, + "Annual Enrolment records <> data geography" + ], + [ + 767, + 786, + "Annual Enrolment records <> data type" + ], + [ + 1032, + 1045, + "Annual Enrolment records <> reference population" + ] + ], + "validated": true, + "empirical_context": "These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Female The indicator measures the cumulated number of female youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector. These programs will be defined in the POM and so will the parameters for the private sector collaboration and engagement Annual Enrolment records at sector training hubs Administrative data ( registry of sectoral hubs ) M & E specialist within the PIU Of which - Number of students who obtained certification from new, revised programs in collaboration with the private sector - Refugees The indicator measures the cumulated number of refugee youth graduating from formal TVET institutions with certification from new and revised programs in collaboration with the private sector.", + "type": "registry", + "explanation": "This is indeed a dataset as it refers to administrative data that serves as a source for measuring enrolment statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'records' which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to administrative data that serves as a source for measuring enrolment statistics.", + "contextual_signal": "described as administrative data (registry of sectoral hubs)", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "016_BOSIB029693bc600f090cb0745b2b12ee69", + "page": 19, + "text": "The World Bank Second Kenya Urban Support Program ( P177048 ) Page 14 of 117 boards can implement. An engagement framework and public-private forums would help identify private sector needs and priority interventions. 25 The second step is understanding the urban area \u2019 s private sector characteristics by having up-to-date data on businesses and their needs. Having such a database would help better understand critical infrastructure development needs. In addition, structured dialogue with the private sector would help identify bottlenecks and pain points, inform urban development plans, and develop targeted interventions to attract, retain and grow businesses. The private sector \u2019 s overall involvement in urban planning would help prioritize investments to address binding constraints for private sector development. 22. Supporting growth of businesses in areas hosting refugees could contribute to the economic development of the surrounding communities. Recent analytical work by the World Bank Group in Kakuma - Kalobeyei has highlighted the potential for further inclusion of refugees in the economic development of Turkana West. 26 Allowing refugees to engage in economic activities will contribute towards an integration of service delivery systems. With adequate support, the private sector can thrive and provide services in areas hosting refugees. In June 2020, the International Finance Corporation launched the Kakuma-Kalobeyei Challenge Fund to incentivize for-profit companies, social enterprises, and host community and refugee entrepreneurs to start or scale up operations. C.", + "ner_text": [ + [ + 314, + 329, + "named" + ] + ], + "validated": false, + "empirical_context": "An engagement framework and public-private forums would help identify private sector needs and priority interventions. 25 The second step is understanding the urban area \u2019 s private sector characteristics by having up-to-date data on businesses and their needs. Having such a database would help better understand critical infrastructure development needs.", + "type": "data", + "explanation": "However, 'up-to-date data' is described in a general sense and does not refer to a specific structured collection or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'up-to-date data' refers to a dataset because it implies a collection of information.", + "contextual_reason_agent": "However, 'up-to-date data' is described in a general sense and does not refer to a specific structured collection or dataset.", + "contextual_signal": "mentioned only as a general term, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 40, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 29 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Direct Project Beneficiaries Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name of which female Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source MWE / DWD / DWRM progress reports, annual water & environments sector performance report, Water Supply Atlas ( database of water supply, functionality and distribution of all Uganda districts ), NWSC annual report and refugee database.", + "ner_text": [ + [ + 431, + 460, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 29 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Direct Project Beneficiaries Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "report", + "explanation": "However, it is not a dataset but rather a report that provides information, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured data.", + "contextual_reason_agent": "However, it is not a dataset but rather a report that provides information, not a structured collection of data.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": "The subcomponent will: ( a ) finance procurement of information communication technology equipment at the national level and train staff on data entry and use; ( b ) train trainers to develop health facility staff data entry, management, and use capacity; ( c ) create interoperability and integration between data systems and ensure data sharing, storage and backup; ( d ) develop, print, and disseminate Standard Operating Procedures for HMIS data entry, cleaning, quality improvement, and use at all levels; ( e ) conduct data review meetings and generate data use tools; ( f ) establish and operate the National and State level HMIS and Monitoring and Evaluation ( M & E ) Technical Working Groups; ( g ) conduct data quality improvement activities at the facility and national level; ( h ) operationalize a national and state level research committee, building on existing structure; ( i ) conduct an annual health sector review meeting; and ( j ) maintain and institutionalize the Health Service Functionality ( HSF ) Database. 39.", + "ner_text": [ + [ + 987, + 1032, + "named" + ] + ], + "validated": true, + "empirical_context": "The subcomponent will: ( a ) finance procurement of information communication technology equipment at the national level and train staff on data entry and use; ( b ) train trainers to develop health facility staff data entry, management, and use capacity; ( c ) create interoperability and integration between data systems and ensure data sharing, storage and backup; ( d ) develop, print, and disseminate Standard Operating Procedures for HMIS data entry, cleaning, quality improvement, and use at all levels; ( e ) conduct data review meetings and generate data use tools; ( f ) establish and operate the National and State level HMIS and Monitoring and Evaluation ( M & E ) Technical Working Groups; ( g ) conduct data quality improvement activities at the facility and national level; ( h ) operationalize a national and state level research committee, building on existing structure; ( i ) conduct an annual health sector review meeting; and ( j ) maintain and institutionalize the Health Service Functionality ( HSF ) Database. 39.", + "type": "database", + "explanation": "This is a dataset as it is explicitly mentioned as a 'Database' that is maintained and institutionalized, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Database' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a 'Database' that is maintained and institutionalized, indicating its role as a data source.", + "contextual_signal": "mentioned as a database that is maintained and institutionalized", + "tags": [] + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 105, + "text": "District accountants will coordinate the follow up of accountabilities from the communities with the subcounty accountants. The Public Financial Management Act 2015 and related Regulations; Treasury Accounting Instructions, 2003; Local Government Act; Local Government ( Financial and Accounting ) Regulations, 2007; and Local Government Financial Management Manual. SUN Accounting System but likely to migrate to the Integrated Financial Management Information System ( IFMIS ) June 30 Djibouti: Agence Djiboutienne de D\u00e9veloppement Social ( Djiboutian Social Development Agency, ADDS ) ADDS accounting staff under the Department of Accounting includes a finance officer who reports to the \u201c Agent Comptable, \u201d but the PIU will need to recruit an accountant who will be responsible for all financial management aspects of the project. Project Financial Operation Manual, which needs to be updated to cover this project. TOMPRO Accounting system and a management information system that needs to improve its functionality with respect to its reporting mechanism. December 31 Internal Control and Internal Audit Arrangements 57. Internal controls. The internal control procedures will be documented in the financial management manuals that are shown in the table 4. 1 for each of the implementing entities and their PIMs that will take into consideration gaps in their existing financial management manuals", + "ner_text": [ + [ + 921, + 945, + "named" + ] + ], + "validated": false, + "empirical_context": "Project Financial Operation Manual, which needs to be updated to cover this project. TOMPRO Accounting system and a management information system that needs to improve its functionality with respect to its reporting mechanism. December 31 Internal Control and Internal Audit Arrangements 57.", + "type": "system", + "explanation": "However, it is mentioned as an accounting system and not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data handling.", + "contextual_reason_agent": "However, it is mentioned as an accounting system and not as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 66, + "text": "The training will also be based on two elements: ( i ) the content of Annual starting year 2 Attendance lists Administrative date ( project records ) M & E Specialist within the PIU", + "ner_text": [ + [ + 70, + 109, + "named" + ] + ], + "validated": true, + "empirical_context": "The training will also be based on two elements: ( i ) the content of Annual starting year 2 Attendance lists Administrative date ( project records ) M & E Specialist within the PIU", + "type": "attendance list", + "explanation": "This is indeed a dataset as it is a structured collection of attendance records used for monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to attendance lists, which are structured collections of data.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of attendance records used for monitoring and evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 58, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 52 of 102 RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 plan competitions ( Number ) Number of enabling organizations benefitting from capacity development activities ( Number ) 0. 00 30. 00 40. 00 Number of enabling organizations benefitting from capacity development activities - operating inside the refugee camps ( Number ) 0. 00 5. 00 5. 00 C3: Reinforcing Data Systems for Market-Oriented Skills Development, Project Management, and M & E ICT platform for information about training and income generating opportunities operational ( Yes / No ) No Yes Yes Share of surveyed beneficiaries satisfied with project interventions ( Percentage ) 0. 00 40. 00 75. 00 Evaluation conducted on the impact of the enhancement of informal apprenticeship and foundational skills training on labor market outcomes ( Yes / No ) No No Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of students who obtained certification from new, revised programs in collaboration with the private sector The indicator measures the cumulated number of youth graduating from formal TVET institutions with certification from new and Annual Enrolment records at sector training hubs Administrative data ( registry of sector training hubs ) M & E specialist within the", + "ner_text": [ + [ + 1436, + 1455, + "named" + ], + [ + 4, + 14, + "Administrative data <> publisher" + ], + [ + 15, + 22, + "Administrative data <> data geography" + ], + [ + 1308, + 1354, + "Administrative data <> reference population" + ], + [ + 1387, + 1411, + "Administrative data <> data description" + ], + [ + 1536, + 1554, + "Administrative data <> usage context" + ] + ], + "validated": true, + "empirical_context": "00 75. 00 Evaluation conducted on the impact of the enhancement of informal apprenticeship and foundational skills training on labor market outcomes ( Yes / No ) No No Yes IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of students who obtained certification from new, revised programs in collaboration with the private sector The indicator measures the cumulated number of youth graduating from formal TVET institutions with certification from new and Annual Enrolment records at sector training hubs Administrative data ( registry of sector training hubs ) M & E specialist within the", + "type": "registry", + "explanation": "In this context, 'administrative data' is explicitly mentioned as a source for data collection, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' often refers to structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as a source for data collection, indicating it functions as a dataset.", + "contextual_signal": "follows 'datasource' in the context", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "168_252640updated0version", + "page": 38, + "text": "Hierarchy of Objectives Output from each Component: Capacity Building and Policy Development ( a ) National Strategic Plan and Action Plans ( b ) Project Coordination and Administration ( c ) Capacity Building and Training, which would include on-the-job training for health personnel, advocacy, and social communication training ( e ) Monitoring and Evaluation Health Sector Responses to HIV / AIDS, STIs, Malaria, and TB a ) the strengthening o f the national sentinel surveillance system and sero-prevalence surveys Key Performance Indicators Output Indicators: National HIV / AIDS Strategic Plan i s implemented IC established and operational Coordination mechanisms establilshed and operating satisfactorily Satisfactory MIS At least 80 % o f annual plan executed 150 peer educators trained each year 50 % o f health personnel adequately trained to provide care o f sexually transmitted infections ( STIs ) and opportunistic infections ( 01s ) by 2006 and 75 % by end of the project. 85 % o f Djiboutians in vulnerable groups reached by a message on HIV / AID S / STI At least 14 messages on HIV / AIDS, Malaria and TB diffused on radio per week 50 % o f Djiboutians know 2 methods to protect themselves against malaria Data for monitoring o f outcome and impact indicators are collected regularly By the end o f 2005, at least five sentinel sites for the epidemiological surveillance o f HIV / AIDS will be functional,", + "ner_text": [ + [ + 495, + 518, + "named" + ], + [ + 998, + 1009, + "sero-prevalence surveys <> reference population" + ], + [ + 1318, + 1322, + "sero-prevalence surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "Hierarchy of Objectives Output from each Component: Capacity Building and Policy Development ( a ) National Strategic Plan and Action Plans ( b ) Project Coordination and Administration ( c ) Capacity Building and Training, which would include on-the-job training for health personnel, advocacy, and social communication training ( e ) Monitoring and Evaluation Health Sector Responses to HIV / AIDS, STIs, Malaria, and TB a ) the strengthening o f the national sentinel surveillance system and sero-prevalence surveys Key Performance Indicators Output Indicators: National HIV / AIDS Strategic Plan i s implemented IC established and operational Coordination mechanisms establilshed and operating satisfactorily Satisfactory MIS At least 80 % o f annual plan executed 150 peer educators trained each year 50 % o f health personnel adequately trained to provide care o f sexually transmitted infections ( STIs ) and opportunistic infections ( 01s ) by 2006 and 75 % by end of the project. 85 % o f Djiboutians in vulnerable groups reached by a message on HIV / AID S / STI At least 14 messages on HIV / AIDS, Malaria and TB diffused on radio per week 50 % o f Djiboutians know 2 methods to protect themselves against malaria Data for monitoring o f outcome and impact indicators are collected regularly By the end o f 2005, at least five sentinel sites for the epidemiological surveillance o f HIV / AIDS will be functional,", + "type": "survey", + "explanation": "In this context, 'sero-prevalence surveys' are explicitly mentioned as part of the monitoring and evaluation efforts, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'sero-prevalence surveys' typically involve structured data collection on health indicators.", + "contextual_reason_agent": "In this context, 'sero-prevalence surveys' are explicitly mentioned as part of the monitoring and evaluation efforts, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 1167, + 1172, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "organization", + "explanation": "'WOFED' is mentioned as an organization responsible for submitting reports, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'WOFED' is a dataset because it appears in a context discussing data collection and reporting.", + "contextual_reason_agent": "'WOFED' is mentioned as an organization responsible for submitting reports, not as a structured collection of data.", + "contextual_signal": "'mentioned only as a project, not as a data source'", + "tags": [] + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 29, + "text": "The World Bank Ethiopia Digital ID for Inclusion and Services Project ( P179040 ) Page 19 of 39 adoption. For example, authentication and e-KYC will be free for public and social impact services. Likewise, the project will continue to document generated savings and other benefits of adopting Fayda that will strengthen the case for the GoE to invest in continuous operating expenses. IV. PROJECT APPRAISAL SUMMARY A. Technical, Economic and Financial Analysis 61. The project is expected to contribute to sustainable economic growth, through long-term cost savings, efficiency, and productivity gains, fueled by greater digital adoption by residents and businesses. 40 Digital ID holds the promise of enabling economic value creation by fostering increased inclusion, increasing formalization, and promoting digitalization of services. The project activities will ease access to identification for millions of Ethiopians and residents, including women, people with disabilities, refugees, and IDPs. The ability to prove one \u2019 s identity is often a prerequisite for accessing many public and private sector services. By addressing the gap of 36 percent in ID ownership ( the population lacking a current version of paper based Kebele ID, according to the 2018 ID4D-Findex Survey ), the project will contribute to removing some of the most basic barriers that people face. 62.", + "ner_text": [ + [ + 1255, + 1278, + "named" + ] + ], + "validated": true, + "empirical_context": "The ability to prove one \u2019 s identity is often a prerequisite for accessing many public and private sector services. By addressing the gap of 36 percent in ID ownership ( the population lacking a current version of paper based Kebele ID, according to the 2018 ID4D-Findex Survey ), the project will contribute to removing some of the most basic barriers that people face. 62.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it provides empirical data regarding ID ownership in the population.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides empirical data regarding ID ownership in the population.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 20, + "text": "The Government has affirmed both in national legislation and at representations to the Committee on Elimination of Racial Discrimination that refugees are to be treated equally under the law with respect to key socioeconomic rights: property ownership; security; access to the courts; access to labor market ( no nationality discrimination ); freedom of expression and movement; and access to basic services including access to health, education, and housing. In December 2018, 4 The latest DHS was in 2012. Fieldwork for the 2017 DHS took place between October 2017 and February 2018. Internal and independent audits determined that the results obtained from the survey cannot be reconciled and therefore should not be used for planning or programming purposes. Hence, the release of the results was canceled. 5 World Bank. 2020. Risk and Resilience Assessment for the Sahel Region.", + "ner_text": [ + [ + 531, + 534, + "named" + ] + ], + "validated": false, + "empirical_context": "In December 2018, 4 The latest DHS was in 2012. Fieldwork for the 2017 DHS took place between October 2017 and February 2018. Internal and independent audits determined that the results obtained from the survey cannot be reconciled and therefore should not be used for planning or programming purposes.", + "type": "program", + "explanation": "However, 'DHS' is mentioned in the context of audits and results that should not be used for planning, indicating it is not functioning as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DHS' refers to a dataset because it is associated with fieldwork and results from a survey.", + "contextual_reason_agent": "However, 'DHS' is mentioned in the context of audits and results that should not be used for planning, indicating it is not functioning as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 36, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda Project ( P176747 ) Page 32 of 77 B. Results Monitoring and Evaluation Arrangements Monitoring and Regular Reporting 79. Objectives and design. The objective of the M & E system is to track the project \u2019 s implementation progress and achievement of expected outcomes to enable the government ( national and sub-national ) and World Bank to address issues as they arise. An integrated web-based data collection platform will be established at the MGLSD into which data on implementation progress and outcomes will be entered will be entered to track implementation of project interventions and their outcomes. The MGLSD will contract a consulting firm to design and develop the integrated data platform, which will include an interface that allows the persons responsible for M & E at all implementing agencies to enter monitoring data that they collect. 80. The MGLSD will lead the overall M & E efforts. The MGLSD already has an experienced Planning Unit which has been responsible for leading the efforts to track government programs. Staff with specialized skills in ( a ) survey design, implementation, and analysis; ( b ) operations and maintenance of management information systems; and ( c ) data manager; and ( d ) others as needed will comprise the M & E team at the MGLSD. 81.", + "ner_text": [ + [ + 466, + 511, + "named" + ] + ], + "validated": false, + "empirical_context": "The objective of the M & E system is to track the project \u2019 s implementation progress and achievement of expected outcomes to enable the government ( national and sub-national ) and World Bank to address issues as they arise. An integrated web-based data collection platform will be established at the MGLSD into which data on implementation progress and outcomes will be entered will be entered to track implementation of project interventions and their outcomes. The MGLSD will contract a consulting firm to design and develop the integrated data platform, which will include an interface that allows the persons responsible for M & E at all implementing agencies to enter monitoring data that they collect.", + "type": "system", + "explanation": "However, it is described as a system for data entry and monitoring, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data collection platform'.", + "contextual_reason_agent": "However, it is described as a system for data entry and monitoring, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 69, + "text": "Key responsibilities of the community mobilizers will include ( a ) providing capacity support to communities to carry out ( i ) community diagnostic and mapping processes; ( ii ) facilitation of planning processes in a conflict-sensitive, climate-informed and inclusive manner ( including for women, youth, and people with disabilities ); ( iii ) participatory M & E including community monitoring, social audit, GRMs, and climate monitoring to feed into the KMP; and ( b ) activities on exclusionary social norms. The FPs will work closely with the ARDs who will be providing support to the communes to prepare, update and implement the LDPs and the agencies planning and implementing the structural sub-projects. 11. Leveraging Digital Tools. To support the implementation of the community centered approach throughout the sub-project cycle, the project will introduce the use of innovative digital tools ( CDD app ) for community mobilizers, CECs and communal authorities. The tool will enhance two-way communication between communities and government.", + "ner_text": [ + [ + 910, + 917, + "named" + ] + ], + "validated": false, + "empirical_context": "Leveraging Digital Tools. To support the implementation of the community centered approach throughout the sub-project cycle, the project will introduce the use of innovative digital tools ( CDD app ) for community mobilizers, CECs and communal authorities. The tool will enhance two-way communication between communities and government.", + "type": "tool", + "explanation": "However, the context indicates that the 'CDD app' is a tool for communication rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'app' could imply a digital tool that collects or manages data.", + "contextual_reason_agent": "However, the context indicates that the 'CDD app' is a tool for communication rather than a structured collection of data.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "161_28046", + "page": 29, + "text": "Activity Monitoring and Evaluation. Activities on any level would be monitored following structured reporting and assessment forms and procedures. Given the program \u2019 s large scope o f interventions and decentralized nature of activities, it would be necessary to incorporate a coherent and consistent set o f indicators into all contracts / agreements funded by the project. For example, the health centers or prefectoral hospitals would be required to submit their plans following the logical framework outline linking inputs, process, outputs. Agreements / contracts would be performance-based and would thus identify all yearly indicators which those front-line health structures plan on achieving. These indicators would be compiled and aggregated in the annual report o f the Task Force. Outcome and Impact Monitoring and Evaluation. Another aspect o f the M & E system would be the monitoring o f the outcome and impact; this would be done by a Demographic and Health Survey at the beginning and end o f the project. In addition, data on deaths avoided would be calculated through operational research contracted to a specialized institution which would use DHS estimates as well as health structures records on coverage. Quality o f services would be checked yearly based on a simple checklist which describes the standards expected and which would be designed with the help o f GTZ. Such quality check would be contracted out to consultants.", + "ner_text": [ + [ + 1190, + 1215, + "named" + ] + ], + "validated": false, + "empirical_context": "Another aspect o f the M & E system would be the monitoring o f the outcome and impact; this would be done by a Demographic and Health Survey at the beginning and end o f the project. In addition, data on deaths avoided would be calculated through operational research contracted to a specialized institution which would use DHS estimates as well as health structures records on coverage. Quality o f services would be checked yearly based on a simple checklist which describes the standards expected and which would be designed with the help o f GTZ.", + "type": "records", + "explanation": "However, 'health structures records' is mentioned as a source of information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'records' often imply a collection of data.", + "contextual_reason_agent": "However, 'health structures records' is mentioned as a source of information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a source of information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 5, + "validated": 3, + "not_validated": 2 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 30, + "text": "25 78. Survey and public consultations. The project is basing the design of the water service expansion and standpost management components on the results of a socio-economic study and household survey \u2013 commissioned specifically for this project - of household demand and willingness and ability to pay for different modes of service ( standposts vs. private connections ) in the peri-urban neighborhoods of Bujumbura. In addition, as part of the project preparation, multiple consultations were held between REGIDESO staff, members and local leaders of 26 neighborhoods, and the Bank project team, with the facilitation of local consultants. A participatory workshop was organized to discuss the design of the proposed program and to disseminate the findings of the initial focus group interviews, the household survey, and the experience of similar endeavors in other countries. Representatives of 26 peri - urban neighborhoods and their local leaders expressed strong support for the proposed program. They provided key input into the design of the program during break-out sessions where the details of the program were discussed, such as the mode of selection and terms of reference for standpost operators, and the respective roles of the project partners ( REGIDESO, user committees, local administrators, and operators ). 79. Willingness and ability to pay.", + "ner_text": [ + [ + 185, + 201, + "named" + ], + [ + 409, + 418, + "household survey <> data geography" + ], + [ + 581, + 585, + "household survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Survey and public consultations. The project is basing the design of the water service expansion and standpost management components on the results of a socio-economic study and household survey \u2013 commissioned specifically for this project - of household demand and willingness and ability to pay for different modes of service ( standposts vs. private connections ) in the peri-urban neighborhoods of Bujumbura.", + "type": "survey", + "explanation": "This is indeed a dataset as it is specifically commissioned for the project to gather empirical data on household demand and willingness to pay.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data related to households.", + "contextual_reason_agent": "This is indeed a dataset as it is specifically commissioned for the project to gather empirical data on household demand and willingness to pay.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 62, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 58 of 86 VII. RESULTS FRAMEWORK AND MONITORING Results Framework COUNTRY: Burundi Cash for Jobs Project Project Development Objectives ( s ) The development objective of the project is to strengthen management capacity, scale up safety net programs, and promote productive inclusion and access to jobs Project Development Objective Indicators RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline End Target To strengthen management capacity Households in targeted areas included in the National Social Registry ( Number ) 0. 00 200, 000. 00 Households in targeted areas included in the National Social Registry - refugees, disaggregated by gender ( Number ) 0. 00 15, 000. 00 Households in targeted areas included in the National Social Registry - host communities, disaggregated by gender ( Number ) 0. 00 25, 000. 00 Proportion of households enrolled in the beneficiary database living below the extreme poverty line ( Percentage ) 0. 00 80. 00", + "ner_text": [ + [ + 636, + 660, + "named" + ], + [ + 128, + 135, + "National Social Registry <> data geography" + ], + [ + 491, + 501, + "National Social Registry <> reference population" + ], + [ + 663, + 671, + "National Social Registry <> reference population" + ], + [ + 726, + 736, + "National Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 200, 000. 00 Households in targeted areas included in the National Social Registry - refugees, disaggregated by gender ( Number ) 0. 00 15, 000.", + "type": "registry", + "explanation": "The National Social Registry is explicitly mentioned as containing data about households, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that includes households disaggregated by gender.", + "contextual_reason_agent": "The National Social Registry is explicitly mentioned as containing data about households, indicating it functions as a data source.", + "contextual_signal": "described as a registry that includes households", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 63, + "text": "Age \u2010 earnings profiles are built based on the education levels of the Jordan, namely: ( i ) incomplete primary ( 0 \u2010 2 grades completed ); ( ii ) incomplete lower secondary ( 3 \u2010 9 grades completed ); ( iii ) incomplete upper secondary school ( 10 grades completed ); ( iv ) completed upper secondary but not post \u2010 secondary ( 11 \u2010 12 grades completed ); and ( v ) post \u2010 secondary ( 13 grades completed and above ). 4. These age \u2010 earnings profiles are constructed separately for two population groups which differ significantly in their education and labor market experiences, namely ( i ) men; and; ( ii ) women. These are enriched by incorporating: ( 1 ) the probability of employment in three types of employment ( wage employment, self \u2010 employment, and unpaid employment ); ( 2 ) the probability of employment in the public sector; ( 3 ) shifts in the income distribution given the likely changes in productivity over time; and ( 4 ) changes in the probability of staying alive over the individual \u2019 s working life. 5.", + "ner_text": [ + [ + 0, + 23, + "named" + ] + ], + "validated": false, + "empirical_context": "Age \u2010 earnings profiles are built based on the education levels of the Jordan, namely: ( i ) incomplete primary ( 0 \u2010 2 grades completed ); ( ii ) incomplete lower secondary ( 3 \u2010 9 grades completed ); ( iii ) incomplete upper secondary school ( 10 grades completed ); ( iv ) completed upper secondary but not post \u2010 secondary ( 11 \u2010 12 grades completed ); and ( v ) post \u2010 secondary ( 13 grades completed and above ). 4.", + "type": "concept", + "explanation": "However, it is not a dataset as it describes a concept rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes structured information about age and earnings based on education levels.", + "contextual_reason_agent": "However, it is not a dataset as it describes a concept rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a profile, not as a data source", + "tags": [] + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 86, + "text": "Where the area of intervention potentially extends beyond the woreda \u2019 s administrative boundaries, efforts may require management models that vary from traditional WASH Committees ( WASHCOMs ). The selection and readiness criteria for rural communities to be supported under this Project include the following. Selection Criteria ( i ) Level of WASH access and coverage based on the WASH baseline data compiled from regions and verified by the federal WASH ministers: Efforts to measure this will primarily focus on the following indicators ( i ) the level of rural water supply coverage in line with the GTP II standard; ( ii ) household sanitation coverage based on the MoH definition for improved household latrines; ( iii ) proportion of ODF kebeles in a woreda; ( iv ) WASH coverage for schools; and ( v ) WASH coverage for health facilities. The selection criteria include the level of stunting ( based on data collected from the health MIS at the woreda level ) as well as the \u201c prevalence of acute watery diarrhea \u201d ( also using data from the woreda-level health MIS ). These will be used as proxies to prioritize areas with poor sanitation. Woredas with relatively low levels of WASH coverage based on these indicators will be given priority. ( ii ) Level of ongoing assistance in the woreda: Woredas with a lower level of ongoing support from other financing sources will be given priority for financing from the Project. Readiness Criteria ( i ) Compliance with safeguard requirements, based on initial screenings as outlined in the ESMF.", + "ner_text": [ + [ + 937, + 947, + "named" + ], + [ + 62, + 68, + "health MIS <> data geography" + ], + [ + 552, + 588, + "health MIS <> data description" + ], + [ + 630, + 659, + "health MIS <> data description" + ], + [ + 673, + 676, + "health MIS <> publisher" + ], + [ + 775, + 800, + "health MIS <> data description" + ], + [ + 884, + 901, + "health MIS <> data description" + ], + [ + 955, + 961, + "health MIS <> data geography" + ], + [ + 987, + 1022, + "health MIS <> data description" + ], + [ + 1566, + 1584, + "health MIS <> usage context" + ] + ], + "validated": true, + "empirical_context": "Selection Criteria ( i ) Level of WASH access and coverage based on the WASH baseline data compiled from regions and verified by the federal WASH ministers: Efforts to measure this will primarily focus on the following indicators ( i ) the level of rural water supply coverage in line with the GTP II standard; ( ii ) household sanitation coverage based on the MoH definition for improved household latrines; ( iii ) proportion of ODF kebeles in a woreda; ( iv ) WASH coverage for schools; and ( v ) WASH coverage for health facilities. The selection criteria include the level of stunting ( based on data collected from the health MIS at the woreda level ) as well as the \u201c prevalence of acute watery diarrhea \u201d ( also using data from the woreda-level health MIS ). These will be used as proxies to prioritize areas with poor sanitation.", + "type": "system", + "explanation": "In the context, 'health MIS' is explicitly mentioned as a source of data collected at the woreda level, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'health MIS' is a dataset because it is referenced in the context of collecting data for health indicators.", + "contextual_reason_agent": "In the context, 'health MIS' is explicitly mentioned as a source of data collected at the woreda level, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 83, + "text": "73 # DLI Bank financing allocated to the DLI Deadline for DLI Achievement Minimum DLI value to be achieved to trigger disbursements of Bank Financing Maximum DLI value ( s ) expected to be achieved for Bank disbursements purposes Determination of Financing Amount to be disbursed against achieved and verified DLI value ( s ) statistics database is established and the gender indicators are published online DLR9. 3: A pilot based on a technical study / assessment to increase access to childcare provision is launched by the OMSWA DLR 9. 2: full amount disbursed upon achievement of yes / no target", + "ner_text": [ + [ + 369, + 386, + "named" + ] + ], + "validated": false, + "empirical_context": "73 # DLI Bank financing allocated to the DLI Deadline for DLI Achievement Minimum DLI value to be achieved to trigger disbursements of Bank Financing Maximum DLI value ( s ) expected to be achieved for Bank disbursements purposes Determination of Financing Amount to be disbursed against achieved and verified DLI value ( s ) statistics database is established and the gender indicators are published online DLR9. 3: A pilot based on a technical study / assessment to increase access to childcare provision is launched by the OMSWA DLR 9.", + "type": "indicator", + "explanation": "'Gender indicators' are mentioned in the context of being published online but are not described as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'gender indicators' refers to a dataset because it includes the term 'indicators' which often relates to data metrics.", + "contextual_reason_agent": "'Gender indicators' are mentioned in the context of being published online but are not described as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as indicators, not as a data source", + "tags": [] + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 40 of 86 of both of it is to build the capacity of the GoB to monitor its own programs in the Social Protection sector. In this sense, several instruments will support the monitoring of the project and Social Protection programs in general: a. The Social Registry. It will help the MNSSAHRG, and SEP monitor the socio-economic situation of the poor and vulnerable households in Burundi. The socio-economic data being collected through the registry will allow government to monitor the situation and better design social policies. The registry will also collect data on beneficiaries enrolled in different social programs allowing to monitor the coverage of these. b. The project will support the mid-term review of the Social Protection Strategy that is meant to be approved at the beginning of 2022. c. SEP / CNPS will receive technical and financial support to perform their coordination tasks and lead the Social Protection Working Group allowing for a better monitoring of Social Protection interventions by different partners. 128. PDO indicators and Intermediate Results Indicators of the project will be measured through different instruments. These include process evaluations; regular spot checks and beneficiary surveys through mobile phone to evaluate the quality of implementation: the efficiency of the targeting and payment processes; and the overall satisfaction with the program.", + "ner_text": [ + [ + 445, + 464, + "named" + ], + [ + 398, + 428, + "socio-economic data <> reference population" + ], + [ + 432, + 439, + "socio-economic data <> data geography" + ], + [ + 623, + 674, + "socio-economic data <> reference population" + ], + [ + 849, + 853, + "socio-economic data <> publication year" + ], + [ + 1465, + 1483, + "socio-economic data <> usage context" + ] + ], + "validated": true, + "empirical_context": "It will help the MNSSAHRG, and SEP monitor the socio-economic situation of the poor and vulnerable households in Burundi. The socio-economic data being collected through the registry will allow government to monitor the situation and better design social policies. The registry will also collect data on beneficiaries enrolled in different social programs allowing to monitor the coverage of these.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to the structured collection of socio-economic data used for monitoring and policy design.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of data being collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to the structured collection of socio-economic data used for monitoring and policy design.", + "contextual_signal": "mentioned as data being collected through the registry", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 17, + "text": "COVID-19 has affected refugee livelihoods and increased income insecurity, sexual and gender-based violence ( GBV ), and anxiety. Women are more direly affected. Based on household surveys with over 1, 500 refugees in Kampala and the settlements as well as interviews with 185 key informants, the United Nations High Commissioner for Refugees ( UNHCR ) and UN Women found that household income loss has contributed to an increased incidence of GBV and negative coping mechanisms such as survival sex and sale of alcohol. Fifty-three percent of girls and 46 percent of women aged 18 to 24 years reported an additional unpaid work burden, with school closures also affecting their ability to access learning opportunities. 35 13. Limited connectivity also hampers humanitarian actors like the World Food Programme from providing cash - based solutions through mobile finance and increasing the efficiency of aid to refugees. Access to digital connectivity can enable refugees and their communities to access accurate and relevant information in appropriate languages, access business opportunities, and communicate with their families and host communities. As such, the Uganda Digital Acceleration Project ( UDAP-GovNet ) will be laying an important foundation needed to enable digital services delivery from key development actors serving refugees and RHDs. COVID-19 pressures have further highlighted the impact of this digital divide on refugees.", + "ner_text": [ + [ + 171, + 188, + "named" + ], + [ + 218, + 225, + "household surveys <> data geography" + ], + [ + 297, + 352, + "household surveys <> author" + ], + [ + 357, + 365, + "household surveys <> author" + ], + [ + 568, + 593, + "household surveys <> reference population" + ], + [ + 1463, + 1481, + "household surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Women are more direly affected. Based on household surveys with over 1, 500 refugees in Kampala and the settlements as well as interviews with 185 key informants, the United Nations High Commissioner for Refugees ( UNHCR ) and UN Women found that household income loss has contributed to an increased incidence of GBV and negative coping mechanisms such as survival sex and sale of alcohol. Fifty-three percent of girls and 46 percent of women aged 18 to 24 years reported an additional unpaid work burden, with school closures also affecting their ability to access learning opportunities.", + "type": "survey", + "explanation": "In this context, 'household surveys' is confirmed as a dataset since it is explicitly mentioned as a source of information used in the research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'household surveys' is a dataset because it refers to a structured collection of data gathered from refugees.", + "contextual_reason_agent": "In this context, 'household surveys' is confirmed as a dataset since it is explicitly mentioned as a source of information used in the research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 86, + "text": "in the HOA by commissioning studies and / or focused research. Advocacy for holistic regional responses to forced displacement and mixed migration will be supported by data generated on displacement that would influence intervention in member states. 29. The key thrust areas for the Regional Secretariat will include: ( i ) engaging in policy through initiating dialogues with academic and research institutions on transitional solutions on displacement; ( ii ) generating evidence through research for innovative management of forced displacement and mixed migration; ( iii ) building capacity of countries and institutions in the HOA to innovatively respond to the displacement \u2013 migration nexus; ( iv ) providing knowledge management and M & E; ( v ) forging partnerships between humanitarian and development actors in the IGAD region to rethink the application of durable solutions; and ( vi ) consolidating the", + "ner_text": [ + [ + 168, + 198, + "named" + ], + [ + 633, + 636, + "data generated on displacement <> data geography" + ], + [ + 827, + 838, + "data generated on displacement <> data geography" + ] + ], + "validated": true, + "empirical_context": "in the HOA by commissioning studies and / or focused research. Advocacy for holistic regional responses to forced displacement and mixed migration will be supported by data generated on displacement that would influence intervention in member states. 29.", + "type": "data", + "explanation": "This is indeed a dataset as it is described as data that influences interventions, indicating its use in empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'data generated on displacement', suggesting a structured collection of information.", + "contextual_reason_agent": "This is indeed a dataset as it is described as data that influences interventions, indicating its use in empirical analysis.", + "contextual_signal": "mentioned as data that would influence intervention", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 48, + "text": "The program will prioritize the poorest areas ( with a combination of monetary poverty rate and chronic malnutrition at the province level and extreme poverty rate at the commune level ). The project will support a first phase of the program to establish transparent and rule-based processes and allow for the gradual fine-tuning of these processes and the instruments for program operation. For example, the involvement of communities in the targeting may evolve as better household survey data becomes available; cash payments may evolve, from cash to electronic or mobile payments; the enforcement of conditionalities may evolve with the availability of services and administrative capacity. The registry will start in 16 communes of the four provinces with the largest rates of monetary poverty and chronic malnutrition. Sub-component 1. 1. Cash Transfers ( US $ 19. 5 million equivalent ) 10. Payment amount and schedule. The level of transfers will be BIF 20, 000 ( US $ 12 equivalent ) per household and per month for 30 months. This corresponds to approximately 19", + "ner_text": [ + [ + 474, + 495, + "named" + ] + ], + "validated": true, + "empirical_context": "The project will support a first phase of the program to establish transparent and rule-based processes and allow for the gradual fine-tuning of these processes and the instruments for program operation. For example, the involvement of communities in the targeting may evolve as better household survey data becomes available; cash payments may evolve, from cash to electronic or mobile payments; the enforcement of conditionalities may evolve with the availability of services and administrative capacity. The registry will start in 16 communes of the four provinces with the largest rates of monetary poverty and chronic malnutrition.", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned in the context of evolving processes based on the availability of better data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' implies a structured collection of data collected from households.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of evolving processes based on the availability of better data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "089_UGANDA-PAD-04272018", + "page": 16, + "text": "The Program scope will also be expanded to better align with NDP II goals of wealth creation as well as the World Bank ' s twin goals on ending extreme poverty and boosting shared prosperity. Various elements have been introduced into the design of the AF to strengthen participating MLGs impact on promoting local economic development ( LED ) and job creation. Local firms in the formal sector face considerable constraints in establishing and sustaining their businesses, limiting prospects for the creation of more and better jobs. For example, according to World Bank Enterprise Survey Data for Uganda ( 2013 ), the main constraints include infrastructure deficits and access to land; regulatory barriers and corruption; and access to finance20. LGs have a role in helping or hindering the alleviation of these constraints to support private sector development and, consequently, job creation. The recent study undertaken by the World Bank / Ministry of Local Government ( MoLG ) on LED21 highlighted that LGs are currently doing little in this direction, with their main relationship with the private sector centering on tax collection and requests for donations. The study outlined some of the constraints faced by the private sector which are within the mandate of LGs. These fell under the four broad categories of infrastructure deficits, regulatory barriers, absence of enterprise support and institutional capacity gaps within LGs. 23.", + "ner_text": [ + [ + 561, + 594, + "named" + ], + [ + 108, + 118, + "World Bank Enterprise Survey Data <> publisher" + ], + [ + 362, + 373, + "World Bank Enterprise Survey Data <> reference population" + ], + [ + 561, + 571, + "World Bank Enterprise Survey Data <> publisher" + ], + [ + 599, + 605, + "World Bank Enterprise Survey Data <> data geography" + ], + [ + 608, + 612, + "World Bank Enterprise Survey Data <> publication year" + ], + [ + 645, + 687, + "World Bank Enterprise Survey Data <> data description" + ], + [ + 689, + 723, + "World Bank Enterprise Survey Data <> data description" + ], + [ + 933, + 943, + "World Bank Enterprise Survey Data <> publisher" + ], + [ + 1462, + 1480, + "World Bank Enterprise Survey Data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Local firms in the formal sector face considerable constraints in establishing and sustaining their businesses, limiting prospects for the creation of more and better jobs. For example, according to World Bank Enterprise Survey Data for Uganda ( 2013 ), the main constraints include infrastructure deficits and access to land; regulatory barriers and corruption; and access to finance20. LGs have a role in helping or hindering the alleviation of these constraints to support private sector development and, consequently, job creation.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as 'World Bank Enterprise Survey Data' which is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in the term and is associated with a recognized survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as 'World Bank Enterprise Survey Data' which is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 57, + "text": "These interim financial reports ( IFRs ) will be produced with information extracted from the Bertha System and will consolidate the Project \u2019 s financial data for all components using the cash basis. 59. At the end of each fiscal year, the CIM-AMFRI will prepare the annual financial statements for the Project, which will be audited. The second semester IFRs with accompanying notes will serve as the Project \u2019 s annual financial statements to be audited. 60. The following biannual IFRs [ to be prepared in Reais ] will be prepared for Project monitoring and management purposes and submitted to the Bank: a. IFR 1-A \u2013 Sources and Uses of Funds by Disbursement Category ( period to date, year to date, Project to date ): Shows the budgeted amounts against the actual expenditures ( i. e., documented expenditures ), including a variance analysis b. IFR 1-B \u2013 Uses of Funds by Project Component ( period to date, year to date, Project to date ): Shows the budgeted amounts against the actual expenditures ( i. e., documented expenditures ), including a variance analysis c. IFR 1-C \u2013 DA bank reconciliation, and accompanying bank statements d. Cash flow for the following period 61.", + "ner_text": [ + [ + 94, + 107, + "named" + ] + ], + "validated": false, + "empirical_context": "These interim financial reports ( IFRs ) will be produced with information extracted from the Bertha System and will consolidate the Project \u2019 s financial data for all components using the cash basis. 59.", + "type": "system", + "explanation": "However, the context indicates that it is a system used to extract information, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'System' which can imply data handling.", + "contextual_reason_agent": "However, the context indicates that it is a system used to extract information, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 37, + "text": "Low representation and influence of women on local decision-making bodies Under subcomponent 1 ( c ): new activities in Phase II \u2022 Training program for 12, 000 women to support female leadership in project community committees. \u2022 Community outreach to promote women \u2019 s voice and leadership in decision-making at the community level. \u2022 40 % target for women \u2019 s representation in community committees ( up from 37 % in Phase I ). \u2022 30 % target for women \u2019 s leadership in local committees ( new in Phase II ). \u2022 12, 000 women to be trained in female-only leadership program. High incidence of GBV and lack of access to quality services to respond to the needs of GBV survivors Under Component 1: new activities in Phase II \u2022 Appoint and train 120 GBV focal persons \u2022 GBV training for project staff, officials, and contractors \u2022 GBV awareness-raising and prevention campaigns, working with NGOs, officials and schools \u2022 Connect project implementing agencies with referral pathways for survivor-centered services \u2022 Safety audits / consultations with women to inform design and placement of infrastructure Under subcomponent 2 ( b ) \u2022 Focus on renewable energy sources to decrease dependence on firewood \u2022 Community planning committees and project implementers connected with referral pathways for GBV survivors via other WB operations \u2022 Design of supportive infrastructure informed by consultations with women or women \u2019 s groups \u2022 Number of women with access to renewable energy sources 60 Central Statistical Agency ( CSA ) [ Ethiopia ] and ICF. 2016. Ethiopia Demographic and Health Survey 2016: Key Indicators Report. Addis Ababa, Ethiopia, and Rockville, Maryland, USA. CSA and ICF.", + "ner_text": [ + [ + 1552, + 1590, + "named" + ], + [ + 1489, + 1515, + "Ethiopia Demographic and Health Survey <> publisher" + ], + [ + 1518, + 1521, + "Ethiopia Demographic and Health Survey <> publisher" + ], + [ + 1541, + 1544, + "Ethiopia Demographic and Health Survey <> publisher" + ], + [ + 1546, + 1550, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 1552, + 1560, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 1591, + 1595, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 1620, + 1641, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 1673, + 1676, + "Ethiopia Demographic and Health Survey <> publisher" + ], + [ + 1681, + 1684, + "Ethiopia Demographic and Health Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "2016. Ethiopia Demographic and Health Survey 2016: Key Indicators Report. Addis Ababa, Ethiopia, and Rockville, Maryland, USA.", + "type": "survey", + "explanation": "This is a dataset as it refers to a specific survey that collects demographic and health data in Ethiopia.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it refers to a specific survey that collects demographic and health data in Ethiopia.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 38, + "text": "Gender inequities and norms influence access to critical health services, as well as risk of exposure to disease, particularly in emergency situations and pandemics. Factors that constrain access to and use of health services by women in Iraq include limited mobility and financial capacity, competing demands of paid and unpaid work, and limited access to information. 10 The reported incidence of COVID-19 is higher among men than women \u2013 59 percent of registered COVID-19 cases in Iraq to date were among men. Moreover, women have also been impacted by the discontinuity of essential RMNCAH-N services, including for maternal and sexual and reproductive health, and GBV. 11 The GBV Information Management System ( GBVIMS ) has recorded a marked rise in the number of reported incidents of violence in 2020. 12 10 UN Women ( 2018 ), Gender Profile - Iraq, A situation analysis on gender equality and women empowerment in Iraq. 11 UN Women ( 2020 ). Report on the Impact of COVID-19 on Women. 12 Gender Based Violence Information Management System Annual Narrative Report. January \u2013 December 2020. https: / / iraq. unfpa. org / sites / default / files / resource-pdf / gbvims_narrative_report_of_2020. pdf", + "ner_text": [ + [ + 997, + 1048, + "named" + ] + ], + "validated": false, + "empirical_context": "Report on the Impact of COVID-19 on Women. 12 Gender Based Violence Information Management System Annual Narrative Report. January \u2013 December 2020.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Management System' in its name, suggesting data handling.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 61, + "text": "into project documents and monitoring reports \u2022 Provide fiduciary, technical, and management oversight for health service delivery financed by IDA and linked MDTF High Level Steering Committee Donors, MoH, MoFP, WB, SMoH, UNICEF \u2022 Provide high level direction for the project \u2022 Meet every six months \u2022 Review project data, identify needed actions, and follow-up on actions during meetings Operational steering committee MoH, PMU, World Bank, Donors, UNICEF \u2022 Provide routine oversight and operational direction in line with overall direction from the HSC \u2022 Meet on a quarterly basis \u2022 Identify and discuss needed actions \u2022 Review project data, identify needed actions, and follow-up on actions during meetings UNICEF UNICEF contracted by the PMU \u2022 Sub-contract NGOs \u2022 Supervise and support NGOs \u2022 Sub-contract procurement and logistics agency \u2022 Supervise and support logistics agency \u2022 Develop capacity of CHDs Contracted Service Providers NGOs sub-contracted by UNICEF \u2022 Deliver health services \u2022 Engage with communities to support health service delivery", + "ner_text": [ + [ + 309, + 321, + "named" + ] + ], + "validated": true, + "empirical_context": "into project documents and monitoring reports \u2022 Provide fiduciary, technical, and management oversight for health service delivery financed by IDA and linked MDTF High Level Steering Committee Donors, MoH, MoFP, WB, SMoH, UNICEF \u2022 Provide high level direction for the project \u2022 Meet every six months \u2022 Review project data, identify needed actions, and follow-up on actions during meetings Operational steering committee MoH, PMU, World Bank, Donors, UNICEF \u2022 Provide routine oversight and operational direction in line with overall direction from the HSC \u2022 Meet on a quarterly basis \u2022 Identify and discuss needed actions \u2022 Review project data, identify needed actions, and follow-up on actions during meetings UNICEF UNICEF contracted by the PMU \u2022 Sub-contract NGOs \u2022 Supervise and support NGOs \u2022 Sub-contract procurement and logistics agency \u2022 Supervise and support logistics agency \u2022 Develop capacity of CHDs Contracted Service Providers NGOs sub-contracted by UNICEF \u2022 Deliver health services \u2022 Engage with communities to support health service delivery", + "type": "data", + "explanation": "In this context, 'project data' is used as a source of information for decision-making and oversight, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'project data' refers to a dataset because it is mentioned in the context of reviewing and identifying needed actions.", + "contextual_reason_agent": "In this context, 'project data' is used as a source of information for decision-making and oversight, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source for review and action identification", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 64, + "text": "modules. This module will also prepares project M & E Reports and responds to the requests from users. Implementation Module: This module manages project implementation, and responsibilities will be shouldered primarily by the M & E team of the CU. A working group including representatives from main contractors and NGOs, will be formed to encourage dialogue regarding implementation and prevent blockage on procurement, disbursement, or activity implementation. MOD contractors will present contract work-plans including the goals for the year, broken down by activity. Activities described in the project description are broken down into tasks to be achieved over the 12 month period. At the end of the year, the total for each task equals loo %, and implementation progress i s measured through the % o f tasks achieved. Administrative Module: Headed by an M & E Specialist, this module will be a system of reminders and warnings o f M & E calendar deadlines, as specified in the M & E Manual. It will also manage the system of filing and receipt of reporting from the disparate stakeholders in DRC, Data Collection Module: This unit monitors indicator data for quality control, broader trends, and adaptive management of the project implementation. Reports from site visits to verify project implementation and indicator data will provide a double-check on reporting received by the UC.", + "ner_text": [ + [ + 1147, + 1161, + "named" + ], + [ + 1099, + 1102, + "indicator data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Administrative Module: Headed by an M & E Specialist, this module will be a system of reminders and warnings o f M & E calendar deadlines, as specified in the M & E Manual. It will also manage the system of filing and receipt of reporting from the disparate stakeholders in DRC, Data Collection Module: This unit monitors indicator data for quality control, broader trends, and adaptive management of the project implementation. Reports from site visits to verify project implementation and indicator data will provide a double-check on reporting received by the UC.", + "type": "data", + "explanation": "In this context, 'indicator data' is indeed used as a source of information for monitoring and verifying project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'indicator data' is a dataset because it refers to specific metrics monitored for quality control and project management.", + "contextual_reason_agent": "In this context, 'indicator data' is indeed used as a source of information for monitoring and verifying project implementation.", + "contextual_signal": "monitors indicator data for quality control", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "167_27761", + "page": 30, + "text": "Instead, proxy indicators will be used to show that there i s no further deterioration of baseline conditions, or there i s a net positive change. The working conditions in the West Bank and Gaza make it difficult to use the experimental quantitative methods in which outcomes are compared with those of a randomly assigned control group that i s otherwise eligible for the program and similar to the participants. In child nutrition, the monitoring will only measure the progress of the child from project year 0 when the project starts to year 4 when the project ends, or earlier when the child enrollment in the nutrition program ends at 5 years old. Comparing a sample of project beneficiaries to a comparison group with similar characteristics using baseline and follow-up surveys will assess project impact. This method will establish the net project impact. The baseline values will be updated during a pilot targeting phase where information will be collected in ten governorates, five in the West Bank and the other five in Gaza. The results from survey data, monitoring indicators, and qualitative assessments will be entered into the project \u2019 s Management Information System ( MIS ), and the following measurable indicators will be generated to determine project impact and output: ( i ) Health / Nutrition Grants. The following outcome indicators will be used: 0 0 0 0 Net change in the percentage children brought regularly to health centers for preventive care ( target: 2-10 % ). Net change in children 0-5 years old with complete immunization scheme ( target: 2-10 % ). Net change in children 0-5 years old complying with regular growth and health monitoring ( target: 2-15 % ). Net change in nutritional status ( as measured through anthropometrical indicators ) of children 0 - 3 years old ( target: 2-1596 ). 26 Where TBD i s used in the indicators below, the indicator i s being determined by the pilot program ( January-May, 2004 ). 27", + "ner_text": [ + [ + 1157, + 1186, + "named" + ] + ], + "validated": false, + "empirical_context": "The baseline values will be updated during a pilot targeting phase where information will be collected in ten governorates, five in the West Bank and the other five in Gaza. The results from survey data, monitoring indicators, and qualitative assessments will be entered into the project \u2019 s Management Information System ( MIS ), and the following measurable indicators will be generated to determine project impact and output: ( i ) Health / Nutrition Grants. The following outcome indicators will be used: 0 0 0 0 Net change in the percentage children brought regularly to health centers for preventive care ( target: 2-10 % ).", + "type": "system", + "explanation": "However, it is mentioned as a system that stores records, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System' which often relates to data management.", + "contextual_reason_agent": "However, it is mentioned as a system that stores records, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 61, + "text": "The limited adoption of Sanad is due to low perceived relevance for many individuals and service providers, as well as to eligibility restricted so far to citizens, thus leaving various demographic groups ( such as non-Jordanians \u2013 amongst whom refugees, and military personnel ) ineligible for a digital ID. MODEE plans to increase the uptake of Sanad by: ( 1 ) expanding eligibility and outreach to enroll additional users ( for example, through the operationalization of new Government Service Centers ( GSCs ) or targeted visits to refugee communities ); ( 2 ) introducing new features to enhance trust, interoperability, security, and people centricity, such as adding a mechanism to collect user consent for data sharing; ( 3 ) implementing a Digital Public Infrastructure ( DPI ) 27 ecosystem approach that is aligned with international good practice, to achieve effective, end-to-end, digitalized service delivery workflows. Taking a trusted, people-centric DPI ecosystem approach offers the opportunity to embrace international standards and open internet technologies by using trusted data for eligibility purposes, access control, and the provision of efficient, in-demand digitalized services.", + "ner_text": [ + [ + 347, + 352, + "named" + ] + ], + "validated": false, + "empirical_context": "The limited adoption of Sanad is due to low perceived relevance for many individuals and service providers, as well as to eligibility restricted so far to citizens, thus leaving various demographic groups ( such as non-Jordanians \u2013 amongst whom refugees, and military personnel ) ineligible for a digital ID. MODEE plans to increase the uptake of Sanad by: ( 1 ) expanding eligibility and outreach to enroll additional users ( for example, through the operationalization of new Government Service Centers ( GSCs ) or targeted visits to refugee communities ); ( 2 ) introducing new features to enhance trust, interoperability, security, and people centricity, such as adding a mechanism to collect user consent for data sharing; ( 3 ) implementing a Digital Public Infrastructure ( DPI ) 27 ecosystem approach that is aligned with international good practice, to achieve effective, end-to-end, digitalized service delivery workflows. Taking a trusted, people-centric DPI ecosystem approach offers the opportunity to embrace international standards and open internet technologies by using trusted data for eligibility purposes, access control, and the provision of efficient, in-demand digitalized services.", + "type": "program", + "explanation": "'Sanad' is described as a program aimed at increasing digital ID uptake, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is mentioned in the context of digital ID and data sharing.", + "contextual_reason_agent": "'Sanad' is described as a program aimed at increasing digital ID uptake, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 21, + "text": "22 A key component of the project is the development of SIPs corresponding to set targets. The SIPs aim at guiding service providers towards providing higher service level and improve financial sustainability. The SIPs outline the steps that M / VCs and JSCs need to take to meet the set OBA Targets. The actions in the SIPs are based on specific issues that different M / VCs face to manage their SWM system, allowing M / VCs to address their unique challenges differently while working towards common goals for the entire project area. Mechanism for independent output verification The OBA grant will be subject to independent verification to assess the scores for each OBA Target and its associated indicators. The IVA will review progress annually semi-annually and evaluate achievements against the agreed target for the indicators identified. Each review will result in a score against which the payment is prorated assuming the minimum passing score is achieved for each indicator. At the JSC level, the IVA will review the MIS records to check that scores have been calculated correctly and subsequently select a sample of that data entered in the MIS to verify whether it has been recorded accurately. Acceptable verification will trigger the transfer of the corresponding OBA grant to JSC-H & B. The scorecard will be used for both independent verification and overall project \u2019 s M & E purposes.", + "ner_text": [ + [ + 1156, + 1159, + "named" + ] + ], + "validated": false, + "empirical_context": "Each review will result in a score against which the payment is prorated assuming the minimum passing score is achieved for each indicator. At the JSC level, the IVA will review the MIS records to check that scores have been calculated correctly and subsequently select a sample of that data entered in the MIS to verify whether it has been recorded accurately. Acceptable verification will trigger the transfer of the corresponding OBA grant to JSC-H & B.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system that stores records, but it is not explicitly identified as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves records and data entry.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system that stores records, but it is not explicitly identified as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "16 NCHRD and MOE \u2014 which started under ERfKE II and contributed to producing key pieces of analytical work that have played a significant role in policy development \u2014 will continue under the proposed Program. 21 One example of an evaluation study can be to look at how gender issues are addressed across different policy proposals and program interventions. The annual plan for the M & E activities will be endorsed by the GPSC and shared with all concerned donors and education sector partners. 52. The MOE has strengthened its data systems and will continue to do so to meet the M & E requirements of the Program. Under ERfKE II22, the MOE completed the redesign of its OpenEMIS, which was successfully deployed in all public schools in Jordan. OpenEMIS is a comprehensive and integrated information and data collection system, which covers all education levels. The system captures disaggregated school level data on students, teachers, grades, subjects, and teaching periods for all MOE operated schools and institutions in the country. A dashboard linked to the OpenEMIS is currently being developed and will be accessible to individual units at the MOE to enable them to monitor their respective indicators. The Queen Rania Center ( QRC ) manages the EMIS and its dashboard.", + "ner_text": [ + [ + 1257, + 1261, + "named" + ] + ], + "validated": false, + "empirical_context": "A dashboard linked to the OpenEMIS is currently being developed and will be accessible to individual units at the MOE to enable them to monitor their respective indicators. The Queen Rania Center ( QRC ) manages the EMIS and its dashboard.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data monitoring and management.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "This subcomponent will support improved planning, management, and evaluation of education reforms in the following areas: ( a ) improvement of the existing EMIS and making better use of data to support management decisions at all levels, ( b ) national and international student assessments, and ( c ) TA to support implementation of Components 1 and 2 of the project. 41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "ner_text": [ + [ + 401, + 405, + "named" + ] + ], + "validated": false, + "empirical_context": "41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data management and collection.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 67, + "text": "The improved collaboration between operators and the main utility through the operators \u2019 association has also led to the reduction of corruption in the process of contract awards, the greater involvement of consumers in the oversight of operators \u2019 performance, the reduction of illegal connections and leaks in the networks, the increase in the bill recovery rate from operators, and an improved management of occasional network failures ( coupures sur r\u00e9seau ). A program of incentives allows operators to receive a 10 m3 storage tank if their performance is satisfactory. The number of operators has grown from 16 in 2004 to 174 in 2005, and their image has considerably improved as a result of the association \u2019 s achievements: they are now perceived as partners in the delivery of services. 23. In addition to the cases presented above, analytical work is being carried out in the World Bank \u2019 s Africa region to better understand the features of success in standpost management programs across different countries38. As part of this work, preliminary reviews of existing data highlight some important questions and lessons from standpost management programs so far: \u2022 Even in the presence of standposts, households rely on a mix of supply sources for their water consumption, including standposts, informal water vendors ( especially household resellers ) and natural, untreated sources; the use of standposts may be less prevalent than widely believed; 38 Luengo, M. \u201c Sources of Water for the Urban Poor: Informal Water Service Providers and Public Standposts \u201d ( January 2007 Draft ); personal communications with Sarah Keener ( AFTCS ); Keener and Banerjee, \u201c Experiences in Mozambique, Angola, Zambia and Lesotho \u201d ( forthcoming ).", + "ner_text": [ + [ + 1069, + 1082, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition to the cases presented above, analytical work is being carried out in the World Bank \u2019 s Africa region to better understand the features of success in standpost management programs across different countries38. As part of this work, preliminary reviews of existing data highlight some important questions and lessons from standpost management programs so far: \u2022 Even in the presence of standposts, households rely on a mix of supply sources for their water consumption, including standposts, informal water vendors ( especially household resellers ) and natural, untreated sources; the use of standposts may be less prevalent than widely believed; 38 Luengo, M. \u201c Sources of Water for the Urban Poor: Informal Water Service Providers and Public Standposts \u201d ( January 2007 Draft ); personal communications with Sarah Keener ( AFTCS ); Keener and Banerjee, \u201c Experiences in Mozambique, Angola, Zambia and Lesotho \u201d ( forthcoming ).", + "type": "data", + "explanation": "'Existing data' is mentioned in a general sense and does not specify a structured collection or source used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'existing data' refers to a dataset because it implies a collection of information used for analysis.", + "contextual_reason_agent": "'Existing data' is mentioned in a general sense and does not specify a structured collection or source used for empirical analysis.", + "contextual_signal": "mentioned only as a general reference to information, not as a specific data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 40, + "text": "and disclosed by the Social Registry ( Number ) 0. 00 6. 00 12. 00 18. 00 24. 00 30. 00 Community members reporting positive feedback on household registration outcome for the Social Registry 0. 00 70. 00 75. 00 80. 00 80. 00 80. 00", + "ner_text": [ + [ + 21, + 36, + "named" + ], + [ + 88, + 105, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "and disclosed by the Social Registry ( Number ) 0. 00 6.", + "type": "registry", + "explanation": "In the context, 'Social Registry' is explicitly mentioned, indicating it is used as a source of information.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data.", + "contextual_reason_agent": "In the context, 'Social Registry' is explicitly mentioned, indicating it is used as a source of information.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 11, + "text": "Over 52 million people in Ethiopia now live within 1. 5 kilometers ( km ) of an improved drinking water source compared with only 6 million people in 1990. Over the same period, rates of open defecation ( OD ) fell by 63 percent, which was the largest decrease observed in the world. 13 About 67 million people gained access to a latrine, at an average rate of 2. 6 million people per year. Sanitation and hygiene promotion were integrated into wider health-care delivery mechanisms and utilized behavioral change communication ( BCC ). 6. Notwithstanding the progress, much remains to be done. Just 10 percent of all latrines constructed in rural areas qualify as improved sanitation facilities. 14 A World Bank water supply, sanitation, and hygiene ( WASH ) poverty assessment15 reported that the 2016 Ethiopia Socioeconomic Survey found that a significant number of statistically sampled rural springs and wells were contaminated with E. coli. Diarrheal diseases were the second-leading cause of death in Ethiopia in 2017, 16 with lack of access to safe WSS services ranking as the second-highest risk factor for death and disability \u2014 just behind undernutrition.", + "ner_text": [ + [ + 799, + 833, + "named" + ] + ], + "validated": true, + "empirical_context": "Just 10 percent of all latrines constructed in rural areas qualify as improved sanitation facilities. 14 A World Bank water supply, sanitation, and hygiene ( WASH ) poverty assessment15 reported that the 2016 Ethiopia Socioeconomic Survey found that a significant number of statistically sampled rural springs and wells were contaminated with E. coli.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to report findings on sanitation facilities and contamination levels.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical data.", + "contextual_reason_agent": "This is indeed a dataset as it is used to report findings on sanitation facilities and contamination levels.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "168_252640updated0version", + "page": 12, + "text": "The government recently created an Interministerial Committee against HIV / AIDS, malaria and TB ( IC ), as required in its National HIV / AIDS Strategic Plan, which will have a policy role ( see section C4 on institutional arrangements for more details ), and a Technical Interministerial Committee ( TIC ), which will be the technical arm o f the IC, to manage the response to HIV / AIDS. These two entities are to be assisted by an Executive Secretariat ( ES ). The Ministry o f Health hired a consulting firm to assist with the preparation o f the National HIV / AIDS Strategic Plan. The prevalence surveys carried out during project preparation provide baseline data for the general population, STI patients, military personnel, and CSWs. The prevalence surveys made it possible to identify the Priority Vulnerable Groups ( PVG ). Also during project preparation, KABP studies were carried out among the general population, school children, military personnel, and dockers in addition to the survey being carried out by Save the Children mentioned above. UNICEF i s implementing a Mother-to-Child Transmission ( MTCT ) pilot program which started at the end o f March 2003. The UNAIDS Thematic Group has also been reinvigorated.", + "ner_text": [ + [ + 869, + 881, + "named" + ] + ], + "validated": false, + "empirical_context": "The prevalence surveys made it possible to identify the Priority Vulnerable Groups ( PVG ). Also during project preparation, KABP studies were carried out among the general population, school children, military personnel, and dockers in addition to the survey being carried out by Save the Children mentioned above. UNICEF i s implementing a Mother-to-Child Transmission ( MTCT ) pilot program which started at the end o f March 2003.", + "type": "study", + "explanation": "'KABP studies' are mentioned as studies rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'KABP studies' refers to a dataset because it involves research among various populations.", + "contextual_reason_agent": "'KABP studies' are mentioned as studies rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 69, + "text": "Specifically, it will include six core functions: ( a ) e-learning, including instructional material with concrete guidance for CECs, facilitators, and communal authorities on their role at different stages of the annual investment cycles, ( b ) participatory diagnostics, designed to support the mapping of available infrastructure and services and document prioritized needs by different groups, ( c ) sub-project cycle, connected to the project MIS, which allows to submit agreed investment proposal, track approvals, and monitor implementation, ( d ) community pulse surveys, consisting of high-frequency or on demand surveys to gain regular community insights to help project implementation, ( e ) community scorecards to allow citizens to assess sub-projects, and ( f ) a GRM through multiple channels ( analogue, feature phones, smartphones, web form ) for citizens to voice complaints, concerns or provide feedback on project activities. Collected data will also feed into the publicly accessible Casamance Knowledge Management Platform developed under the Component 4, to inform dialogue among practitioners, policymakers and the public. The deployment of these tools will take into consideration connectivity and literacy constraints.", + "ner_text": [ + [ + 703, + 723, + "named" + ] + ], + "validated": false, + "empirical_context": "Specifically, it will include six core functions: ( a ) e-learning, including instructional material with concrete guidance for CECs, facilitators, and communal authorities on their role at different stages of the annual investment cycles, ( b ) participatory diagnostics, designed to support the mapping of available infrastructure and services and document prioritized needs by different groups, ( c ) sub-project cycle, connected to the project MIS, which allows to submit agreed investment proposal, track approvals, and monitor implementation, ( d ) community pulse surveys, consisting of high-frequency or on demand surveys to gain regular community insights to help project implementation, ( e ) community scorecards to allow citizens to assess sub-projects, and ( f ) a GRM through multiple channels ( analogue, feature phones, smartphones, web form ) for citizens to voice complaints, concerns or provide feedback on project activities. Collected data will also feed into the publicly accessible Casamance Knowledge Management Platform developed under the Component 4, to inform dialogue among practitioners, policymakers and the public.", + "type": "tool", + "explanation": "'Community scorecards' are mentioned as a tool for citizens to assess sub-projects, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'community scorecards' are a dataset because they involve the assessment of sub-projects by citizens, which could imply data collection.", + "contextual_reason_agent": "'Community scorecards' are mentioned as a tool for citizens to assess sub-projects, not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a tool for assessment, not as a data source", + "tags": [] + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 13, + "text": "Regarding education, children in Burundi are expected to complete 7. 6 years of school by 18 years of age, but adjusting for quality, this represents only 5. 2 years of schooling. Children \u2019 s chronic malnutrition shows a concerning picture as 54 percent of children are stunted and so are at risk of cognitive and physical limitations that can last a lifetime. The high stunting levels impair cognitive development and impede children \u2019 s ability to cope with primary schooling. The country \u2019 s age-specific fertility rates for women aged 30-45 are the highest in the region, and its total fertility rate of six pregnancies per woman is the seventh highest in the world. With such persistently high fertility rates, Burundi \u2019 s population is expected to double by as early as 20405. 1 International Monetary Fund ( IMF ) World Economic Outlook 2 New household survey data will be released in the second semester of 2021 3 State of Food Security and Nutrition in the World Report. FAO, UNICEF, WFP and IFAD. 2018 4 World Bank. 2020 5 Human Capital Project. World Bank. 2020", + "ner_text": [ + [ + 851, + 872, + "named" + ], + [ + 33, + 40, + "household survey data <> data geography" + ], + [ + 717, + 724, + "household survey data <> data geography" + ], + [ + 916, + 920, + "household survey data <> publication year" + ], + [ + 981, + 984, + "household survey data <> publisher" + ], + [ + 1008, + 1012, + "household survey data <> reference year" + ], + [ + 1015, + 1025, + "household survey data <> publisher" + ], + [ + 1027, + 1031, + "household survey data <> publication year" + ], + [ + 1057, + 1067, + "household survey data <> publisher" + ] + ], + "validated": true, + "empirical_context": "With such persistently high fertility rates, Burundi \u2019 s population is expected to double by as early as 20405. 1 International Monetary Fund ( IMF ) World Economic Outlook 2 New household survey data will be released in the second semester of 2021 3 State of Food Security and Nutrition in the World Report. FAO, UNICEF, WFP and IFAD.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to new data collected from household surveys that will be used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' implies a structured collection of data from surveys.", + "contextual_reason_agent": "This is indeed a dataset as it refers to new data collected from household surveys that will be used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 13, + "text": "The town councils act as water service providers ( water authorities ) and can elect to provide service directly, utilize community-based organizations, or employ private companies. The MWE provides the water authorities with support through its Water and Sanitation Sector Development Facilities ( WSDFs ) that provide financing and guidance for the design and implementation of WSS systems and through six regional umbrella organizations ( UOs ) that provide high-level assistance on operation and maintenance ( O & M ) related activities. The UOs are limited guarantee companies that have successfully provided technical, managerial, and financial management ( FM ) support to water authorities since 2001. Currently, the UOs receive subsidies from the GoU and development partners ( DPs ) to conduct these activities. 7. In the early 1990s, the GoU implemented significant policy reforms, including the commercialization and modernization of the NWSC. These reforms, coupled with significant capital 6 The number of refugees has increased to 1. 4 million as of December 2017, of which 52 percent are characterized by women and girls and 61 percent by children under 18 years. Approximately 75 percent of the refugees originate from South Sudan; 17 percent from the Democratic Republic of Congo; and 3 percent from Burundi, Somalia, and other countries. 7 Large urban towns are cities and municipalities defined as urban centers by the UBOS \u2019 National Population and Housing Census ( NPHC ). Urban centers include all areas gazetted as city, municipality, town council by the UBOS.", + "ner_text": [ + [ + 1446, + 1484, + "named" + ], + [ + 704, + 708, + "National Population and Housing Census <> publication year" + ], + [ + 1074, + 1078, + "National Population and Housing Census <> publication year" + ], + [ + 1439, + 1443, + "National Population and Housing Census <> publisher" + ], + [ + 1487, + 1491, + "National Population and Housing Census <> acronym" + ], + [ + 1579, + 1583, + "National Population and Housing Census <> publisher" + ] + ], + "validated": true, + "empirical_context": "Approximately 75 percent of the refugees originate from South Sudan; 17 percent from the Democratic Republic of Congo; and 3 percent from Burundi, Somalia, and other countries. 7 Large urban towns are cities and municipalities defined as urban centers by the UBOS \u2019 National Population and Housing Census ( NPHC ). Urban centers include all areas gazetted as city, municipality, town council by the UBOS.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a census that provides data on urban centers.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a census that provides data on urban centers.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 19, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 15 of 66 23. Functional mechanisms to support evidence-based decision-making and management of the sector are needed. MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators. Quality, reliable data is fundamental to inform education decisions and policies and enhance transparency. Accordingly, strengthening the EMIS and the annual school survey process to provide quality education information, in a timely manner, at every administrative level is essential to support the functions of the education ministries. 24. The education governing ministries lack the policy tools and implementation capacity needed for education sector management. The General Education system is characterized by inequitable allocation of teachers across the states, unclear deployment drivers, and outdated salary structure.", + "ner_text": [ + [ + 224, + 263, + "named" + ] + ], + "validated": false, + "empirical_context": "Functional mechanisms to support evidence-based decision-making and management of the sector are needed. MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System.", + "type": "system", + "explanation": "However, the context indicates that it is a system for management rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, the context indicates that it is a system for management rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 65, + "text": "Within each of these output areas, specific disbursement-linked indicators have been selected from RACE 2 that relate to the Bank supported Program. To achieve them, the Program proposes to support a number of interventions and activities that will contribute to these results, including training for school leaders and teachers, grants to schools to strengthen school-based management, activating a network of community liaisons, and undertaking a number of evaluation and learning assessment activities to help teachers detect more precisely which students are struggling with what parts of the curriculum. 25 Based on infrastructure data collected in 2011, using less than 2. 8 square meters of space per student in either first or second shift to indicate overcrowding. 26 Total capacity is calculated at 437, 895 using 2. 8 square meters per student, while total enrollment in the first shift is about 315, 000. As of May 2016, MEHE is in the process of updating the physical infrastructure assessment.", + "ner_text": [ + [ + 621, + 640, + "named" + ], + [ + 654, + 658, + "infrastructure data <> reference year" + ], + [ + 923, + 931, + "infrastructure data <> publication year" + ] + ], + "validated": true, + "empirical_context": "To achieve them, the Program proposes to support a number of interventions and activities that will contribute to these results, including training for school leaders and teachers, grants to schools to strengthen school-based management, activating a network of community liaisons, and undertaking a number of evaluation and learning assessment activities to help teachers detect more precisely which students are struggling with what parts of the curriculum. 25 Based on infrastructure data collected in 2011, using less than 2. 8 square meters of space per student in either first or second shift to indicate overcrowding.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific collected data used to assess overcrowding in schools.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'infrastructure data' is a dataset because it refers to collected information regarding space per student.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific collected data used to assess overcrowding in schools.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 42, + "text": "Annually Elmaouna annual capitalization report Spot checks reports Household survey Food Security Office", + "ner_text": [ + [ + 67, + 83, + "named" + ], + [ + 84, + 104, + "Household survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Annually Elmaouna annual capitalization report Spot checks reports Household survey Food Security Office", + "type": "survey", + "explanation": "In the context, 'Household survey' is likely used as a data source for the reports mentioned.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Household survey' suggests a structured collection of data related to households.", + "contextual_reason_agent": "In the context, 'Household survey' is likely used as a data source for the reports mentioned.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 15, + "text": "The World Bank Formal Employment Creation Project ( P171766 ) Page 11 of 86 Survey ). 6 Only 2 percent of respondents to a recent survey reported that they were working and had work permits. About 65 percent of the beneficiaries of the Emergency Social Safety Net Program ( ESSN ), a temporary humanitarian program, report that their main source of income is short-term informal work. 7 This will become a more significant problem once the ESSN comes to an end. 13. One of the most important contextual factors that limits formal job creation is the poor access to financing among firms. Credit service provision is less developed in many provinces where refugees live and work. According to the World Bank Enterprise Survey, most respondents ( 76 percent ) in the affected regions assert that access to finance deteriorated loan terms and conditions ( interest rates, maturity, and collateral requirements ). 8 Poor access to longer-term financing limits enterprises from investing, increasing production capacity, and providing sustainable employment opportunities.", + "ner_text": [ + [ + 696, + 724, + "named" + ], + [ + 4, + 14, + "World Bank Enterprise Survey <> publisher" + ], + [ + 130, + 136, + "World Bank Enterprise Survey <> data type" + ], + [ + 550, + 574, + "World Bank Enterprise Survey <> data description" + ], + [ + 639, + 648, + "World Bank Enterprise Survey <> data geography" + ], + [ + 696, + 706, + "World Bank Enterprise Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Credit service provision is less developed in many provinces where refugees live and work. According to the World Bank Enterprise Survey, most respondents ( 76 percent ) in the affected regions assert that access to finance deteriorated loan terms and conditions ( interest rates, maturity, and collateral requirements ). 8 Poor access to longer-term financing limits enterprises from investing, increasing production capacity, and providing sustainable employment opportunities.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to support claims about access to finance in the context of refugees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical data on access to finance.", + "contextual_reason_agent": "This is indeed a dataset as it is used to support claims about access to finance in the context of refugees.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 48, + "text": "Methodology for Data Collection Indicator values will be collected from the administrative data of MODEE and public - and private-sector relying parties on the use of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improving trusted, people-centric data sharing Description Availability of trusted, people-centric data sharing [ Yes / No ]. Frequency Annually Data source ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports. Methodology for Data Collection Indicator values will be collected from ( a ) DPI implementation reports released by MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation, and testing reports, and ( c ) Official reports submitted by third - party assessment bodies recruited to carry out the Privacy Impact Assessments. All indicator values will be cross - checked by the IVA. Responsibility for Data Collection MODEE. Enhanced management of medical records Description Promoting digital transformation in health servcie delivery by scaling up the coverage of an existing EMR platform to all MOH facility sites, prioritizing facility sites in districts with a high concentration of Syrian refugees. Frequency Biannualy. Data source Administrative data from the EHS. Methodology for Data Collection Data collected from the EHS to measure how many MOH facility sites had installed and operationalized the EMR platform. Responsibility for Data Collection MOH and EHS.", + "ner_text": [ + [ + 1636, + 1648, + "named" + ] + ], + "validated": false, + "empirical_context": "Data source Administrative data from the EHS. Methodology for Data Collection Data collected from the EHS to measure how many MOH facility sites had installed and operationalized the EMR platform. Responsibility for Data Collection MOH and EHS.", + "type": "program", + "explanation": "However, the context indicates that it is a platform rather than a dataset, as it is described in relation to data collection rather than as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMR platform' sounds like a structured system for managing data.", + "contextual_reason_agent": "However, the context indicates that it is a platform rather than a dataset, as it is described in relation to data collection rather than as a data source itself.", + "contextual_signal": "mentioned only as a platform, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 47, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 38 of 68 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Participating teachers with improved teaching practices This indicator will be measured through the classroom observation tool before intervention and after completion of the training ( for a representative sample of teachers ). The details of the TEACH approach will be included in the POM. Annual Progress and monitoring reports Progress reports and data from participating schools based on TEACH tool MoER, PMT, ANACEC Improved learning outcomes of students benefitting from project-supported tutoring program ( disaggregated by gender, urban / rural, refugee / vulnerability status ) This indicator will measure the scores of students benefiting from tutoring program according to the results of the impact evaluation.", + "ner_text": [ + [ + 367, + 393, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 38 of 68 UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Participating teachers with improved teaching practices This indicator will be measured through the classroom observation tool before intervention and after completion of the training ( for a representative sample of teachers ). The details of the TEACH approach will be included in the POM.", + "type": "tool", + "explanation": "However, it is described as a tool for observation rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves observation and measurement of teaching practices.", + "contextual_reason_agent": "However, it is described as a tool for observation rather than a structured collection of data.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 33, + "text": "The key gaps identified in the MTR assessment are: ( a ) fewer women access loans from financial institutions to start their own businesses; ( b ) fewer women are starting businesses due to time poverty and capacity gaps; and ( c ) fewer women are in employment than men. Social gender norms and household dynamics play a critical role in causing these gaps. According to a 2022 study by Access to Finance Rwanda, deeply ingrained societal expectations shape how women participate in economic life and influence their capacity to leverage assets \u2014 particularly land and property \u2014 as collateral. 34 One pervasive norm is that women should prioritize family and caregiving responsibilities over business activities. This norm restricts their time and engagement in income-generating pursuits and weakens their perceived legitimacy as entrepreneurs, reducing their chances of qualifying for credit. Additionally, women are often expected to rely on family support, especially from spouses, instead of seeking independent financial solutions, distancing them from formal financial institutions and financial products. 31 Baseline failure rates by segment are extremely difficult to assess. As such, the analysis uses conservative assumptions based on extensive discussions with key stakeholders and potential beneficiaries. These assumptions have been tested for sensitivity. 32 CEIC data 33 In the absence of available survey data, the analysis uses proxies for these losses and repair costs from survey data from neighboring Burundi. Based on this data, annual flooding losses amount to ~ US $ 89 per household and repair costs amount to US $ 25. 60 per household. 34 Gendered Social Norms Diagnostic and their Impact on Women \u2019 s Financial Inclusion in Rwanda, 2022, Access to Finance Rwanda", + "ner_text": [ + [ + 1417, + 1428, + "named" + ], + [ + 63, + 68, + "survey data <> reference population" + ], + [ + 374, + 378, + "survey data <> publication year" + ], + [ + 388, + 412, + "survey data <> publisher" + ], + [ + 1524, + 1531, + "survey data <> data geography" + ], + [ + 1553, + 1609, + "survey data <> data description" + ], + [ + 1761, + 1765, + "survey data <> publication year" + ] + ], + "validated": true, + "empirical_context": "These assumptions have been tested for sensitivity. 32 CEIC data 33 In the absence of available survey data, the analysis uses proxies for these losses and repair costs from survey data from neighboring Burundi. Based on this data, annual flooding losses amount to ~ US $ 89 per household and repair costs amount to US $ 25.", + "type": "survey", + "explanation": "In this context, 'survey data' is indeed used as a data source for analysis of flooding losses and repair costs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'survey data' is a dataset because it refers to collected information from surveys.", + "contextual_reason_agent": "In this context, 'survey data' is indeed used as a data source for analysis of flooding losses and repair costs.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "121_PAD1190-PAD-P152848-PUBLIC-Box391435B-LB-EESSP-Final-PAD-for-printing", + "page": 11, + "text": "The higher quality associated with private schools means that public-school students are likely to learn less and face more difficult job prospects upon graduation. This sets up inter-generational transmission of both lower learning levels and lower income. 9 Public schools exhibit lower academic outcomes in international and national assessments. The level of public school students was 10 percent lower than that of private schools in the 2011 Trends in International Mathematics and Science Study ( TIMSS ) results. Indeed, based on the 2004 household survey, poverty and education are highly correlated in Lebanon. 5 Lebanon \u2019 s inequality-adjusted HDI is 20. 8 percent lower than its HDI, among the largest losses in the group of countries in the high human development category. 6 World Economic Forum \u2019 s 2013 Human Capital Index 7 Further information about the level of private sector investments is expected from a forthcoming Education Expenditure Review. 8 World Bank Ed Stats 9 \u201c Poverty, Growth and Income Distribution in Lebanon, \u201d August 2008.", + "ner_text": [ + [ + 542, + 563, + "named" + ] + ], + "validated": true, + "empirical_context": "The level of public school students was 10 percent lower than that of private schools in the 2011 Trends in International Mathematics and Science Study ( TIMSS ) results. Indeed, based on the 2004 household survey, poverty and education are highly correlated in Lebanon. 5 Lebanon \u2019 s inequality-adjusted HDI is 20.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context of analyzing correlations between poverty and education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that likely contains structured data on households.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context of analyzing correlations between poverty and education.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "184_multi-page", + "page": 39, + "text": "These high repetition and drop-out rates imply that the average students needs 8. 6 years to complete six years of primary education and needs 5. 5 years to complete three years of lower secondary ( or middle school ). The high repetition rates peak at the terminal year of each cycle and are driven by competitive examinations which students need to take to pass from one stage to another. It is also driven in part by the curriculum which is geared to preparing students for the French baccalaureate examnination and may be contextually difficult for Djiboutians from less educated families. 3. Income and Gender Gaps in Enrollment Rates Even though the main constraint at present appears to be school places, there is already evidence of gender and income gaps which cannot be explained by lack of school places alone. These are expected to become more prominent over time as enrollment rates rise. According to the household expenditure survey data, in urban areas, the Net Enrollment Rate in Primary Enrollment is 50 % higher for the highest expenditure quintile compared to the lowest expenditure quintile. The inequity is even worse in secondary education ( lower secondary education is part of basic education but the survey data did not separate the two ), where the NER of the highest quintile is 420 % higher than the NER of the lowest quintile.", + "ner_text": [ + [ + 919, + 952, + "named" + ], + [ + 957, + 968, + "household expenditure survey data <> data geography" + ], + [ + 974, + 1015, + "household expenditure survey data <> data description" + ], + [ + 1372, + 1390, + "household expenditure survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "These are expected to become more prominent over time as enrollment rates rise. According to the household expenditure survey data, in urban areas, the Net Enrollment Rate in Primary Enrollment is 50 % higher for the highest expenditure quintile compared to the lowest expenditure quintile. The inequity is even worse in secondary education ( lower secondary education is part of basic education but the survey data did not separate the two ), where the NER of the highest quintile is 420 % higher than the NER of the lowest quintile.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data used for empirical analysis regarding enrollment rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on household expenditures.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data used for empirical analysis regarding enrollment rates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 54, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 50 of 64 RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target Average user experience score among PBF / DFF participating facilities ( Percentage ) 0. 00 60. 00 Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist ( Number ) 0. 00 80. 00 Communities with functioning community health workers per the norms set by the National Community Health Strategy ( Percentage ) 0. 00 60. 00 Communities that have formally declared the abandonment of the practice of FGM ( Percentage ) 0. 00 60. 00 Component 2: Strengthening institutions, citizen engagement and project management Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) ( Percentage ) 35. 00 65. 00 Facilities that are connected to the National Logistics Management Information System ( eLMIS ) ( Percentage ) 0. 00 65. 00 Health surveys conducted and results made public ( Number ) 0. 00 2. 00 Expectant women using a transport voucher or staying in a maternal waiting home to ensure safe deliveries ( Number ) 0. 00 20, 000. 00 Of which refugees ( Number ) 0. 00 200. 00 Of which host community residents ( Number ) 0. 00 2, 000. 00 Completion of annual regional and national health fora with adopted resolutions ( Yes", + "ner_text": [ + [ + 717, + 723, + "named" + ] + ], + "validated": false, + "empirical_context": "00 60. 00 Component 2: Strengthening institutions, citizen engagement and project management Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) ( Percentage ) 35. 00 65.", + "type": "system", + "explanation": "However, DHIS-2 is a health information system and not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS-2 is a dataset because it is associated with data entry and management.", + "contextual_reason_agent": "However, DHIS-2 is a health information system and not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 36, + "text": "29 \uf0b7 C. 4 - MEHE and CERD at the central and regional levels are strengthened to lead and coordinate the planning, implementation, and evaluation of the relevant RACE 2 activities. 22. One of the main challenges faced during the implementation of the first phase of RACE is the unavailability of timely and reliable data, structured data collection systems, and systems-capacity to analyze data that could inform policy decisions or programmatic interventions. There are currently multiple data collection mechanisms working in parallel, in addition to long delays in the ability for MEHE or CERD to collect and use school-level data for decision-making. As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders. An EMIS will be deployed in order to support the timely collection of the data, so that decision-making at both the school and the central MEHE and CERD levels is based on accurate and updated data. 23. For output C. 2, the current Lebanese formal curriculum dates from 1997 and has not seen major changes since that date. It is strongly centered on specific concepts or information that students should know, rather than on competencies and skills that learners should acquire.", + "ner_text": [ + [ + 616, + 633, + "named" + ], + [ + 584, + 588, + "school-level data <> author" + ], + [ + 748, + 755, + "school-level data <> reference population" + ], + [ + 757, + 765, + "school-level data <> reference population" + ], + [ + 1103, + 1111, + "school-level data <> data geography" + ], + [ + 1141, + 1145, + "school-level data <> reference year" + ] + ], + "validated": true, + "empirical_context": "One of the main challenges faced during the implementation of the first phase of RACE is the unavailability of timely and reliable data, structured data collection systems, and systems-capacity to analyze data that could inform policy decisions or programmatic interventions. There are currently multiple data collection mechanisms working in parallel, in addition to long delays in the ability for MEHE or CERD to collect and use school-level data for decision-making. As a result, MEHE and CERD will develop a data management framework detailing how data about schools, students, teachers, and other personnel is collected, validated, categorized, and used by different stakeholders.", + "type": "data", + "explanation": "In the context, 'school-level data' is mentioned as information that informs decision-making, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'school-level data' refers to a structured collection of data related to schools.", + "contextual_reason_agent": "In the context, 'school-level data' is mentioned as information that informs decision-making, indicating it is used as a data source.", + "contextual_signal": "mentioned as data that informs decision-making", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 84, + "text": "Uganda remains committed to improving economic opportunities, social services and infrastructure to benefit refugees and host communities. Five CRRF program response plans have been finalized and are being supported by large national projects which include refugees and host community members with World Bank financing. These plans foresee medium and long-term development investments and support the transition of humanitarian assistance into Government services in RHDs. z There is strong progress on: the commitment to integrate refugee services into national service delivery systems. As outlined in the National Development Plan III ( NDP III ), refugee planning is integrated into national, sectoral and local government plans and data collection. The CRRF has developed sectoral plans for refugees and host communities and included both groups under the Uganda Intergovernmental Fiscal Transfer to support service provision through district development plans. Refugees and hosts have been included in the 2022 Uganda Demographic Health Survey. Although a refugee sample was not collected in the Uganda National Household Survey that was conducted in 2019 / 20 due to COVID-19 pressures, the Uganda Bureau of Statistics remains committed to include refugees in national data exercises. On the commitments to: ensure access for refugees and host population to quality, efficient and integrated basic social services; and enhance social infrastructure in refugee hosting areas, strong progress is being made on health and education service provision.", + "ner_text": [ + [ + 1102, + 1134, + "named" + ], + [ + 0, + 6, + "Uganda National Household Survey <> data geography" + ], + [ + 861, + 867, + "Uganda National Household Survey <> data geography" + ], + [ + 1017, + 1023, + "Uganda National Household Survey <> data geography" + ], + [ + 1102, + 1108, + "Uganda National Household Survey <> data geography" + ], + [ + 1157, + 1166, + "Uganda National Household Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "Refugees and hosts have been included in the 2022 Uganda Demographic Health Survey. Although a refugee sample was not collected in the Uganda National Household Survey that was conducted in 2019 / 20 due to COVID-19 pressures, the Uganda Bureau of Statistics remains committed to include refugees in national data exercises. On the commitments to: ensure access for refugees and host population to quality, efficient and integrated basic social services; and enhance social infrastructure in refugee hosting areas, strong progress is being made on health and education service provision.", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned in the context of national data exercises and data collection efforts.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of national data exercises and data collection efforts.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 11, + "text": "Over 84 percent of women in South Sudan are illiterate, and 50 percent of girls under the legal age of 18 1 United Nations High Commissioner for Refugees ( UNHCR ) 2022. https: / / www. unhcr. org / en-us / south-sudan. html. 2 International Monetary Fund 2019. \u201c Republic of South Sudan. 2019 Article IV Consultation Report. \u201d IMF Country Report No. 19 / 153. 3 United Nations Office for the Coordination of Humanitarian Affairs, 2022b Humanitarian Needs Overview: South Sudan. 4 There have been recent upticks in violence in Upper Nile, Jonglei, Western Equatoria and Central Equatoria States, as well as high-conflict risk scenarios in Greater Tonj and the Warrap State / Abyei area ( among others ). 5 Macro Poverty Outlook \u2013 Spring Meetings, World Bank, 2023. 6 World Bank. 2022. South Sudan Economic Monitor, February 2022: Towards a Jobs Agenda. World Bank, Washington, DC. 7 The UNDP \u2019 s life-course gender gap dashboard contains 13 indicators that display gender gaps in choices and opportunities over the life course \u2014 childhood and youth, adulthood, and older age. Retrieved from https: / / hdr. undp. org / life-course-gender-gapint. I. STRATEGIC CONTEXT", + "ner_text": [ + [ + 896, + 928, + "named" + ] + ], + "validated": false, + "empirical_context": "World Bank, Washington, DC. 7 The UNDP \u2019 s life-course gender gap dashboard contains 13 indicators that display gender gaps in choices and opportunities over the life course \u2014 childhood and youth, adulthood, and older age. Retrieved from https: / / hdr.", + "type": "dashboard", + "explanation": "However, it is described as a dashboard, which typically presents data rather than serving as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it contains indicators that display data on gender gaps.", + "contextual_reason_agent": "However, it is described as a dashboard, which typically presents data rather than serving as a structured collection of data itself.", + "contextual_signal": "mentioned only as a dashboard, not as a data source", + "tags": [] + }, + { + "filename": "168_252640updated0version", + "page": 88, + "text": "The mission recommends that the ES: ( i ) launch at this time the competitive bidding process for the project financial monitoring information system, based on the description o f the special conditions of the functionalities for this application; ( ii ) ascertain that all o f these functionalities are present in the information system to be chosen and that they meet the requirements, mainly those o f producing the data required in the FMR; and ( iii ) ascertain that the monitoring o f procurement operations are functional, including the processing o f specific operations, namely the launch o f the competitive bidding process for the contract. Failing that, the mission suggests the development and implementation at this time o f an application that meets this requirement. It should be designed to monitor the contracts and identify any issues.", + "ner_text": [ + [ + 102, + 149, + "named" + ] + ], + "validated": false, + "empirical_context": "The mission recommends that the ES: ( i ) launch at this time the competitive bidding process for the project financial monitoring information system, based on the description o f the special conditions of the functionalities for this application; ( ii ) ascertain that all o f these functionalities are present in the information system to be chosen and that they meet the requirements, mainly those o f producing the data required in the FMR; and ( iii ) ascertain that the monitoring o f procurement operations are functional, including the processing o f specific operations, namely the launch o f the competitive bidding process for the contract. Failing that, the mission suggests the development and implementation at this time o f an application that meets this requirement.", + "type": "system", + "explanation": "However, it is described as a system for monitoring and processing operations, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data handling.", + "contextual_reason_agent": "However, it is described as a system for monitoring and processing operations, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "157_417980PAD0P1041ly10IDAR200081006011", + "page": 46, + "text": "Participatory M & E instruments include: ( a ) community scorecards or other tools which allow users o f services to rate the quality o f service delivery; ( b ) monitoring o f simplified indicators identified by the communities ( beneficiaries ) to assess progress in implementation and results for each C D D subproject; and ( c ) citizen evaluations o f the performance o f their LGUs and JSCPDs. During appraisal, the community scorecard tool was successfully tested at the M O L G central level ( participants from various M O L G departments ) and at the JSCPD level. The participatory M & E process throughout the project life will be facilitated by the OC and guided by the OM. Project Evaluation through External Assessments 7. BIAs will be carried out by specialized consultants, at mid term and completion. The BIAs will draw on a representative sample o f household surveys in VNDP target areas, complemented by qualitative research. These surveys will measure the extent to which target groups, and in particular, economically and socially marginalized groups, including youth and women, who participated in CDD subproject identification and selection, are informed and aware o f the main decisions on subprojects, and have better access to services. Furthermore, the second ( final ) BIA will conduct surveys in VNDP target areas to provide in-depth analysis on issues o f inclusion and participation o f marginalized groups. The M & E section o f the O M will suggest a detailed scope o f work for the BIAs. External Audits 8. Financial Audit: To ensure the financial soundness of project accounts, the Grant Agreement will require the submission o f Audited Project Financial Statements within six months after the end o f each year. This will also include spot audits o f CDD subprojects. Relevantly qualified, experienced and independent external financial auditor will be selected on a competitive basis based on TORS acceptable to the Bank. 39", + "ner_text": [ + [ + 868, + 885, + "named" + ], + [ + 889, + 906, + "household surveys <> data geography" + ], + [ + 1027, + 1072, + "household surveys <> reference population" + ], + [ + 1084, + 1099, + "household surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "BIAs will be carried out by specialized consultants, at mid term and completion. The BIAs will draw on a representative sample o f household surveys in VNDP target areas, complemented by qualitative research. These surveys will measure the extent to which target groups, and in particular, economically and socially marginalized groups, including youth and women, who participated in CDD subproject identification and selection, are informed and aware o f the main decisions on subprojects, and have better access to services.", + "type": "survey", + "explanation": "In the context, 'household surveys' are explicitly mentioned as a source of data used to measure awareness and access to services among target groups.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'household surveys' is a dataset because it refers to a structured collection of data collected from households.", + "contextual_reason_agent": "In the context, 'household surveys' are explicitly mentioned as a source of data used to measure awareness and access to services among target groups.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 67, + "text": "A robust and reliable water resources monitoring network is key to the operationalization of the IWRM framework and to building Uganda \u2019 s resilience to climatic variation. A review of the water monitoring network carried out by the DWRM in 2005 showed that there is insufficient baseline data on groundwater as well as monitoring of boreholes to represent the full range of hydrogeological and climatic conditions in Uganda. For surface water, the review revealed that the operational gauges are inadequate and that the digitized data are not transmitted to a central processing center due to lack of a data transmission interface with an automated sensor. To address these issues, the WMDP supported the strengthening and expansion of the existing hydrological, hydrometeorological, water quality monitoring network in the Kyoga and Upper Nile WMZs as well as the design of a comprehensive WIS that integrates the hydrological information system, spatial data system, information management system, knowledge management system, and data management system. The WMDP supported the development of the detailed design of the National WIS, which will now be implemented at the national and regional level in all WMZs through this project. 34. In addition, the Project will further improve water resources monitoring by providing additional monitoring stations and equipment to monitor surface water, groundwater, water quality, and climate variations. The Project will also support the use of online remote sensing equipment to enable real-time monitoring and improve the quality and timeliness of data collection for the WIS. The Project will also support the rehabilitation of the National Water Quality Reference Laboratory. Component 4: Project Implementation and Institutional Strengthening ( US $ 5. 5 million of which national IDA US $ 5. 0 million and counterpart funds US $ 0. 5 million ) This component will finance activities designed to ensure effective and efficient Project implementation and coordination as well as institutional strengthening to support WSS service delivery reforms. Project management activities will include ( a ) coordination of planning, monitoring, reporting and supervision of the Project; ( b ) training of MWE and NWSC staff on World Bank procedures related to procurement,", + "ner_text": [ + [ + 892, + 895, + "named" + ] + ], + "validated": false, + "empirical_context": "For surface water, the review revealed that the operational gauges are inadequate and that the digitized data are not transmitted to a central processing center due to lack of a data transmission interface with an automated sensor. To address these issues, the WMDP supported the strengthening and expansion of the existing hydrological, hydrometeorological, water quality monitoring network in the Kyoga and Upper Nile WMZs as well as the design of a comprehensive WIS that integrates the hydrological information system, spatial data system, information management system, knowledge management system, and data management system. The WMDP supported the development of the detailed design of the National WIS, which will now be implemented at the national and regional level in all WMZs through this project.", + "type": "system", + "explanation": "'WIS' is mentioned as a system designed to integrate various components, but it is not explicitly described as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'WIS' is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "'WIS' is mentioned as a system designed to integrate various components, but it is not explicitly described as a data source.", + "contextual_signal": "mentioned as a system but not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 20, + "text": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 14 of 95 ( CPF ) for Burkina Faso for the period of FY2018-2023 ( Report No. 123712-BF ) 23 14. Land information is mostly paper-based and the existing digital information is scattered across different systems that are not interoperable, partially functional, and not secure. Separate land information management systems have been developed at central and municipal level to support the registration and delivery of APFRs, streamline the processes for issuing land titles, and manage information related to lease agreements and property taxes. Land information is often georeferenced in different local projection systems, hindering the constitution of an integrated and coherent cadaster. Operating difficulties are linked to various technical issues ( unreliable telecommunication network, faulty infrastructure, and power outages ) and lack of capacity. Most importantly, the lack of interoperability can result in different institutions assigning distinct rights to the same parcel, a risk that may trigger land disputes and erode social cohesion. 15. Initiatives for the digital transformation of land administration and the adoption of a geospatial roadmap have recently been launched and need additional support.", + "ner_text": [ + [ + 377, + 412, + "named" + ] + ], + "validated": false, + "empirical_context": "Land information is mostly paper-based and the existing digital information is scattered across different systems that are not interoperable, partially functional, and not secure. Separate land information management systems have been developed at central and municipal level to support the registration and delivery of APFRs, streamline the processes for issuing land titles, and manage information related to lease agreements and property taxes. Land information is often georeferenced in different local projection systems, hindering the constitution of an integrated and coherent cadaster.", + "type": "system", + "explanation": "However, it is described as systems rather than a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to systems that manage land information.", + "contextual_reason_agent": "However, it is described as systems rather than a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "057_West-Bank-and-Gaza-Phase-1-of-the-Multiphase-Programmatic-Approach-Advancing-Sustainability-in-Performance-Infrastructure-and-Reliability-of-Energy-Sector-Project", + "page": 71, + "text": "The baseline depends on the staffing data provided by PENRA Male staffing numbers to be confirmed Additionally, lack of electricity negatively impacts female entrepreneurship, resulting in lost income for women-owned businesses and subsequent closures. About 42 percent of firms identified lack of electricity as a major constraint to business growth, with up to 16 outages lasting six hours each on average per month ( Enterprise Survey, WB & G 2019 ). While this affects all businesses, women-owned businesses \u2014 majority of whom are micro or small and often home based \u2014 are particularly affected because of the nature of their work ( cooking, handiwork, and textiles ) relying on electrical appliances to produce their products ( anecdotal evidence ). Data also suggest that women are less likely to own generators or other forms of electrification for back-up during shortages largely because of costs. The project will enable women-owned businesses access solar PV systems. This will be done first through an assessment that will capture the specific energy and financial needs of female-owned ( and male-owned ) businesses and help them determine which financial tools and solar kit options make most sense to address their different needs.", + "ner_text": [ + [ + 420, + 437, + "named" + ], + [ + 205, + 227, + "Enterprise Survey <> reference population" + ], + [ + 446, + 450, + "Enterprise Survey <> publication year" + ], + [ + 489, + 511, + "Enterprise Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The baseline depends on the staffing data provided by PENRA Male staffing numbers to be confirmed Additionally, lack of electricity negatively impacts female entrepreneurship, resulting in lost income for women-owned businesses and subsequent closures. About 42 percent of firms identified lack of electricity as a major constraint to business growth, with up to 16 outages lasting six hours each on average per month ( Enterprise Survey, WB & G 2019 ). While this affects all businesses, women-owned businesses \u2014 majority of whom are micro or small and often home based \u2014 are particularly affected because of the nature of their work ( cooking, handiwork, and textiles ) relying on electrical appliances to produce their products ( anecdotal evidence ).", + "type": "survey", + "explanation": "It is indeed a dataset as it is referenced in the context as a source of empirical data regarding business constraints.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Enterprise Survey' suggests a structured collection of data related to businesses.", + "contextual_reason_agent": "It is indeed a dataset as it is referenced in the context as a source of empirical data regarding business constraints.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "102_Kenya-Water-PAD-04072017", + "page": 36, + "text": "26 Annex 1: Results Framework and Monitoring Kenya: Water and Sanitation Development Project ( P156634 ) Results Framework Project Development Objective: To improve water supply and sanitation services in select coastal and northeastern regions in Kenya. PDO Level Results Indicators C o r e Unit of Meas ure Cumulative Target Values Fre - quency Data source / method - ology Respon - sibility for data collec - tion Comments Baselin e in 2016 2017 2018 2019 2020 2021 2022 People in urban areas provided with access to improved water sources under the project. X Number 0 0 2, 500 18, 400 50, 900 70, 000 90, 000 Semi - ann - ually Project reports on construct ion and operatio n of infrastru cture. WSP data on connec - tions. WSPs Coast counties, Wajir and Garissa. One household connection serves 5 people, one community water point serves 30 people, and one kiosk serves 400 people. People provided with access to improved sanitation services under the project \u2014 urban. X Number 0 0 0 10, 000 30, 000 40, 000 50, 000 Semi - ann - ually Project reports on construc - tion and operatio n of infra - structure. WSP data on connec - tions. WSPs Primarily, Wajir and Garissa.", + "ner_text": [ + [ + 701, + 709, + "named" + ], + [ + 45, + 50, + "WSP data <> data geography" + ], + [ + 474, + 495, + "WSP data <> reference population" + ], + [ + 734, + 748, + "WSP data <> data geography" + ], + [ + 750, + 755, + "WSP data <> data geography" + ], + [ + 760, + 767, + "WSP data <> data geography" + ] + ], + "validated": true, + "empirical_context": "X Number 0 0 2, 500 18, 400 50, 900 70, 000 90, 000 Semi - ann - ually Project reports on construct ion and operatio n of infrastru cture. WSP data on connec - tions. WSPs Coast counties, Wajir and Garissa.", + "type": "data", + "explanation": "In this context, 'WSP data' is used as a source of information related to connections, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'WSP data' refers to a dataset because it is mentioned in the context of project reports and infrastructure operations.", + "contextual_reason_agent": "In this context, 'WSP data' is used as a source of information related to connections, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of information in project reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 45, + "text": "By having a database with larger geographical coverage, when crises / shocks happen, humanitarian organizations and government agencies will be able to respond faster and in a more coordinated way. The social registry and business functions for the two safety net pilots will be supported by the MIS developed under this Component 2 during the project. As the social registry \u2019 s use is expanded in the future, it can potentially de-linked from this MIS and have its own information system to facilitate flow of information between the social registry and other programs. 30. Payment system. The project will support the development of a payment system for Government safety nets. The payment system would enable Government to distribute the correct amount of benefits to the right people, at the right time, and with the right frequency, while minimizing transaction costs for both the program and the beneficiaries and allowing increased transparency and accountability of financial transactions. The project will use a small number of payment agencies to provide payments to beneficiaries and the selection of payment agencies will be supported by existing ( or new ) information outlining the various agencies and resources available, their pros and cons in the project areas and humanitarian and UN agencies experience for paying cash benefits in Chad. Payment agencies may be selected in each region based on the", + "ner_text": [ + [ + 450, + 453, + "named" + ] + ], + "validated": false, + "empirical_context": "The social registry and business functions for the two safety net pilots will be supported by the MIS developed under this Component 2 during the project. As the social registry \u2019 s use is expanded in the future, it can potentially de-linked from this MIS and have its own information system to facilitate flow of information between the social registry and other programs. 30.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to information management.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 42, + "text": "information systems completed Integrated EMIS is under development Integrated EMIS is operational Annual report produced using integrated EMIS with Second annual report produced using integrated EMIS with Integrated EMIS is operational and reports, produced", + "ner_text": [ + [ + 30, + 45, + "named" + ] + ], + "validated": false, + "empirical_context": "information systems completed Integrated EMIS is under development Integrated EMIS is operational Annual report produced using integrated EMIS with Second annual report produced using integrated EMIS with Integrated EMIS is operational and reports, produced", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMIS' suggests it involves data management.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "161_28046", + "page": 26, + "text": "AT THE MACRO ECONOMIC LEVEL: A PRSP which ( i ) shows the priority given to the health sector, ( ii ) provides impact objectives as they relate to the MDGs; ( iii ) i s monitored in a participatory manner; AT THE HEALTH SECTOR LEVEL: 0 A complete five-year plan ( with all the activities and available financing and its sources ), monitored yearly ( verified by the annual review ); More than 70 % of the national health budget given to the health centers reaches the health centers ( verified by a health expenditures tracking survey ); The MOH executes more than 70 % o f its budget by the end of the fiscal year ( December 3 1 ); Adequate financial management by the MOH ( verified by an annual audit ); Decreased parallel payment practices in hospitals ( verified by a survey ); Results of the Health Sector Support Project are attained ( verified through surveys and regular project supervision, and the project Implementation Completion Report ( ICR ) ). 21", + "ner_text": [ + [ + 499, + 534, + "named" + ], + [ + 383, + 427, + "health expenditures tracking survey <> data description" + ], + [ + 707, + 743, + "health expenditures tracking survey <> data description" + ], + [ + 979, + 997, + "health expenditures tracking survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "AT THE MACRO ECONOMIC LEVEL: A PRSP which ( i ) shows the priority given to the health sector, ( ii ) provides impact objectives as they relate to the MDGs; ( iii ) i s monitored in a participatory manner; AT THE HEALTH SECTOR LEVEL: 0 A complete five-year plan ( with all the activities and available financing and its sources ), monitored yearly ( verified by the annual review ); More than 70 % of the national health budget given to the health centers reaches the health centers ( verified by a health expenditures tracking survey ); The MOH executes more than 70 % o f its budget by the end of the fiscal year ( December 3 1 ); Adequate financial management by the MOH ( verified by an annual audit ); Decreased parallel payment practices in hospitals ( verified by a survey ); Results of the Health Sector Support Project are attained ( verified through surveys and regular project supervision, and the project Implementation Completion Report ( ICR ) ). 21", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a 'health expenditures tracking survey' that provides data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a 'health expenditures tracking survey' that provides data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 36, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 32 of 47 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline End Target ( Percentage ) PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target 1. Improving utilization of quality health services Targeted HFs having majority of essential medicines for RMNCHN services ( Percentage ) 20. 00 75. 00 Absenteeism among key staff to provide RMNCHN services ( Percentage ) 50. 00 25. 00 Targeted health care providers with minimum knowledge and competencies in RMNCHN services ( Percentage ) 0. 00 80. 00 Targeted HMIS digitally integrated into DHIS ( 2 ) ( Number ) 0. 00 3. 00 2. Improving utilization of quality education services Targeted school clusters with improved cluster \u2010 based governance ( Percentage ) 0. 00 50. 00 Student learning assessment reforms strategy implemented ( Text ) No strategy exists Dissemination of assessment results from 10 % target primary schools Grade 5 and 8 students scoring at least 50 % in concept \u2010 based learning assessment in project schools ( Percentage ) 0. 00 20. 00 Targeted female teachers trained ( Percentage ) 0. 00 80. 00 Grievances registered related to delivery of project benefits that are addressed ( Percentage ) 0. 00 75. 00 IO Table SPACE", + "ner_text": [ + [ + 656, + 660, + "named" + ] + ], + "validated": false, + "empirical_context": "00 80. 00 Targeted HMIS digitally integrated into DHIS ( 2 ) ( Number ) 0. 00 3.", + "type": "system", + "explanation": "However, HMIS is mentioned as a system and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is associated with data integration.", + "contextual_reason_agent": "However, HMIS is mentioned as a system and not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "057_West-Bank-and-Gaza-Phase-1-of-the-Multiphase-Programmatic-Approach-Advancing-Sustainability-in-Performance-Infrastructure-and-Reliability-of-Energy-Sector-Project", + "page": 71, + "text": "The baseline depends on the staffing data provided by PENRA Male staffing numbers to be confirmed Additionally, lack of electricity negatively impacts female entrepreneurship, resulting in lost income for women-owned businesses and subsequent closures. About 42 percent of firms identified lack of electricity as a major constraint to business growth, with up to 16 outages lasting six hours each on average per month ( Enterprise Survey, WB & G 2019 ). While this affects all businesses, women-owned businesses \u2014 majority of whom are micro or small and often home based \u2014 are particularly affected because of the nature of their work ( cooking, handiwork, and textiles ) relying on electrical appliances to produce their products ( anecdotal evidence ). Data also suggest that women are less likely to own generators or other forms of electrification for back-up during shortages largely because of costs. The project will enable women-owned businesses access solar PV systems. This will be done first through an assessment that will capture the specific energy and financial needs of female-owned ( and male-owned ) businesses and help them determine which financial tools and solar kit options make most sense to address their different needs.", + "ner_text": [ + [ + 28, + 41, + "named" + ], + [ + 446, + 450, + "staffing data <> publication year" + ], + [ + 489, + 511, + "staffing data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The baseline depends on the staffing data provided by PENRA Male staffing numbers to be confirmed Additionally, lack of electricity negatively impacts female entrepreneurship, resulting in lost income for women-owned businesses and subsequent closures. About 42 percent of firms identified lack of electricity as a major constraint to business growth, with up to 16 outages lasting six hours each on average per month ( Enterprise Survey, WB & G 2019 ).", + "type": "data", + "explanation": "In this context, 'staffing data' is confirmed as a dataset since it is explicitly mentioned as a source of information needed for the baseline.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'staffing data' is a dataset because it refers to specific information about staffing numbers.", + "contextual_reason_agent": "In this context, 'staffing data' is confirmed as a dataset since it is explicitly mentioned as a source of information needed for the baseline.", + "contextual_signal": "mentioned as a source of information needed for the baseline", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 14, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 5 of 68 the performance gap between students in urban and rural areas represents over one year of schooling ( figure 1 ). Moreover, the variation in reading performance explained by student and school socioeconomic status is one of the largest among PISA-participating countries. Many disadvantaged students hold lower ambitions than expected given their level of academic achievement. 12 In addition, there are large gender disparities. Examples include enrollment in science, technology, engineering, and mathematics ( STEM ) and subsequently in employment or asset ownership. These all represent barriers to better economic opportunities for women. 13 Disability continues to be a cause of educational disadvantage and exclusion. 14 Teachers \u2019 limited capacities to provide efficient individualized support and lack of relevant technology-enabled systems in schools remain key constraints. 15 Hence, better targeted and more focused interventions for the most disadvantaged and vulnerable students are essential to enable the path to recovery, protect and promote the existing human capital, and build the strong foundational skills and knowledge in preparation for future labor market needs. Figure 1. Distribution of PISA 2018 ( Science ) from Equity Perspective Figure 2. Estimated Impact of COVID-19 on Socioeconomic Learning Gap Source: OECD PISA 2018 data.", + "ner_text": [ + [ + 1292, + 1296, + "named" + ], + [ + 206, + 291, + "PISA <> data description" + ], + [ + 320, + 348, + "PISA <> data geography" + ], + [ + 1297, + 1301, + "PISA <> publication year" + ], + [ + 1415, + 1419, + "PISA <> publisher" + ], + [ + 1420, + 1429, + "PISA <> publication year" + ] + ], + "validated": true, + "empirical_context": "Figure 1. Distribution of PISA 2018 ( Science ) from Equity Perspective Figure 2. Estimated Impact of COVID-19 on Socioeconomic Learning Gap Source: OECD PISA 2018 data.", + "type": "dataset", + "explanation": "In this context, 'PISA' refers to a dataset as it is explicitly linked to the OECD PISA 2018 data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is referenced in the context of data distribution and impact analysis.", + "contextual_reason_agent": "In this context, 'PISA' refers to a dataset as it is explicitly linked to the OECD PISA 2018 data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 14, + "text": "WHO / Europe and the Ministry of Health and Social Protection ( MoHSP ) of Tajikistan have been working closely together since 2016 to strengthen drinking water quality management and surveillance. A national team of water safety planning experts has been established. These facilitators have been equipped with the tools and knowledge to guide drinking water suppliers and authorities through proper risk assessment and safe operational practices under the many different environmental conditions in the country. 25 However, enforcement of the safe operational practices remains a challenge due to operational and financial constraints. 21 World Bank: Listening to Tajikistan Survey ( October 2018 ). https: / / thedocs. worldbank. org / en / doc / 498281560946839910 - 0080022019 / original / ServicesL2TJK1810en. pdf. 22 Ibid. 23 USAID. 2021. Knowledge, Attitudes, and Practices Survey on Maternal Newborn and Child Health, Nutrition, Water Sanitation and Hygiene, and COVID-19 in Khatlon Region, Tajikistan 2021 - Baseline. 24 Ibid. 25 WHO. 2014. \u201c Water Safety Plan: A Field Guide to Improving Drinking-Water Safety in Small Communities. \u201d https: / / www. euro. who. int / __data / assets / pdf_file / 0004 / 243787 / Water-safety-plan-Eng. pdf; WHO. 2019. \u201c Ensuring Safe Drinking-Water - Highlighting Water Safety Plans in Tajikistan on World Water Day. \u201d https: / / www. syke. fi / en-US / FinWaterWEI_II.", + "ner_text": [ + [ + 666, + 683, + "named" + ], + [ + 0, + 3, + "Tajikistan Survey <> publisher" + ], + [ + 75, + 85, + "Tajikistan Survey <> data geography" + ], + [ + 127, + 131, + "Tajikistan Survey <> reference year" + ], + [ + 641, + 651, + "Tajikistan Survey <> publisher" + ], + [ + 666, + 676, + "Tajikistan Survey <> data geography" + ], + [ + 686, + 698, + "Tajikistan Survey <> reference year" + ], + [ + 840, + 844, + "Tajikistan Survey <> publication year" + ], + [ + 984, + 998, + "Tajikistan Survey <> data geography" + ], + [ + 1000, + 1010, + "Tajikistan Survey <> data geography" + ], + [ + 1011, + 1015, + "Tajikistan Survey <> publication year" + ], + [ + 1251, + 1254, + "Tajikistan Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "25 However, enforcement of the safe operational practices remains a challenge due to operational and financial constraints. 21 World Bank: Listening to Tajikistan Survey ( October 2018 ). https: / / thedocs.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly named as a survey conducted by the World Bank, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly named as a survey conducted by the World Bank, indicating it serves as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42. Support for implementation of national and international student assessments includes the following activities: ( a ) analysis of PISA 2022 results in student learning and Moldova \u2019 s participation in PISA 2025 and 2028; ( b ) preparation activities conducted by NACE, including the development, translation, and editing of test materials, education, training, and preparatory workshops, and piloting of assessments; ( c ) test implementation, including printing, distribution, and supervision of assessments, data processing and analysis, and dissemination of results; and ( d ) strengthening of the capacity of the national implementing agency responsible for carrying out national and international student assessments, including for refugee children from Ukraine. Years of administration of PISA are indicative based on the frequency of previous cycles ( pending decision about the frequency of future cycles ). 43. This subcomponent will also support TA for: ( a ) revision of the teacher and manager professional standards, ( b ) nationwide education infrastructure investment planning for modern school", + "ner_text": [ + [ + 300, + 304, + "named" + ] + ], + "validated": false, + "empirical_context": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data reporting and integration.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 23, + "text": "MEHE and CERD will develop a comprehensive learning assessment framework, which will include formative assessments at the school level. Particular attention will be given to Grade 3 to detect early difficulties in basic reading, writing, and numeracy skills. With regards to system monitoring, the assessment concluded that supporting the development and incentivizing the completion of a proper EMIS may be the greatest contribution the Program can make to the overall RACE 2 program. Due to the absence of timely and accurate data, it is extremely difficult for MEHE to properly allocate resources to schools most in need, or otherwise direct efforts of the Ministry to those that would benefit most. Timely data will go a long way in ensuring current spending more efficient and better targeted. Expenditure framework 56. The RACE 2 program expenditure framework presents the overall US $ 2. 1 billion, and, within that, the US $ 1. 8 billion Bank-supported Program. Activities are structured according to the RACE 2 outputs areas, and the main expenditure categories featured are works for school construction / rehabilitation, furniture and equipment, transfers to schools, teaching and learning materials, transportation costs, and a number of staff-related costs including consultancies. The Government of Lebanon has been actively following-up with donors on pledges made at the London conference to secure the needed financing ( see Table 3 ). The Program Expenditure Framework also analyzes the budget planning and execution, drawing on the findings from the recent Public Expenditure", + "ner_text": [ + [ + 396, + 400, + "named" + ] + ], + "validated": false, + "empirical_context": "Particular attention will be given to Grade 3 to detect early difficulties in basic reading, writing, and numeracy skills. With regards to system monitoring, the assessment concluded that supporting the development and incentivizing the completion of a proper EMIS may be the greatest contribution the Program can make to the overall RACE 2 program. Due to the absence of timely and accurate data, it is extremely difficult for MEHE to properly allocate resources to schools most in need, or otherwise direct efforts of the Ministry to those that would benefit most.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to monitoring and data collection.", + "contextual_reason_agent": "However, EMIS is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 49, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 40 of 68 the project. Students benefiting from direct interventions to enhance learning Annual Technical reports, progress reports Technical reports, progress reports drawing on data from selected schools NORLD, MoER, PMT Students benefiting from direct interventions to enhance learning - Female Annual Technical reports, progress reports Technical reports, progress reports drawing on data from selected schools NORLD, MoER, PMT Teachers recruited or trained Annual Progress and monitoring reports Progress reports and data MoER, PMT, ANACEC, NACE, CTICE Gender gap in STEM education reduced This indicator measures the participation rates of boys and girls in STEM education. The gap is calculated as the difference in enrollment rates of boys and girls in real profile ( proxy for STEM education ) in all secondary schools. The enrollment rate is calculated as the number of boys ( girls ) studying in real profile in grades 9-12 in a given year, expressed as a percentage of the total number of boys ( girls ) enrolled in grades 9-12 ( Baseline: Girls: 27 %; Boys: Annual EMIS EMIS data CTICE, MoER, PMT", + "ner_text": [ + [ + 1152, + 1156, + "named" + ] + ], + "validated": true, + "empirical_context": "The gap is calculated as the difference in enrollment rates of boys and girls in real profile ( proxy for STEM education ) in all secondary schools. The enrollment rate is calculated as the number of boys ( girls ) studying in real profile in grades 9-12 in a given year, expressed as a percentage of the total number of boys ( girls ) enrolled in grades 9-12 ( Baseline: Girls: 27 %; Boys: Annual EMIS EMIS data CTICE, MoER, PMT", + "type": "database", + "explanation": "In this context, 'EMIS' is explicitly mentioned as a source of data used to calculate enrollment rates, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is referenced in the context of enrollment rates and education statistics.", + "contextual_reason_agent": "In this context, 'EMIS' is explicitly mentioned as a source of data used to calculate enrollment rates, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 14, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 5 of 68 the performance gap between students in urban and rural areas represents over one year of schooling ( figure 1 ). Moreover, the variation in reading performance explained by student and school socioeconomic status is one of the largest among PISA-participating countries. Many disadvantaged students hold lower ambitions than expected given their level of academic achievement. 12 In addition, there are large gender disparities. Examples include enrollment in science, technology, engineering, and mathematics ( STEM ) and subsequently in employment or asset ownership. These all represent barriers to better economic opportunities for women. 13 Disability continues to be a cause of educational disadvantage and exclusion. 14 Teachers \u2019 limited capacities to provide efficient individualized support and lack of relevant technology-enabled systems in schools remain key constraints. 15 Hence, better targeted and more focused interventions for the most disadvantaged and vulnerable students are essential to enable the path to recovery, protect and promote the existing human capital, and build the strong foundational skills and knowledge in preparation for future labor market needs. Figure 1. Distribution of PISA 2018 ( Science ) from Equity Perspective Figure 2. Estimated Impact of COVID-19 on Socioeconomic Learning Gap Source: OECD PISA 2018 data.", + "ner_text": [ + [ + 1420, + 1424, + "named" + ], + [ + 206, + 291, + "PISA <> data description" + ], + [ + 320, + 348, + "PISA <> data geography" + ], + [ + 1297, + 1301, + "PISA <> publication year" + ], + [ + 1415, + 1419, + "PISA <> publisher" + ], + [ + 1420, + 1429, + "PISA <> publication year" + ] + ], + "validated": true, + "empirical_context": "Distribution of PISA 2018 ( Science ) from Equity Perspective Figure 2. Estimated Impact of COVID-19 on Socioeconomic Learning Gap Source: OECD PISA 2018 data.", + "type": "dataset", + "explanation": "PISA is explicitly mentioned as a source of data in the context, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA is often associated with large-scale assessments that provide data for analysis.", + "contextual_reason_agent": "PISA is explicitly mentioned as a source of data in the context, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source of data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 20, + "text": "To ensure the success of CDD-type projects, experience with the CDP has demonstrated the need to have local actors capable of performing outreach to local communities in order to stimulate proposal submissions, and conducting or reviewing needs assessments for proposed sub - projects. To this end, under Component 1, the project envisions providing SDCs with appropriate equipment and training to facilitate the staff ' s effective and efficient performance of the mandated activities. ( b ) The availability of timely and reliable data is necessary to evaluate ( both ex - ante and ex-post ) the effectiveness of CDD-type projects and SSN programs. While the CDP has asked for baseline data to be included in proposals submitted by NGOs, this requirement was not enforced, making any systematic ex-post evaluation of the project impact extremely difficult. Quality and timely data are also vital for the success of proxy-means tested SSN interventions, as the targeting formula is derived from household survey data, and the assessment of the program ' s performance is evaluated using the same data source.", + "ner_text": [ + [ + 996, + 1017, + "named" + ], + [ + 149, + 166, + "household survey data <> reference population" + ], + [ + 679, + 692, + "household survey data <> data description" + ], + [ + 1142, + 1160, + "household survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "While the CDP has asked for baseline data to be included in proposals submitted by NGOs, this requirement was not enforced, making any systematic ex-post evaluation of the project impact extremely difficult. Quality and timely data are also vital for the success of proxy-means tested SSN interventions, as the targeting formula is derived from household survey data, and the assessment of the program ' s performance is evaluated using the same data source.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a data source used for evaluating program performance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey data' implies a structured collection of data collected from households.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a data source used for evaluating program performance.", + "contextual_signal": "follows 'derived from household survey data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 121, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 109 Conduct environmental and social audit on SPG woredas and submit the report timely Environmental and Social Systems MOF and EPA Other Years 3 and 5 E & S Audit reports SPG Woredas hire a dedicated planning staff to support multisectoral planning, implementing and M & E Technical DLI 8 MOF Other Year 2 Status report and confirmed by MoF The Ministry of Finance to develop a new / revised staffing strategy in EFY2016 for implementing donor financed programs. The PAP includes a full review of the previous strategy regarding contract staff and developing a new strategy Technical MOF Other Year 1 A staffing strategy / plan endorsed by MOF Management Development of data collection instrument and collection of data for baseline indicators Technical Sectors supported by IPF TA Other No later than 4 months after effectivenes s Data collection instrument reviewed and endorsed by MOPD Conduct assessments at federal, regional and woreda level to ( i ) prioritize and tailor intervention, ( ii ) review of existing model and identification technologies support under IPF / TA and ( iii ) identify capacitary building checklist at woreda level.", + "ner_text": [ + [ + 731, + 757, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 109 Conduct environmental and social audit on SPG woredas and submit the report timely Environmental and Social Systems MOF and EPA Other Years 3 and 5 E & S Audit reports SPG Woredas hire a dedicated planning staff to support multisectoral planning, implementing and M & E Technical DLI 8 MOF Other Year 2 Status report and confirmed by MoF The Ministry of Finance to develop a new / revised staffing strategy in EFY2016 for implementing donor financed programs. The PAP includes a full review of the previous strategy regarding contract staff and developing a new strategy Technical MOF Other Year 1 A staffing strategy / plan endorsed by MOF Management Development of data collection instrument and collection of data for baseline indicators Technical Sectors supported by IPF TA Other No later than 4 months after effectivenes s Data collection instrument reviewed and endorsed by MOPD Conduct assessments at federal, regional and woreda level to ( i ) prioritize and tailor intervention, ( ii ) review of existing model and identification technologies support under IPF / TA and ( iii ) identify capacitary building checklist at woreda level.", + "type": "tool", + "explanation": "However, it is not a dataset but rather a tool used for collecting data, which does not function as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' and relates to data collection.", + "contextual_reason_agent": "However, it is not a dataset but rather a tool used for collecting data, which does not function as a structured collection of data itself.", + "contextual_signal": "mentioned as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 11, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 7 of 47 delivered by UNHCR through the Pakistan Post. This program tries to mirror the support to citizens under the Ehsaas umbrella in order to minimize unequal distribution of support. 8. Balochistan hosts around 325, 000 registered Afghan refugees. About 47 percent of them are females, and more than half of them ( 53 percent ) are less than 18 years of age. Districts with the highest presence of registered refugees include Quetta, Pishin, Chagai, Loralai, Killa Saifullah, and Killa Abdullah ( table 1 ). More than half of the refugees in Balochistan live in urban Quetta ( 56 percent ), whereas the remaining live in rural settlements ( 29 percent ) and refugee villages ( 15 percent ). Table 1. Number and Percentage of Afghan Refugees by District in Balochistan District Population Number of Registered Afghan Refugees Percentage of Registered Afghan Refugees Quetta 2, 275, 699 187, 031 8. 2 Pishin 736, 481 54, 691 7. 4 Chagai 226, 008 28, 901 12. 8 Loralai 397, 400 18, 894 4. 8 Killa Saifullah 342, 814 18, 842 5. 5 Killa Abdullah 757, 578 10, 775 1. 4 Source: Population data from Census 2017; Registered refugee data from UNHCR as", + "ner_text": [ + [ + 1186, + 1209, + "named" + ], + [ + 15, + 26, + "Registered refugee data <> data geography" + ], + [ + 98, + 103, + "Registered refugee data <> publisher" + ], + [ + 267, + 278, + "Registered refugee data <> data geography" + ], + [ + 301, + 327, + "Registered refugee data <> reference population" + ], + [ + 507, + 513, + "Registered refugee data <> data geography" + ], + [ + 515, + 521, + "Registered refugee data <> data geography" + ], + [ + 523, + 529, + "Registered refugee data <> data geography" + ], + [ + 531, + 538, + "Registered refugee data <> data geography" + ], + [ + 540, + 555, + "Registered refugee data <> data geography" + ], + [ + 561, + 575, + "Registered refugee data <> data geography" + ], + [ + 649, + 655, + "Registered refugee data <> data geography" + ], + [ + 947, + 953, + "Registered refugee data <> data geography" + ], + [ + 980, + 986, + "Registered refugee data <> data geography" + ], + [ + 1215, + 1220, + "Registered refugee data <> publisher" + ] + ], + "validated": true, + "empirical_context": "5 Killa Abdullah 757, 578 10, 775 1. 4 Source: Population data from Census 2017; Registered refugee data from UNHCR as", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data sourced from UNHCR.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected about registered refugees.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data sourced from UNHCR.", + "contextual_signal": "follows 'Source:' indicating it is a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 154, + "text": "In Diffa Region, most of the refugees live in 11 rural and urban HUD settings and host communities. Only 20 percent of the refugee population live in Sayam Forage camp, which is an open space developed with a logic of inclusion in the receiving communities. The security situation with the sabotage of distribution lines has forced NIGELEC to suspend services in some localities. The recent Government policy to increase state presence in Diffa Region will enable the reinstatement and expansion of electricity services to refugees and host communities. Since 2019, the south of the Maradi Region has become the host of around 57, 100 refugees fleeing violence in central north and northwest Nigeria. With the support of the UNHCR, the GoN has identified 10 \u2018 villages of opportunity \u2019 to support the voluntary relocation of refugees far from the border. Three villages ( Chadakori, Garin Kaka, and Dan Dadji Makaou ) are currently functional and hosting 17, 100 refugees. Apart from these villages, 31, 800 refugees are identified in four locations within a 20 km band from the Nigerian border with more volatile security situation including Guidan Roumdji, Guidan Sori, Tibirimaradi Maradi, and Chadakori. These refugees will be relocated in seven other villages of opportunity. 4. Hask\u00e9 has been designed with particular attention given to refugees and host communities. To ensure that refugees and host communities are adequately covered through the components of the project, a geospatial approach was used to maximize overlaps between the areas of intervention of the project and the priority conflict - affected communes under the PRA. These communes include areas covered under World Bank-financed projects that directly benefit refugees, host communities, and adjacent communes that could be considered at risk. These projects include PARCA, Lake Chad Region Recovery and Development Project, and Community-Based Recovery and Stabilization Project for the Sahel. The analysis of potential overlaps was conducted with two data sets: ( a )", + "ner_text": [ + [ + 2030, + 2039, + "named" + ], + [ + 3, + 15, + "data sets <> data geography" + ], + [ + 29, + 37, + "data sets <> reference population" + ], + [ + 439, + 451, + "data sets <> data geography" + ], + [ + 560, + 564, + "data sets <> publication year" + ], + [ + 583, + 596, + "data sets <> data geography" + ], + [ + 760, + 783, + "data sets <> data description" + ], + [ + 1172, + 1191, + "data sets <> data geography" + ] + ], + "validated": true, + "empirical_context": "These projects include PARCA, Lake Chad Region Recovery and Development Project, and Community-Based Recovery and Stabilization Project for the Sahel. The analysis of potential overlaps was conducted with two data sets: ( a )", + "type": "dataset", + "explanation": "In the context, 'data sets' is explicitly mentioned as being used for analysis, confirming it as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'data sets' refers to structured collections of data used in the analysis.", + "contextual_reason_agent": "In the context, 'data sets' is explicitly mentioned as being used for analysis, confirming it as a data source.", + "contextual_signal": "follows 'conducted with two data sets'", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 47, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXVIII Verification Protocol Table PDO-level Indicators Result Area 1 on improved service delivery through digitalization Expanding trusted and inclusive access to people-centric digitalized services Description Individuals accessing digitalized public - and private-sector services using trusted, people-centric DPI [ Number ]. Frequency Annually. Data source Annual reports on the usage of digital services from MODEE, sectoral ministries, and the private sector. Methodology for Data Collection Indicator values will be collected from MODEE \u2019 s administrative data on the usage of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improved access to patient-centric digital service Description Number of beneficiaries who actively use patient-centric digital services offfered through an eletronic medical record ( EMR ) platfrom. Active users refer to those with active acccounts who had logged into accounts at least twice since registration ( disaggregated for Syrian refugee users ).. Frequency Annually. Data source Electronic Health Solution ( EHS ). Methodology for Data Collection Data extracted from myHakeem to measure the number of active users. Responsibility for Data Collection MOH and EHS. Result Area 2 on enhanced government effectiveness through Digitalization Increased student trust in the fairness of the Tawjihi exam. Description Increased percentage of students expressing trust in the fairness of the Tawjihi exam.", + "ner_text": [ + [ + 1240, + 1266, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Annually. Data source Electronic Health Solution ( EHS ). Methodology for Data Collection Data extracted from myHakeem to measure the number of active users.", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'solution' which can imply a data system.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 68, + "text": "The World Bank will continue a close policy dialogue with the PMU and the government to support the achievement of project objectives. 4. Project procurement and FM missions will be undertaken as part of periodic fiduciary support conducted concurrently across operations in Jordan. In addition to autonomous supervision by the fiduciary specialists, FM and procurement specialists will also participate in the MTR, implementation support missions, and contribute to the ISRs and Implementation Completion and Results Report. 5. M & E. The World Bank will review the Results Framework submitted quarterly by the PMU as part of implementation support. The World Bank team will discuss the progress and deviations with the PMU to identify any areas where additional help from the World Bank is needed. The PMU and the World Bank will also use results data to build awareness of project results among key beneficiaries and counterparts. Beneficiary feedback will also feed into regular monitoring. 6. The tables below detail the key areas of focus of the implementation support activities for the first 24 months of the project \u2019 s implementation. These have been determined based on discussions with the client and an understanding of the priority activities to be implemented during the first two years of the project. Future updates will be based on progress on project activities, timing of major new activities or large procurement packages, and the expertise required to address any issues that arise, among other things.", + "ner_text": [ + [ + 841, + 853, + "named" + ], + [ + 4, + 14, + "results data <> publisher" + ], + [ + 275, + 281, + "results data <> data geography" + ], + [ + 540, + 550, + "results data <> publisher" + ], + [ + 655, + 665, + "results data <> publisher" + ], + [ + 778, + 788, + "results data <> publisher" + ], + [ + 816, + 826, + "results data <> publisher" + ], + [ + 898, + 915, + "results data <> reference population" + ], + [ + 1540, + 1558, + "results data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The World Bank team will discuss the progress and deviations with the PMU to identify any areas where additional help from the World Bank is needed. The PMU and the World Bank will also use results data to build awareness of project results among key beneficiaries and counterparts. Beneficiary feedback will also feed into regular monitoring.", + "type": "data", + "explanation": "In this context, 'results data' is explicitly mentioned as being used to build awareness and inform monitoring, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'results data' refers to a dataset because it implies a collection of information used for analysis.", + "contextual_reason_agent": "In this context, 'results data' is explicitly mentioned as being used to build awareness and inform monitoring, indicating it functions as a data source.", + "contextual_signal": "follows 'will also use results data to build awareness'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "187_multi-page", + "page": 23, + "text": "performance standards. In addition, a number of spot surveys of individual issues will be undertaken over the course of the project, to capture and publicize citizens ' views on salient issues. In addition, the project will collect data produced by both public expenditure and human resource management systems, so as to monitor their functioning on an ongoing basis throughout project implementation. As a complement to these monitoring efforts, anti-corruption surveys were undertaken as a precursor to project preparation, to gauge citizen perceptions of corruption and related phenomena. Those surveys will also be repeated during the penultimate year of the project, so as to permit pre - and post-project comparisons. While the project is not expected to have measurable impacts on perceptions of corruption, the repeat of this survey should provide an instrument for contributing to enhanced citizen awareness of corruption issues, and thereby make a modest contribution to helping to nurture public pressure on the government to address the underlying factors that contribute to the pervasive levels of corruption evident in the first set of anti-corruption surveys. Accounting. financial reporting. and auditing arrangiements. The newly established UIPARP within the Office of the Prime Minister will be responsible for the overall fnancial managernent of the Project.", + "ner_text": [ + [ + 447, + 470, + "named" + ], + [ + 158, + 166, + "anti-corruption surveys <> reference population" + ], + [ + 1276, + 1304, + "anti-corruption surveys <> author" + ], + [ + 1393, + 1411, + "anti-corruption surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "In addition, the project will collect data produced by both public expenditure and human resource management systems, so as to monitor their functioning on an ongoing basis throughout project implementation. As a complement to these monitoring efforts, anti-corruption surveys were undertaken as a precursor to project preparation, to gauge citizen perceptions of corruption and related phenomena. Those surveys will also be repeated during the penultimate year of the project, so as to permit pre - and post-project comparisons.", + "type": "survey", + "explanation": "These surveys are explicitly mentioned as being undertaken to collect data, confirming their role as a dataset in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'anti-corruption surveys' imply a structured collection of data related to citizen perceptions.", + "contextual_reason_agent": "These surveys are explicitly mentioned as being undertaken to collect data, confirming their role as a dataset in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 101, + "text": "Do Students Care about School Quality? Determinants of Dropout Behaviour in Developing Countries, NBER Working Paper Series,, NBER WORKING PAPER SERIES, WORKING PAPER 12737 Hanushek, Eric A. and Ludger Woessmann ( 2007 ). THE ROLE OF SCHOOL IMPROVEMENT IN ECONOMIC DEVELOPMENT, NBER WORKING PAPER SERIES, Working Paper 12832 Spence, Michael ( 2005 ) \u201c Rethinking growth. \u201d The World Bank, Keynote address, Poverty Reduction and Economic Management ( PREM ) Conference, PREM Week, available at http: / / info. worldbank. org / etools / BSPAN / PresentationView. asp? PID = 1425 & EID = 711 ( accessed 1 / 7 / 09 ) Murnane, Richard J., John B. Willett, Yves Duhaldeborde, and John H. Tyler ( 2000 ). \" How Important Are the Cognitive Skills of Teenagers in Predicting Subsequent Earnings? \" Journal of Policy Analysis and Management, Vol. 19, No. 4 ( Fall ), pp. 547. Tognolini, Jim ( 2006 ) Ways in Which the Effectiveness of the National Assessments and the National Final Examinations ( Tawjihi ) Might be Enhanced. Report prepared by the Australian Council for Educational Research ( ACER ) for the Ministry of Education of the Hashemite Kingdom of Jordan. UNESCO ( 2007 ). Global Monitoring Report. Paris: UNESCO World Bank ( 2002 ). Hashemite Kingdom of Jordan: Cost Efficiency and Education Spending, Human Development Group, MENA. World Bank ( 2007 ) Arab Republic of Egypt \u2014 Improving Quality, Equality, and Efficiency in the Education Sector: Fostering a Competent Generation of Youth, Human Development Group, Middle East and North Africa Region World Bank, ( HDNED, 2008 ) \u201c Using PISA to Understand the Determinants of Learning in the Middle - East and North Africa Region, \u201d The World Bank, HDNED", + "ner_text": [ + [ + 1591, + 1595, + "named" + ], + [ + 377, + 387, + "PISA <> publisher" + ], + [ + 1159, + 1165, + "PISA <> publisher" + ], + [ + 1216, + 1226, + "PISA <> publisher" + ], + [ + 1229, + 1233, + "PISA <> publication year" + ], + [ + 1337, + 1347, + "PISA <> publisher" + ], + [ + 1519, + 1554, + "PISA <> data geography" + ], + [ + 1555, + 1565, + "PISA <> publisher" + ], + [ + 1646, + 1683, + "PISA <> data geography" + ], + [ + 1691, + 1701, + "PISA <> publisher" + ] + ], + "validated": true, + "empirical_context": "Hashemite Kingdom of Jordan: Cost Efficiency and Education Spending, Human Development Group, MENA. World Bank ( 2007 ) Arab Republic of Egypt \u2014 Improving Quality, Equality, and Efficiency in the Education Sector: Fostering a Competent Generation of Youth, Human Development Group, Middle East and North Africa Region World Bank, ( HDNED, 2008 ) \u201c Using PISA to Understand the Determinants of Learning in the Middle - East and North Africa Region, \u201d The World Bank, HDNED", + "type": "dataset", + "explanation": "PISA is explicitly mentioned in the context as a source used to understand determinants of learning, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA is known for providing standardized assessment data.", + "contextual_reason_agent": "PISA is explicitly mentioned in the context as a source used to understand determinants of learning, indicating it functions as a dataset.", + "contextual_signal": "follows 'using PISA to understand the determinants of learning'", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 65, + "text": "At the same time, this approach allows for further expansion through private connections in a subsequent phase, once the necessary hydraulic capacity is present in the network. Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities. In addition, the project team and REGIDESO organized a participatory workshop, in which experiences to date with standpost management in Burundi, Kenya, Senegal and Rwanda were presented and discussed. The workshop, which included community representatives, members of government, REGIDESO staff, World Bank representatives, and international invited speakers with direct experience in standpost management, also served to produce recommendations on the type of standpost management that would be most appropriate in the context of Bujumbura. 14. As seen in the previous section, the household survey generated baseline information regarding water supply, sanitation, electricity provision, and general socio-economic and demographic data about the 26 neighborhoods under study. It also asked respondents to express their preferences about the type of service they would like. Not surprisingly, a majority of respondents ( 63. 9 % ) would prefer to pay to have a private connection to the network.", + "ner_text": [ + [ + 1039, + 1055, + "named" + ], + [ + 592, + 599, + "household survey <> data geography" + ], + [ + 608, + 615, + "household survey <> data geography" + ], + [ + 987, + 996, + "household survey <> data geography" + ], + [ + 1248, + 1259, + "household survey <> reference population" + ], + [ + 1364, + 1375, + "household survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "14. As seen in the previous section, the household survey generated baseline information regarding water supply, sanitation, electricity provision, and general socio-economic and demographic data about the 26 neighborhoods under study. It also asked respondents to express their preferences about the type of service they would like.", + "type": "survey", + "explanation": "The household survey is explicitly mentioned as generating baseline information, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects structured information.", + "contextual_reason_agent": "The household survey is explicitly mentioned as generating baseline information, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 61, + "text": "The World Bank Burundi Skills for Jobs: Women and Youth Project ( P164416 ) Page 55 of 102 informal apprenticeship ( Refugees ) completers that were undergoing an apprenticeship training with a master craftsperson in a cluster supported by the project and have successfully undergone an assessment leading to a certification. Number of students with advanced digital skills competencies at university level Indicator measures digital skills competencies according to accepted standard measurements of graduates / completers of supported digital skills programs. The definition and measurement of advanced digital skills to be used for measuring this indicator will be disaggregated by type of training. Baseline will be established upon start of intervention and among graduates / completers from the third year of project, onwards using standard digital skills measurement tools. Starting third year of the project Assessment records Administrative data ( University of Burundi registry ) PIU M & E specialist Of which number of students with advanced digital skills competencies Indicator measures digital skills competencies Annual starting year Assessment records Administrative data at the University of PIU M & E Specialist", + "ner_text": [ + [ + 935, + 954, + "named" + ], + [ + 4, + 14, + "Administrative data <> publisher" + ], + [ + 15, + 22, + "Administrative data <> data geography" + ], + [ + 326, + 406, + "Administrative data <> data description" + ], + [ + 971, + 978, + "Administrative data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Baseline will be established upon start of intervention and among graduates / completers from the third year of project, onwards using standard digital skills measurement tools. Starting third year of the project Assessment records Administrative data ( University of Burundi registry ) PIU M & E specialist Of which number of students with advanced digital skills competencies Indicator measures digital skills competencies Annual starting year Assessment records Administrative data at the University of PIU M & E Specialist", + "type": "registry", + "explanation": "In this context, it is explicitly mentioned as part of the assessment records and is linked to the University of Burundi registry, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' often refers to structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, it is explicitly mentioned as part of the assessment records and is linked to the University of Burundi registry, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "173_multi0page", + "page": 28, + "text": "Annex 1: Project Design Summary GUINEA: MULTI-SECTORAL AIDS PROJECT ( MAP ) - ' -,, t_ - -, - 9 * ' -. F. - ~ - '. Key Performance ' - Data Collectlon Strategy. -; Hierarchiy of Objectives -. - Indicato ' r8.: 9rCriticalAssumptionss Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) Mitigate socioeconomic Human Development UTNDP annual report Political commitment to impact of HIV / AIDS within indicators, including poverty Epidemiological reports ad effectively address the framework of poverty profiles surveys HIV / AIDS epidernic, and reduction strategy sufficient financing of the national HTV / AIDS campaign PRSP implementation An effective and progress reports well-coordinated multi-sector program, fully involving the public and private sectors, NGOs, civil society, and donors.", + "ner_text": [ + [ + 476, + 499, + "named" + ] + ], + "validated": false, + "empirical_context": "- Indicato ' r8. : 9rCriticalAssumptionss Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) Mitigate socioeconomic Human Development UTNDP annual report Political commitment to impact of HIV / AIDS within indicators, including poverty Epidemiological reports ad effectively address the framework of poverty profiles surveys HIV / AIDS epidernic, and reduction strategy sufficient financing of the national HTV / AIDS campaign PRSP implementation An effective and progress reports well-coordinated multi-sector program, fully involving the public and private sectors, NGOs, civil society, and donors.", + "type": "report", + "explanation": "However, in this context, it is mentioned as a type of report rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'epidemiological reports' often contain data related to health statistics.", + "contextual_reason_agent": "However, in this context, it is mentioned as a type of report rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "166_304360LK", + "page": 57, + "text": "In the absence o f effective local mechanisms to plan and implement social development activities, VRCs have begun to play a major role in the North East. The NEPC will form VRCs in those villages targeted by NEHRP that do not currently have VRCs. The VRC formation process will be that developed for the World Bank financed livelihood support cash grants. Tasks include: 0 0 Verify selected beneficiary list; 0 0 0 Contribute to the beneficiary selection process through social verification / damaged assessment survey; Act as local representative at divisional level monitoring committees; Act as intermediary forum between agency officials and community for vertical and horizontal information dissemination; and Information dissemination and represent marginalized families who are unable to articulate their grievances through formally established mechanisms. However, in addition to participation in Housing Damage Assessment and Social Verification Survey, VRCs in some villages have volunteered to play added roles during the pilot phase o f the program. This includes the facilitation o f material procurement, coordination o f skilled labor, sponsorshp o f housing development societies and organization o f community labor for the construction o f houses o f widow - headed households. Such expanded roles and functions indicate the capacity o f some VRCs and their potential as a mobilization mechanism at the grass root level. Subsequently, as part o f the communications campaign, an orientation program for those VRCs with the potential for an expanded role will be designed. Particularly orientation i s required on bulk purchasing procedures, quality control 52", + "ner_text": [ + [ + 906, + 962, + "named" + ], + [ + 143, + 153, + "Housing Damage Assessment and Social Verification Survey <> data geography" + ], + [ + 305, + 315, + "Housing Damage Assessment and Social Verification Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Tasks include: 0 0 Verify selected beneficiary list; 0 0 0 Contribute to the beneficiary selection process through social verification / damaged assessment survey; Act as local representative at divisional level monitoring committees; Act as intermediary forum between agency officials and community for vertical and horizontal information dissemination; and Information dissemination and represent marginalized families who are unable to articulate their grievances through formally established mechanisms. However, in addition to participation in Housing Damage Assessment and Social Verification Survey, VRCs in some villages have volunteered to play added roles during the pilot phase o f the program. This includes the facilitation o f material procurement, coordination o f skilled labor, sponsorshp o f housing development societies and organization o f community labor for the construction o f houses o f widow - headed households.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as part of the tasks involving data collection for beneficiary selection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as part of the tasks involving data collection for beneficiary selection.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 53, + "text": "This will enable management decisions to be made based on an assessment of whether the program is moving towards its objectives. A periodic financial audit will be conducted which will also help to identify and mitigate any potential sign of fraud and governance issues. The program monitoring system will rely on regular and accurate data collection and analysis to identify the timely implementation of activities, the achievement of intended results, and positive and negative unintended effects. i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii. Sources of Data: ( i ) the routine data collected through the claims processing system; ( ii ) data routinely collected at health facility level; ( iii ) additional data collected by MoPH such as from enrollment registers, spot checks and ad-hoc surveys; ( iv ) information collected during the verification processes; and ( v ) data from the MoPH rapid facility assessment.", + "ner_text": [ + [ + 503, + 506, + "named" + ] + ], + "validated": false, + "empirical_context": "i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii.", + "type": "system", + "explanation": "'HIS' is not a dataset but rather a health information system that supports data management and monitoring.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HIS' is a dataset because it is mentioned in the context of data collection and analysis.", + "contextual_reason_agent": "'HIS' is not a dataset but rather a health information system that supports data management and monitoring.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 79, + "text": "Transparency and understanding of the targeting process will be key to minimize inter-household conflicts and empower beneficiary households. A specific component of the impact evaluation implemented with the Africa GIL could consider the differential impacts of the cash and specific behavior change communication on this issue. 56. The grievance redress mechanism will be a key tool to assess on-going issues, including coerced transfer of a portion of the cash. The promotion activities will be open to all community members on a voluntary basis for non-beneficiary households to foster change in community social norms. Both the process and the impact evaluations will track potential changes in conflict 57. Approximately one percent of the Burundian population is Batwa. Initial data collection for the indigenous peoples \u2019 framework showed that an estimated 7, 000 Batwa households were living in the provinces where the project will operate ( 300 in Ruyigi, 2, 000 in Karuzi, 2, 200 in Gitega and 2, 500 in Kirundo ). These data will be updated after the planned Batwa household census in these provinces. Given the experience of the Concern pilot, it is expected that between one and six percent of the project beneficiaries will be Batwa.", + "ner_text": [ + [ + 1071, + 1093, + "named" + ], + [ + 770, + 775, + "Batwa household census <> reference population" + ], + [ + 872, + 888, + "Batwa household census <> reference population" + ], + [ + 958, + 964, + "Batwa household census <> data geography" + ], + [ + 976, + 982, + "Batwa household census <> data geography" + ], + [ + 994, + 1000, + "Batwa household census <> data geography" + ], + [ + 1015, + 1022, + "Batwa household census <> data geography" + ] + ], + "validated": true, + "empirical_context": "Initial data collection for the indigenous peoples \u2019 framework showed that an estimated 7, 000 Batwa households were living in the provinces where the project will operate ( 300 in Ruyigi, 2, 000 in Karuzi, 2, 200 in Gitega and 2, 500 in Kirundo ). These data will be updated after the planned Batwa household census in these provinces. Given the experience of the Concern pilot, it is expected that between one and six percent of the project beneficiaries will be Batwa.", + "type": "census", + "explanation": "This is indeed a dataset as it refers to a planned census that will collect data on Batwa households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a planned census that will collect data on Batwa households.", + "contextual_signal": "mentioned as a data source for updating existing data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 54, + "text": "The World Bank One WASH \u2014 Consolidated Water Supply, Sanitation, and Hygiene Account Project ( One WASH \u2014 CWA ) ( P167794 ) Page 50 of 103 term operational is defined as providing updated and regular information that is disseminated and utilized for better WRM and WASH service delivery. WASHCOMs. Percentage of grievances registered related to delivery of sub-project benefits that are timely and satisfactorily addressed This indicator measures the percentage of the grievances registered that are actually addressed in a timely and satisfactory manner ( maximum of one month ). At mid - term and Project completion Beneficiary survey at mid-term and Project completion. Beneficiary survey at mid-term and Project completion. National and regional WASH coordination offices, WASH sector PMUs, WWTs, and TWUs. Percentage of schemes with completed environmental and social screening processes and against which required mitigation measures including compensations are settled This indicator measures the percentage of schemes that have completed environmental and social screening processes and have settled required mitigation measures including compensations. Semi - annual Regular Project progress reports. Regular Project progress reports. National and regional WASH coordination offices, WASH sector PMUs, WWTs, and TWUs.", + "ner_text": [ + [ + 618, + 636, + "named" + ], + [ + 298, + 377, + "Beneficiary survey <> data description" + ], + [ + 811, + 892, + "Beneficiary survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Percentage of grievances registered related to delivery of sub-project benefits that are timely and satisfactorily addressed This indicator measures the percentage of the grievances registered that are actually addressed in a timely and satisfactory manner ( maximum of one month ). At mid - term and Project completion Beneficiary survey at mid-term and Project completion. Beneficiary survey at mid-term and Project completion.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used to measure grievances and their resolution.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Beneficiary survey' implies a structured collection of responses from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used to measure grievances and their resolution.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "059_Uganda-Investing-in-Forests-and-Protected-Areas-for-Climate-Smart-Development-Project", + "page": 72, + "text": "STEP. The project will use STEP, a planning and tracking system, which will provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. The MWE is familiar with STEP, which is also used under the IWMDP. The knowledge built, lessons learned, and corrective mechanisms put in place will provide a foundation for the project. 52. Use of national procurement procedures. Due to recent changes in Uganda \u2019 s procurement legal framework, Uganda National Procurement Procedures are not in line with the World Bank Procurement Regulations Section V - Para 5. 4 National Procurement Procedures. In particular, the following provisions are not consistent with Procurement Regulations: ( a ) Use of domestic preference for contracts obtained through open national competitive procedures ( b ) Fees for handling bidder complaints at procuring entity level", + "ner_text": [ + [ + 0, + 4, + "named" + ] + ], + "validated": false, + "empirical_context": "STEP. The project will use STEP, a planning and tracking system, which will provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of providing data.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 23, + "text": "The focus on citizens \u2019 engagement includes a robust grievance redress mechanism ( GRM ). The grievance redress mechanism would track grievances linked to targeting, receipt of transfers and implementation of the complementary activities. It will use several mechanisms: in - person complaints to program commune focal point, SMS-based system to a third-party grievance manager ( conditional on finding a trusted and competent agent ), boxes at the colline-level in the care of a civil society organization, possibly a hotline at central-level. Complaints received through SMS, phone or boxes will be logged in the MIS. A results indicator to track the progress of the implementation of the GRM system has been included in the results framework. Subcomponent 2. 3: Monitoring and evaluation ( US $ 2. 8 million equivalent ) 43. Since the project is supporting new interventions and processes in Burundi, and in order to ensure transparency, the third sub-component will support process evaluations of the key program processes and an impact evaluation including beneficiary surveys. The process evaluations will focus on the core operational processes: targeting, payment, delivery of complementary activities. The process evaluation in the first phase communes will inform the expansion in the second phase but also provide key input in the design of the operating processes.", + "ner_text": [ + [ + 615, + 618, + "named" + ] + ], + "validated": false, + "empirical_context": "It will use several mechanisms: in - person complaints to program commune focal point, SMS-based system to a third-party grievance manager ( conditional on finding a trusted and competent agent ), boxes at the colline-level in the care of a civil society organization, possibly a hotline at central-level. Complaints received through SMS, phone or boxes will be logged in the MIS. A results indicator to track the progress of the implementation of the GRM system has been included in the results framework.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a management information system that logs complaints, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a system that manages information.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a management information system that logs complaints, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda Project ( P176747 ) Page 26 of 77 60. Subcomponent 4A: Project management support for high-quality implementation. This subcomponent will finance the Project Implementation Teams ( PITs ) at the MGLSD and the PSFU. it will finance capacity building activities, including ( a ) of the national, district, subcounty, parish, refugee settlement stakeholders and implementation support teams. It will finance the development of key partnerships including of quarterly review meetings for all stakeholders involved in the project at the regional and district levels. The project as part of its Monitoring and Evaluation ( M & E ) activities will design and develop an MIS that collects and stores detailed data from project applicants during the registration process ( i. e.,, before beneficiaries have accessed any project-financed activities ). As indicated in component 1, the MIS is expected to assign a unique identifier to each registered applicant ( GROW_ID ) that will be shared with the applicant.", + "ner_text": [ + [ + 968, + 971, + "named" + ] + ], + "validated": false, + "empirical_context": ",, before beneficiaries have accessed any project-financed activities ). As indicated in component 1, the MIS is expected to assign a unique identifier to each registered applicant ( GROW_ID ) that will be shared with the applicant.", + "type": "system", + "explanation": "However, 'MIS' is mentioned as a management information system, not as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured system for managing information.", + "contextual_reason_agent": "However, 'MIS' is mentioned as a management information system, not as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "182_multi0page", + "page": 10, + "text": "The Govermment faces real difficulties in responding to these challenges: ( i ) current household level survey data is not representative of the population, and other information on living standards is ad hoc, irregular and of varying quality; ( ii ) the capacity of the Government to evaluate policy-relevant information is limited by knowledge and resource constraints; ( iii ) the policy environment is extremely fluid, there is high government turnover of officials, and Government efforts tend to be focused on crisis management; and ( iv ) roles and responsibilities for policy development and evaluation remain unclear, both within and between agencies. The project will address these issues by ( i ) supporting the national statistical agency ( INSTAT ) to develop and implement a permanent household survey instrument that will collect data on a regular basis, utilizing a new sample frame derived from the 2001 Census as well as new questionnaires and data management procedures; ( ii ) developing and implementing a strategic plan for social policy monitoring and evaluation within and between agencies, covering both social insurance and social assistance; and ( iii ) developing and implementing an associated training plan. - 7 -", + "ner_text": [ + [ + 88, + 115, + "named" + ], + [ + 753, + 759, + "household level survey data <> publisher" + ], + [ + 789, + 826, + "household level survey data <> data type" + ], + [ + 916, + 927, + "household level survey data <> reference year" + ] + ], + "validated": true, + "empirical_context": "The Govermment faces real difficulties in responding to these challenges: ( i ) current household level survey data is not representative of the population, and other information on living standards is ad hoc, irregular and of varying quality; ( ii ) the capacity of the Government to evaluate policy-relevant information is limited by knowledge and resource constraints; ( iii ) the policy environment is extremely fluid, there is high government turnover of officials, and Government efforts tend to be focused on crisis management; and ( iv ) roles and responsibilities for policy development and evaluation remain unclear, both within and between agencies. The project will address these issues by ( i ) supporting the national statistical agency ( INSTAT ) to develop and implement a permanent household survey instrument that will collect data on a regular basis, utilizing a new sample frame derived from the 2001 Census as well as new questionnaires and data management procedures; ( ii ) developing and implementing a strategic plan for social policy monitoring and evaluation within and between agencies, covering both social insurance and social assistance; and ( iii ) developing and implementing an associated training plan.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to structured data collected through a household survey, which is intended for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collection method used to gather information.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected through a household survey, which is intended for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "002_BOSIB-ca473522-8ad0-4c80-9f0d-88bf887f2a2f", + "page": 46, + "text": "The World Bank Uganda Development Response to Displacement Impacts Project Phase II ( P510476 ) PROJECT APPRAISAL DOCUMENT Page 31 Description DCRM is a contingency fund with specific triggers. In the event of being triggered, the indicator will record the number of beneficiaries benefiting from actions financed by the DCRM ( infrastructure construction, upgrading or rehabilitation to expand water, education, and health service capacity in districts experiencing public service pressure caused by large refugee inflows ). In the event of the DCRM not being triggered this indicator will stay at zero. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection IA Infrastructure subprojects that are fully functional with high quality workmanship one year after completion ( Percentage ) Description Quantitative indicator counting number of infrastructure subprojects completed and functional one year after completion. Quality indicator will be determined by surveyor / engineer report on construction and maintenance of the construction. Frequency Quarterly Data source Project MIS and Project Progress Reports. Surveyor / Engineer reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection IA Women in leadership roles within community development committees ( Percentage ) Description Quantitative indicator counting percentage of women in leadership roles within community development committees ( CPMC, CPC, GRC, PDC, RWC ).", + "ner_text": [ + [ + 1081, + 1107, + "named" + ] + ], + "validated": false, + "empirical_context": "Responsibility for Data Collection IA Infrastructure subprojects that are fully functional with high quality workmanship one year after completion ( Percentage ) Description Quantitative indicator counting number of infrastructure subprojects completed and functional one year after completion. Quality indicator will be determined by surveyor / engineer report on construction and maintenance of the construction. Frequency Quarterly Data source Project MIS and Project Progress Reports.", + "type": "document", + "explanation": "However, it is not a dataset but rather a document that provides information on construction and maintenance.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection and reporting.", + "contextual_reason_agent": "However, it is not a dataset but rather a document that provides information on construction and maintenance.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [] + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 51, + "text": "The current core application software comprises: ( i ) Hyperion for budget preparation; ( ii ) ORACLE financials for budget execution; and ( iii ) software for interfacing with other software for Debt Management, Payroll, Bank Reconciliation, and Revenue Management. GFMIS is fully utilized for budget execution. For budget preparation, however, there are manual interventions with various ICT tools being used. MoITS will utilize the functionalities available in their Oracle database and accounting module when recording transactions related to the Project operations, by creating a separate cost center for the Project which is used for recording the day-to-day transactions and large contract purchases under both components, the FO might also use excel sheet in support of the Oracle system to prepare Bank required reports such as the WA-IFRs and the Semiannual IFRs. Furthermore, the assigned FO will work on improving the automated linkage between the MoITS \u2019 Oracle based accounting system and the IFRs to be produced for the Bank purposes on excel spread sheets. 9. Financial Section of the POM: MoITS will develop the FM section of the POM used in the project which will cover all administrative, financial, and accounting, budgetary, and human resources procedures relevant to the additional activities to be financed under the project. The POM should describe the payment procedures, including controls and oversight arrangements.", + "ner_text": [ + [ + 267, + 272, + "named" + ] + ], + "validated": false, + "empirical_context": "The current core application software comprises: ( i ) Hyperion for budget preparation; ( ii ) ORACLE financials for budget execution; and ( iii ) software for interfacing with other software for Debt Management, Payroll, Bank Reconciliation, and Revenue Management. GFMIS is fully utilized for budget execution. For budget preparation, however, there are manual interventions with various ICT tools being used.", + "type": "system", + "explanation": "However, GFMIS is described as a system used for budget execution, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is mentioned in the context of budget execution.", + "contextual_reason_agent": "However, GFMIS is described as a system used for budget execution, not as a structured collection of data.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 40, + "text": "Annex 3: Results Framework and Monitoring BOTSWANA: Botswana National HIV / AIDS Prevention Support Project 1. Botswana i s committed to rapidly strengthening its capacity in monitoring and evaluation. The BNAPS \u2019 s support to M & E would be guided by the following criteria: ( i ) support for the NSF and the development o f a single national M & E system, under the principle o f the \u201c Three Ones \u201d; ( ii ) support for an M & E system, that enables DMSACs to monitor and improve their performance as well as allowing for monitoring o f community, district and national activities; ( iii ) support for institutional, human resource and systems development; and ( iv ) support for activities which are not being financed by other development partners; and ( v ) support to the N A C A in its function as the lead coordinating agency for the sharing and coordinating o f M & E activities between all agencies and donors. Improvements in the indicators as reflected in the HIV / AIDS Score Card ( Annex 14 ) would be measured through this approach. 2. The Botswana HIV / AIDS Response Information Management System ( BHRIMS ) i s the national multi-sectoral response monitoring system.", + "ner_text": [ + [ + 1054, + 1112, + "named" + ] + ], + "validated": false, + "empirical_context": "2. The Botswana HIV / AIDS Response Information Management System ( BHRIMS ) i s the national multi-sectoral response monitoring system.", + "type": "system", + "explanation": "However, it is described as a monitoring system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Management System' in its name, suggesting data handling.", + "contextual_reason_agent": "However, it is described as a monitoring system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 15, + "text": "Data from a survey on the schooling of refugee children conducted by MENFOP \u2019 s planning department, the OOSC 2019 and the EDAM4 provide some information on the differential experiences of refugee and host students. The most commonly cited reason by refugees for not attending school is the need to help the family ( 36 percent females; 21 percent males ), while among host communities, the most common reason was the lack of interest in school ( 39 percent females; 36 percent males ). Around 20 percent of host communities indicated that girls having to stay home as a reason for not attending school, compared to 14 percent of refugee respondents. Although food insecurity is common among both refugee households and rural host communities, the lack of school canteens is cited more frequently by refugees as a reason for non - school attendance. Data on the prevalence of disability among refugee and host community children is unavailable; however, 1 percent of host community respondents and 1. 4 percent of refugee respondents cite disability as a reason for not attending school. 16. Focus group discussions with various stakeholder groups ( school administrators, teachers, parents, and students ) in Ali-Addeh, Holl-Holl, and Markazi confirm the quantitative indicators and provide additional information on refugee school needs, which include school canteens, school kits, improving the school environment, and accelerating implementation of the national program ( which would allow certification to access to higher levels of education ), and increased teacher support. A study underway by the World Bank 5 One school in Djibouti Ville was observed to have an average class size of 1: 44", + "ner_text": [ + [ + 12, + 55, + "named" + ], + [ + 69, + 75, + "survey on the schooling of refugee children <> author" + ], + [ + 105, + 114, + "survey on the schooling of refugee children <> publication year" + ], + [ + 369, + 385, + "survey on the schooling of refugee children <> reference population" + ], + [ + 697, + 715, + "survey on the schooling of refugee children <> reference population" + ], + [ + 720, + 742, + "survey on the schooling of refugee children <> reference population" + ], + [ + 748, + 771, + "survey on the schooling of refugee children <> data description" + ], + [ + 967, + 993, + "survey on the schooling of refugee children <> reference population" + ], + [ + 1210, + 1219, + "survey on the schooling of refugee children <> data geography" + ], + [ + 1221, + 1230, + "survey on the schooling of refugee children <> data geography" + ], + [ + 1236, + 1243, + "survey on the schooling of refugee children <> data geography" + ], + [ + 1633, + 1647, + "survey on the schooling of refugee children <> data geography" + ] + ], + "validated": true, + "empirical_context": "Data from a survey on the schooling of refugee children conducted by MENFOP \u2019 s planning department, the OOSC 2019 and the EDAM4 provide some information on the differential experiences of refugee and host students. The most commonly cited reason by refugees for not attending school is the need to help the family ( 36 percent females; 21 percent males ), while among host communities, the most common reason was the lack of interest in school ( 39 percent females; 36 percent males ).", + "type": "survey", + "explanation": "This is a dataset as it provides structured data on the schooling experiences of refugee children.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on a specific topic.", + "contextual_reason_agent": "This is a dataset as it provides structured data on the schooling experiences of refugee children.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 57, + "text": "49 11. Disbursement methods. The project may follow one or a combination of the following disbursement methods: Designated Account, Direct Payment, Reimbursement, and Special Commitment. The project will use a report-based disbursement. 12. Financial reporting arrangements. The EIC follows the Government reporting system. The Commission submits monthly financial statements to MoFEC in soft and hard copies within the stipulated deadline ( within 15 days after the month end ). The commission also closed the EFY 2009 accounts and submitted them to MoFEC and OFAG. ARRA submits quarterly the implementing partner financial monitoring report ( IPFMR ) to UNHCR which includes both financial and physical activity report. For this project, quarterly IFRs will be required from the project. The EIC will prepare quarterly financial statements for the project and submit to the World Bank within 45 days of the end of the quarter. The contents of the IFR will include narrations or explanations on financial performance, statement of sources and use of funds, statement of use of funds by project activity / component, designated account activity statements, statement of cash forecast, trial balances and other related schedules. The format and content of the IFR was agreed between the World Bank and EIC during negotiations.", + "ner_text": [ + [ + 594, + 642, + "named" + ] + ], + "validated": false, + "empirical_context": "The commission also closed the EFY 2009 accounts and submitted them to MoFEC and OFAG. ARRA submits quarterly the implementing partner financial monitoring report ( IPFMR ) to UNHCR which includes both financial and physical activity report. For this project, quarterly IFRs will be required from the project.", + "type": "report", + "explanation": "However, it is not a dataset as it is described as a report rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'report' which can imply structured information.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a report rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 13, + "text": "Since 2014, the repeated terrorist attacks of Boko Haram in the Far North region led to over 244, 000 IDPs, and the region also took in over 308, 000 refugees from Nigeria in 2019. Cameroon \u2019 s Eastern, Northern, and Adamawa regions received a surge of refugees from the Central African Republic. Education outcomes and level of education attainment was previously higher in anglophone regions than in some other regions. However, the persistent sociopolitical crisis in the North West and South West anglophone regions resulted in approximately 950, 000 IDPs, 51 percent of whom were children, in 2019. 13 These political crises have led to the destruction of 7 Cameroon Household Survey 2007 and 2014. 8 Staff calculations using the Fourth Cameroon Household Survey ( ECAM4 ) 2014, National Institute of Statistics ( NIS ). 9 The narrow unemployment rate among the labor force ages 15 \u2013 64 averaged 3. 3 in 2007 and 4. 0 percent in 2014 and 5. 4 and 5. 8 percent among youth in 2007 and 2014, respectively. The broader measure of unemployment, which includes workers who are inactive but would accept a job if offered one, averaged 3 percentage points higher. 10 The HCI measures the amount of human capital that a child born today can expect to attain by age 18.", + "ner_text": [ + [ + 663, + 688, + "named" + ], + [ + 6, + 10, + "Cameroon Household Survey <> reference year" + ], + [ + 64, + 80, + "Cameroon Household Survey <> data geography" + ], + [ + 181, + 189, + "Cameroon Household Survey <> data geography" + ], + [ + 217, + 232, + "Cameroon Household Survey <> data geography" + ], + [ + 689, + 693, + "Cameroon Household Survey <> reference year" + ], + [ + 698, + 702, + "Cameroon Household Survey <> publication year" + ], + [ + 778, + 782, + "Cameroon Household Survey <> publication year" + ], + [ + 784, + 816, + "Cameroon Household Survey <> publisher" + ], + [ + 839, + 856, + "Cameroon Household Survey <> data description" + ], + [ + 867, + 891, + "Cameroon Household Survey <> reference population" + ], + [ + 934, + 938, + "Cameroon Household Survey <> reference year" + ], + [ + 989, + 993, + "Cameroon Household Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "However, the persistent sociopolitical crisis in the North West and South West anglophone regions resulted in approximately 950, 000 IDPs, 51 percent of whom were children, in 2019. 13 These political crises have led to the destruction of 7 Cameroon Household Survey 2007 and 2014. 8 Staff calculations using the Fourth Cameroon Household Survey ( ECAM4 ) 2014, National Institute of Statistics ( NIS ).", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as a survey that provides data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Household Survey', which typically refers to structured data collection.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a survey that provides data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 16, + "text": "There are also 14, 936 asylum-seekers in Rwanda: see https: / / data. unhcr. org / en / country / rwa 2 Loschmann, C., Bilgili, O. & Siegel, M. ( 2019 ) \u201c Considering the benefits of hosting refugees: evidence of refugee camps influencing local labour market activity and economic welfare in Rwanda, \u201d IZA Journal of Development & Migration, pp. 1-23. 3 The self-reliance survey is part of the \u201c Enhancing Self-Reliance and Preparedness for Forced Displacement in the Great Lakes Region \u201d activity ( P500793 ) and draws on the global Refugee Self-Reliance Initiative index ( www. refugeeselfreliance. org ). 4 Phase I of Jya Mbere was approved on April 30, 2019, and will close on October 30, 2026. Of the total Phase I budget of US $ 84. 41 million, US $ 68 million is from the IDA 18 Sub-window for Refugees & Host Communities and the IDA 19 Window for Host Communities & Refugees ( WHR ) and US $ 4. 41 million was provided in co-financing by the Danish International Development Agency ( Danida ). 5 To meet WHR eligibility requirements, the Government submitted a Strategy Note on supporting refugees, which was discussed and agreed with the World Bank.", + "ner_text": [ + [ + 358, + 378, + "named" + ], + [ + 41, + 47, + "self-reliance survey <> data geography" + ], + [ + 133, + 143, + "self-reliance survey <> author" + ], + [ + 146, + 150, + "self-reliance survey <> publication year" + ], + [ + 468, + 486, + "self-reliance survey <> data geography" + ], + [ + 657, + 661, + "self-reliance survey <> publication year" + ], + [ + 1147, + 1157, + "self-reliance survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "1-23. 3 The self-reliance survey is part of the \u201c Enhancing Self-Reliance and Preparedness for Forced Displacement in the Great Lakes Region \u201d activity ( P500793 ) and draws on the global Refugee Self-Reliance Initiative index ( www. refugeeselfreliance.", + "type": "survey", + "explanation": "The self-reliance survey is explicitly mentioned as part of an activity and is likely used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "The self-reliance survey is explicitly mentioned as part of an activity and is likely used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 76, + "text": "Table 1: Refugee and Host Population in Uganda62 Population Refugee % of total Number of firms63 Refu gee Ugandan hosts Refuge e Host North West Refugee-Hosting Districts Yumbe, Adjumani, Madi Okollo, Terego Lamwo, Koboko, Obongi 873, 844 2, 169, 200 29 % 1, 987 13, 505 South West Refugee-Hosting Districts Isingiro, Kyegegwa, Kamwenge, Kiryandongo, Kikuube 576, 922 2, 266, 800 20 % 2, 526 15, 095 Total non-Kampala RHDs 1, 450, 766 4, 436, 000 25 % 4, 513 28, 601 Total Kampala 98, 415 1, 709, 000 5 % 5, 028 104, 972 2. The economic activity slow down caused by COVID-19 has affected Uganda \u2019 s ability to generate jobs for those living in vulnerable situations, including refugees and host communities. Despite the concerted efforts to integrate refugees within the ecosystems of their host communities, refugee - hosting districts ( RHDs ) remain less developed areas. Low levels of disposable incomes have resulted in low demand and limited access to labor markets, leaving those residents with some access to land with no alternative but to live off subsistence agriculture and humanitarian aid. These areas were less developed even before the inflow of refugees and remain decoupled from resilient and viable supply chains in the economy. For example, the average value of assets among all households ( both refugee and host ) in the district of Arua64 is 560, 000 Ugandan shillings ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 62 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "ner_text": [ + [ + 1623, + 1647, + "named" + ], + [ + 328, + 336, + "district-level firm data <> data geography" + ], + [ + 338, + 349, + "district-level firm data <> data geography" + ], + [ + 588, + 594, + "district-level firm data <> data geography" + ], + [ + 1355, + 1361, + "district-level firm data <> data geography" + ], + [ + 1484, + 1490, + "district-level firm data <> data geography" + ], + [ + 1594, + 1598, + "district-level firm data <> publication year" + ], + [ + 1905, + 1923, + "district-level firm data <> usage context" + ] + ], + "validated": true, + "empirical_context": "unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "type": "data", + "explanation": "This is indeed a dataset as it is used in the context of calculations and is derived from a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected at the district level.", + "contextual_reason_agent": "This is indeed a dataset as it is used in the context of calculations and is derived from a structured collection of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 20, + "text": "Similarly, the public investment preparation process does not have a systematic evidence-based assessment of return on investment to inform selection and prioritization of public investment projects, despite the availability of good national statistics overall. Public spending planning and programming would have a higher impact with better use of available statistics to inform resource allocation according to the population, poverty level, and other socioeconomic indicators. 25. The statistics system is adequate overall but is obviously under strain. The National Institute of Statistics ( Institut National de la Statistique, INS ) produces and publishes economic statistics ( national accounts, prices indices, external trade, enterprises ) and social statistics ( poverty and living condition, demographic, health ) of satisfactory quality. Cameroon has a solid experience in rolling out large data collection operations such as population censuses, living standard household surveys, and demographic surveys. The quality of training of Cameroonian statisticians is good. The sub regional training institute ( Institut Sous R\u00e9gional de Statistique et d \u2019 Economie Appliqu\u00e9e ) recruits through a rigorous open competitive exam jointly with the statistics training institutes of Abidjan and Dakar.", + "ner_text": [ + [ + 959, + 992, + "named" + ], + [ + 561, + 593, + "living standard household surveys <> author" + ], + [ + 753, + 770, + "living standard household surveys <> data type" + ], + [ + 850, + 858, + "living standard household surveys <> data geography" + ], + [ + 1320, + 1338, + "living standard household surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The National Institute of Statistics ( Institut National de la Statistique, INS ) produces and publishes economic statistics ( national accounts, prices indices, external trade, enterprises ) and social statistics ( poverty and living condition, demographic, health ) of satisfactory quality. Cameroon has a solid experience in rolling out large data collection operations such as population censuses, living standard household surveys, and demographic surveys. The quality of training of Cameroonian statisticians is good.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as part of large data collection operations conducted by the National Institute of Statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of survey that collects data on living standards.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as part of large data collection operations conducted by the National Institute of Statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31. Subcomponent 1. 2: Boma Health Initiative ( implemented by UNICEF; US $ 12. 41 million: US $ 3. 41 million equivalent IDA [ WHR IDA ] and US $ 9. 0 million Trust Funds [ US $ 0. 95 million SDTF and US $ 8. 05 million MDTF ] ).", + "ner_text": [ + [ + 550, + 555, + "named" + ], + [ + 178, + 187, + "DHIS2 <> data type" + ], + [ + 202, + 211, + "DHIS2 <> data type" + ], + [ + 322, + 328, + "DHIS2 <> publisher" + ], + [ + 800, + 806, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "Contracted IPs will be responsible for: ( a ) delivering quality health services; ( b ) quality improvement activities; ( c ) supervision of health facilities ( d ) recording of HMIS data, provision of HMIS data to CHDs, and support for entry of DHIS2 data into DHIS2 and data use; ( e ) in-service training complementing UNICEF \u2019 s training activities; ( f ) health facility stock management, recording, and rational use; ( g ) through an integrated approach, developing the capacity of CHDs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( h ) sustain the support of the innovation activities under CERHSSP and expand using the digital health technology to address service delivery and supply chain issues. 31.", + "type": "system", + "explanation": "DHIS2 is indeed a data system used for health information management, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data entry and use.", + "contextual_reason_agent": "DHIS2 is indeed a data system used for health information management, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 19, + "text": "The Theory of Change ( Table 2 ) is structured around the Program \u2019 s three RAs: \u2022 RA1 on improved service delivery through digitalization \u2022 RA2 on enhanced government effectiveness through digitalization \u2022 RA3 on transparency and accountability through digitalization. 25. The Program builds synergies across its results framework. The strengthening of trusted and people-centric DPI under RA1 will bolster the digitalization of the education and health sectors and competency-based management in the civil service in RA2 and RA3. Specifically, secondary education diplomas will be digitally verifiable using DPI, which will not only increase trust in their authenticity but also allow them to be shared easily in a people-centric way ( that is, with user consent and data minimization ). For core health systems, such as those that manage EMRs, their integration with trusted DPI will improve the protection of sensitive health data while facilitating safe data sharing capabilities. The Program \u2019 s results framework is further underpinned by three main cross-cutting dimensions across the three RAs: 1 ) People-centricity, that is, a focus on output - and outcome-level results at the interface between government and the people ( across e-services and in the education and health sectors ). 2 ) Data generation and use for performance monitoring and evaluation ( M & E ) to inform policymaking and implementation ( through the release of interactive statistical data and the use of health quality data ). 3 ) Direct and indirect benefits to Syrian refugees, since the Program supports enhanced refugee access to e-services and digital ID, the digitalized secondary education examination, and e-health services. Disaggregated statistical and administrative data will help provide evidence on socioeconomic indicators and inform policy dialogue.", + "ner_text": [ + [ + 1443, + 1471, + "named" + ], + [ + 1487, + 1506, + "interactive statistical data <> data type" + ], + [ + 1546, + 1561, + "interactive statistical data <> reference population" + ] + ], + "validated": true, + "empirical_context": "The Program \u2019 s results framework is further underpinned by three main cross-cutting dimensions across the three RAs: 1 ) People-centricity, that is, a focus on output - and outcome-level results at the interface between government and the people ( across e-services and in the education and health sectors ). 2 ) Data generation and use for performance monitoring and evaluation ( M & E ) to inform policymaking and implementation ( through the release of interactive statistical data and the use of health quality data ). 3 ) Direct and indirect benefits to Syrian refugees, since the Program supports enhanced refugee access to e-services and digital ID, the digitalized secondary education examination, and e-health services.", + "type": "data", + "explanation": "This is indeed a dataset as it is mentioned in the context of performance monitoring and evaluation, indicating its use in empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'interactive statistical data' suggests a structured collection of data used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of performance monitoring and evaluation, indicating its use in empirical analysis.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "180_multi0page", + "page": 54, + "text": "Detailed discussions were carried out with the financial manager in the LORA ( now FSD ). The following issues were discussed and explained - the financial management and LACI requirements, including accounting, reporting, internal control, budgeting, auditing, financial manual, and accounting system. It is stressed that proper arrangements will have to be put in place to ensure that the accounting records generated are accurate and reliable. It was explained that specific books and records will have to be designed and set up as soon as possible. At a minimum, there should be a general ledger and cash disbursement journal, with account distribution headings corresponding to the project components and activities, project disbursement categories, project funding source and project chart of accounts. The FSD will use the accounting system currently used by LORA. LORA uses an in-house designed financial management system, which is specifically designed for World Bank-financed projects and reporting under non - LACI requirements. This accounting software system, which is based on Visual Basic Database, appears to be well-developed by an external consultant a few years ago and subsequently enhanced by LORA ' s MIS staff. It consists of the following modules: financial, procurement and reporting. The financial modules include 4 sub - modules: Payments Ia, Reports Ia, Other Categories, and Reports by Other Categories.", + "ner_text": [ + [ + 1092, + 1113, + "named" + ] + ], + "validated": false, + "empirical_context": "LORA uses an in-house designed financial management system, which is specifically designed for World Bank-financed projects and reporting under non - LACI requirements. This accounting software system, which is based on Visual Basic Database, appears to be well-developed by an external consultant a few years ago and subsequently enhanced by LORA ' s MIS staff. It consists of the following modules: financial, procurement and reporting.", + "type": "system", + "explanation": "However, it is described as an accounting software system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Visual Basic Database' includes the term 'Database', which often implies data storage.", + "contextual_reason_agent": "However, it is described as an accounting software system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 63, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 58 of 130 connections provided under the project with grid, mini - grid, and off-grid solutions. on progress reports. MEMD database, IVA reports, approved loan and grants applications. People provided with access to electricity under the project with grid and mini-grid The indicator will track the number of people benefiting from grid and mini-grid electricity access under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. Quarterly Project implementati on progress reports. Household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports MEMD People provided with access to electricity under the project with grid and mini-grid of which female The indicator will track the number of females benefiting from grid and mini-grid electricity access under the project. Share of female is 51 %, UBOS Women and Men in Uganda, Facts and Figures 2016. Quarterly Project implementati on progress reports. UBOS Women and Men in Uganda, Facts and Figures, 2016. Service Providers customer database, MEMD database, IVA reports MEMD People provided with access to electricity under the project with off-grid The indicator will track the number of people benefiting from access to off-grid Quarterly Project implementati on progress Approved loan and grants applications.", + "ner_text": [ + [ + 530, + 560, + "named" + ], + [ + 491, + 528, + "UBOS National Household Survey <> data description" + ], + [ + 561, + 570, + "UBOS National Household Survey <> publication year" + ], + [ + 1017, + 1023, + "UBOS National Household Survey <> data geography" + ], + [ + 1043, + 1047, + "UBOS National Household Survey <> publication year" + ], + [ + 1123, + 1129, + "UBOS National Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. Quarterly Project implementati on progress reports.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced in the context of providing empirical data about household size.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a national survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context of providing empirical data about household size.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 39, + "text": "raising requirements and M & E procedures. The Manual now requires completion ( development of detailed technical annexes and process tools ) and integration into SWF operational processes. The development of a Management Information System ( MIS ) and MIS capacity in program process, beneficiary databases, web-based applications is urgently needed as are national as well as local communications networks between SWF offices to facilitate real-time information and application flows. 117. The 2008 report on Yemen \u2019 s Social Protection Strategy indicated that with appropriate targeting, technology and administrative support, the SWF could become an important institution providing cash support to the poorest ( chronic poor ), as well for channeling additional ( supplemental ) assistance under special circumstances such as compensation for the reduction and / or abolition of subsidies and the sharp rise in the price of food staples. The Bank has also initiated a dialogue on the subsidy reform agenda; it is recognized that SWF \u201c can have a fundamental role in poverty reduction, and for that role to be effective, SWF needs to improve: ( i ) its method of screening ( targeting ); ( ii ) its application pool andprocesses; and ( iii ) its cash delivery mechanism \u201d.", + "ner_text": [ + [ + 211, + 240, + "named" + ] + ], + "validated": false, + "empirical_context": "The Manual now requires completion ( development of detailed technical annexes and process tools ) and integration into SWF operational processes. The development of a Management Information System ( MIS ) and MIS capacity in program process, beneficiary databases, web-based applications is urgently needed as are national as well as local communications networks between SWF offices to facilitate real-time information and application flows. 117.", + "type": "system", + "explanation": "However, it is described as a system that supports processes rather than a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is described as a system that supports processes rather than a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 13, + "text": "Poverty is disproportionately concentrated in rural areas and in the northern regions of the country. The 2014 household survey found that 56. 8 percent of rural families are poor, compared to just 8. 9 percent of urban families. 3 Overall, approximately 87 percent of the poor live in rural areas. Moreover, a majority of poor individuals are concentrated in the three northern regions of the country: the Far North, North, and Adamawa regions. More than one-half ( 56 percent ) of all poor inhabitants are located in the Far North and North regions, a significant increase from 34 percent in 2001. While poverty has increased in northern Cameroon, the incidence of poverty in the center-west of the country ( in the Littoral, Center, West, and South West regions ), as well as in Douala and Yaound\u00e9, has declined. 3. A rapid increase in poverty in northern Cameroon had been observed before the heightened regional insecurity associated with Boko Haram activities in 2014. The deterioration of the security environment in northern Cameroon occurred largely over the course of the past three years, following the collection of data in support of the 2014 Enqu\u00eate Camerounaise aupr\u00e8s des M\u00e9nages ( ECAM \u2013 Cameroon Household Survey ) household survey.", + "ner_text": [ + [ + 106, + 127, + "named" + ] + ], + "validated": true, + "empirical_context": "Poverty is disproportionately concentrated in rural areas and in the northern regions of the country. The 2014 household survey found that 56. 8 percent of rural families are poor, compared to just 8.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on poverty levels among rural families.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on household poverty.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on poverty levels among rural families.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 105, + "text": "District accountants will coordinate the follow up of accountabilities from the communities with the subcounty accountants. The Public Financial Management Act 2015 and related Regulations; Treasury Accounting Instructions, 2003; Local Government Act; Local Government ( Financial and Accounting ) Regulations, 2007; and Local Government Financial Management Manual. SUN Accounting System but likely to migrate to the Integrated Financial Management Information System ( IFMIS ) June 30 Djibouti: Agence Djiboutienne de D\u00e9veloppement Social ( Djiboutian Social Development Agency, ADDS ) ADDS accounting staff under the Department of Accounting includes a finance officer who reports to the \u201c Agent Comptable, \u201d but the PIU will need to recruit an accountant who will be responsible for all financial management aspects of the project. Project Financial Operation Manual, which needs to be updated to cover this project. TOMPRO Accounting system and a management information system that needs to improve its functionality with respect to its reporting mechanism. December 31 Internal Control and Internal Audit Arrangements 57. Internal controls. The internal control procedures will be documented in the financial management manuals that are shown in the table 4. 1 for each of the implementing entities and their PIMs that will take into consideration gaps in their existing financial management manuals", + "ner_text": [ + [ + 418, + 468, + "named" + ] + ], + "validated": false, + "empirical_context": "The Public Financial Management Act 2015 and related Regulations; Treasury Accounting Instructions, 2003; Local Government Act; Local Government ( Financial and Accounting ) Regulations, 2007; and Local Government Financial Management Manual. SUN Accounting System but likely to migrate to the Integrated Financial Management Information System ( IFMIS ) June 30 Djibouti: Agence Djiboutienne de D\u00e9veloppement Social ( Djiboutian Social Development Agency, ADDS ) ADDS accounting staff under the Department of Accounting includes a finance officer who reports to the \u201c Agent Comptable, \u201d but the PIU will need to recruit an accountant who will be responsible for all financial management aspects of the project. Project Financial Operation Manual, which needs to be updated to cover this project.", + "type": "system", + "explanation": "However, it is described as a system and not explicitly mentioned as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a system and not explicitly mentioned as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 41, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXII DLI allocation 17, 197, 500. 00 As a % of Total Financing Amount 5. 0 % \u27a2 5. 3: Enhanced digital literacy / skills ( Number ) 0 0 Prime Ministry endorses the curricular for digital training adopted by IPA, 1, 000 civil servants with certified digital literacy / skills 2, 000 civil servants with certified digital literacy / skills 3, 000 civil servants with certified digital literacy / skills 4, 000 civil servants with certified digital literacy / skills 0. 00 0. 00 3, 000, 000. 00 2, 000, 000. 00 2, 000, 000. 00 2, 000, 000. 00 DLI allocation 9, 000, 000. 00 As a % of Total Financing Amount 2. 57 % \u27a2 5. 4: Increase in the utilization of HRMIS and other digital platforms by public entities for competitive recruitment, performance-based promotion and digital upskilling. ( Percentage ) 0 0 18 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 36 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 54 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 72 % of recruitments", + "ner_text": [ + [ + 1089, + 1094, + "named" + ] + ], + "validated": false, + "empirical_context": "4: Increase in the utilization of HRMIS and other digital platforms by public entities for competitive recruitment, performance-based promotion and digital upskilling. ( Percentage ) 0 0 18 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 36 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 54 % of recruitments, promotions, and digital skills trainings recorded using HRMIS and other digital platforms 72 % of recruitments", + "type": "system", + "explanation": "However, HRMIS is described as a system and not explicitly mentioned as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is associated with recorded data on recruitments and promotions.", + "contextual_reason_agent": "However, HRMIS is described as a system and not explicitly mentioned as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "158_40156", + "page": 37, + "text": "Annex 3: Results Framework and Monitoring IGAD Regional HIV / AIDS Partnership Program ( IRAPP ) Support Project Results Framework15 Preface The results framework for the IRAPP Support Project was developed in close collaboration between IGAD, the IGAD Member States, GAMET, the World Bank and other development partners. The monitoring table which outlines the baseline data and targets for the Project is being finalized, and is a critical part of the project. The UNHCR is currently finalizing the baseline data for the refugee camps, which have been collected through behavioral surveillance surveys for each of the refugee areas to be targeted. IGAD is completing additional baseline data based on the regional mapping assessment recently completed. This data will be presented, discussed and targets finalized during the IGAD Technical Working Group meeting on M & E, which will include all the IGAD Member States and development partners, scheduled for the project launch ( September 2007 ). The project will contribute towards the establishment of a regional M & E system, in order to get systematic and regular data updates for cross-border and mobile populations. As of now there is no regional M & E system that captures data on these target groups. Furthermore, data on increased regional collaboration will be collected using the Most - Significant-Change technique. This qualitative data will be collected by September 2007, and annually thereafter.", + "ner_text": [ + [ + 572, + 603, + "named" + ], + [ + 362, + 375, + "behavioral surveillance surveys <> data description" + ], + [ + 501, + 514, + "behavioral surveillance surveys <> data type" + ], + [ + 523, + 536, + "behavioral surveillance surveys <> reference population" + ], + [ + 981, + 995, + "behavioral surveillance surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "The monitoring table which outlines the baseline data and targets for the Project is being finalized, and is a critical part of the project. The UNHCR is currently finalizing the baseline data for the refugee camps, which have been collected through behavioral surveillance surveys for each of the refugee areas to be targeted. IGAD is completing additional baseline data based on the regional mapping assessment recently completed.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned that the surveys have been conducted to collect baseline data, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'behavioral surveillance surveys' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "In the context, it is explicitly mentioned that the surveys have been conducted to collect baseline data, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 45, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | XXXVI adherence with the access protocols adherence with the access protocols 0. 00 0. 00 0. 00 3, 000, 000. 00 5, 000, 000. 00 0. 00 DLI allocation 8, 000, 000. 00 As a % of Total Financing Amount 3. 0 % \u27a2 9. 3: An additional premium for every core micro-data set made available via an online portal with indicators for gender, disability and residency status ( Number ) 0 0 0 0 2 core micro-data sets made available via an online portal with indicators for gender, disability and residency status 3 core micro-data sets made available via an online portal with indicators for gender, disability and residency status 0. 00 0. 00 0. 00 0. 00 800, 000. 00 1, 200, 000. 00 DLI allocation 2, 000, 000. 00 As a % of Total Financing Amount 0. 5 % 10: Institutionalizing effective health data use ( Yes / No ) No No No Yes Yes Yes 0. 00 0. 00 0. 00 3, 250, 000. 00 1, 500, 000. 00 3, 250, 000. 00 DLI allocation 8, 000, 000. 00 As a % of Total Financing", + "ner_text": [ + [ + 340, + 359, + "named" + ], + [ + 4, + 14, + "core micro-data set <> publisher" + ], + [ + 401, + 455, + "core micro-data set <> data description" + ] + ], + "validated": true, + "empirical_context": "0 % \u27a2 9. 3: An additional premium for every core micro-data set made available via an online portal with indicators for gender, disability and residency status ( Number ) 0 0 0 0 2 core micro-data sets made available via an online portal with indicators for gender, disability and residency status 3 core micro-data sets made available via an online portal with indicators for gender, disability and residency status 0. 00 0.", + "type": "micro-data set", + "explanation": "This is indeed a dataset as it is explicitly mentioned as being made available via an online portal, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'core micro-data sets' which suggests a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as being made available via an online portal, indicating its use as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 72, + "text": "The World Bank Senegal, Casamance Economic Development Project ( P175325 ) Page 68 of 72 Annex 7: Climate related activities financed by the project Climate-related project activity How will activity address climate-related vulnerabilities? Area 1: Investing in community resilience 1. Targeting of investments in climate vulnerable communities Project investments will target communes identified based on their vulnerability to climate and conflict risks. Vulnerability is based on a communes \u2019 exposure, sensitivity, and adaptive capacity and will be estimated by using spatially explicit and publicly available datasets ( including indicators on predicted climate change, water availability and land cover change / forest lost ). This approach will ensure communities most at risk to climate change are prioritized to benefit from the project. 2. Integrating a science - based participatory climate risk assessment in needs assessments and local planning A participatory climate risk assessment based on available past and projected climate data and local knowledge mobilization on past climate risks will be conducted for each local needs assessment and used to guide and prioritize climate smart investments in the supported local plans. 3. Screening and prioritizing of investments that are climate smart and adapted, as well as activities that promote climate resilience Investments will be prioritized and screened based on their contributions to climate change adaptation or mitigation and improved natural resource management to ensure climate smart and climate-sensitive investments.", + "ner_text": [ + [ + 572, + 622, + "named" + ], + [ + 15, + 22, + "spatially explicit and publicly available datasets <> data geography" + ], + [ + 635, + 729, + "spatially explicit and publicly available datasets <> data description" + ], + [ + 1610, + 1628, + "spatially explicit and publicly available datasets <> usage context" + ] + ], + "validated": true, + "empirical_context": "Targeting of investments in climate vulnerable communities Project investments will target communes identified based on their vulnerability to climate and conflict risks. Vulnerability is based on a communes \u2019 exposure, sensitivity, and adaptive capacity and will be estimated by using spatially explicit and publicly available datasets ( including indicators on predicted climate change, water availability and land cover change / forest lost ). This approach will ensure communities most at risk to climate change are prioritized to benefit from the project.", + "type": "dataset", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of information used to estimate vulnerability in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'spatially explicit and publicly available datasets' which suggests a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of information used to estimate vulnerability in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 99, + "text": "The ESMF will include a Pest Management Plan to ensure that livestock vaccines and medications are procured and administered in accordance with the FAO / WHO International Code on Pesticide Management. Similarly, any fishing kits procured and distributed will be vetted with the FAO Code of Conduct for Responsible Fisheries and related technical guidelines. 96. The ESMF will also include a supervision, monitoring, and reporting plan and budget for environmental safeguards. It is expected that safeguards supervision will be incorporated into the overall TPTR contract to be financed through Sub-component 2. 5. TPTR will be supplemented by reporting from FAO-Somalia staff as well as local technical affiliates of FAO. 97. SEDRP will not allow commencement of any Component 2 civil works until the project ESMF has been disclosed publicly on the FAO and World Bank websites. The deferment of ESMF disclosure is linked to a legal covenant in the project Financing Agreement. Monitoring and Evaluation 98. Monitoring and Evaluation. The ICRC and FAO will be independently responsible for their own program monitoring and evaluation ( M & E ) of the Project, using the Project \u2019 s results framework to issue quarterly updates on the overall project implementation and results. The ICRC and FAO will be responsible to set up a Results Monitoring System that will allow to report on the Results Framework and any related outcome and impact information for the project. 99. The Results Monitoring System for ICRC will include data from each sector: Economic Security ( EcoSec ), Water / Sanitation, and Health. The monitoring system has a two-pronged approach: 1 ) the internal data collection and analysis by the EcoSec team through regular field assessment visits and market survey that is conducted in 14 regions on a monthly basis; 2 ) the exchange with the relevant stakeholders such as SRCS, communities, local associations and NGOs, different governmental authorities at the field and central level, the UN led Food / Nutrition and Shelter clusters at the regional and central levels, including specialized", + "ner_text": [ + [ + 1327, + 1352, + "named" + ] + ], + "validated": false, + "empirical_context": "The ICRC and FAO will be independently responsible for their own program monitoring and evaluation ( M & E ) of the Project, using the Project \u2019 s results framework to issue quarterly updates on the overall project implementation and results. The ICRC and FAO will be responsible to set up a Results Monitoring System that will allow to report on the Results Framework and any related outcome and impact information for the project. 99.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system for monitoring and evaluation, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Monitoring System' which suggests data collection.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system for monitoring and evaluation, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 45, + "text": "This approach will, reduce potential inclusion errors, link with existing approaches such as the HEA, and lay the foundation of a methodological approach to support the establishment of a social registry. 29. Registration system and social registry. Based on the harmonized data collection procedures and survey instruments, the project will support the Government in developing a social registry, which will eventually function as a single registry. A social registry is a database that is capable of collecting, analyzing and storing the following information: personally identifying data ( either at an individual level or grouped into family or households ); socio - economic data which would be used to classify individual identities into poverty or vulnerability categories through the application of PMT. Hence, the social registry supports targeting, scoring, selection, on-boarding, identification, and, verification processes all linked to identity. Moreover, the registry eventually would allow: ( a ) different actors and programs to target households according to their own program objectives; ( b ) better coordinate interventions, avoid duplication and save significant costs in data collection activities; and ( c ) improve capacity to quickly scale up safety net programs in face of shocks.", + "ner_text": [ + [ + 663, + 684, + "named" + ] + ], + "validated": false, + "empirical_context": "Based on the harmonized data collection procedures and survey instruments, the project will support the Government in developing a social registry, which will eventually function as a single registry. A social registry is a database that is capable of collecting, analyzing and storing the following information: personally identifying data ( either at an individual level or grouped into family or households ); socio - economic data which would be used to classify individual identities into poverty or vulnerability categories through the application of PMT. Hence, the social registry supports targeting, scoring, selection, on-boarding, identification, and, verification processes all linked to identity.", + "type": "data", + "explanation": "'socio - economic data' is not a structured collection of data itself but rather a type of information that will be collected and analyzed within the social registry.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'socio - economic data' refers to a dataset because it is mentioned in the context of data collection and analysis.", + "contextual_reason_agent": "'socio - economic data' is not a structured collection of data itself but rather a type of information that will be collected and analyzed within the social registry.", + "contextual_signal": "described as information to be collected, not as a dataset", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 103, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 99 of 159 Data source / Agency MINEPAT ( Strategy ), MINEFOP ( NQCF ) Verification Entity Independent Verification Agency ( IVA ) Procedure PCU compiles the necessary evidence, the IVA verifies the evidence, PCU sends the evidence and results of the verification to the Bank, the Bank reviews, requests clarification if needed and approves. Year 5 DLR: The mid-term evaluation of the NSDS will be coordinated by MINEFOP, it will be validated by relevant stakeholders and signed by MINEPAT. PBC 7 Strengthened information system and skills development sector monitoring Description The Skills Platform is an integrated digital platform on training opportunities in the form of a website, with a related application. These will contain information about the availability of training centers and types of programs offered by different regions, and contact information of these centers. They will also contain aggregated results of labor market outcomes of different programs from the tracer studies. The concept note for the Skills Platform will present information flows, roles and responsibilities for data collection, processing and publishing, the definition of indicators to be collected and published. The concept note will also explain how the Skills Platform is linked to the Education Management Information System.", + "ner_text": [ + [ + 1364, + 1403, + "named" + ] + ], + "validated": false, + "empirical_context": "The concept note for the Skills Platform will present information flows, roles and responsibilities for data collection, processing and publishing, the definition of indicators to be collected and published. The concept note will also explain how the Skills Platform is linked to the Education Management Information System.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 123, + "text": "Health centers are unable to provide quality and reliable services to the refugees and host communities due to limited and inadequate electricity supply. Social facilities are over capacity as they are serving nationals and refugees, while power supply is inadequate and non-reliable. Most of these critical loads are powered by solar systems and high-cost diesel generators provided by UNHCR, but supply is unable to meet the ever-increasing demand from the refugee settlements and host communities. To improve on service delivery and livelihoods within these settlements and surrounding host communities, it is necessary to enhance access to reliable and sustainable forms of energy services. UNHCR has identified energy as a critical area for refugees and host communities needing financing support from development partners69. COVID-19 has only increased the necessity for these improvements. Furthermore, energy for productive uses to support livelihood activities like agro-processing and appropriate technology for food preservation is scarcely available in refugee settlements and their host communities, leaving refugees communities dependent on humanitarian aid. 3. The design of Component 3 was also informed by the Diagnostic Study70 conducted by the World Bank in selected refugee-hosting districts to address behavioral challenges for refugees and host communities in switching from traditional fuels to off-grid and clean cooking technologies. The study led to the identification of several psychological and social bottlenecks for the uptake of off-grid solar products and clean cooking.", + "ner_text": [ + [ + 1227, + 1245, + "named" + ] + ], + "validated": false, + "empirical_context": "3. The design of Component 3 was also informed by the Diagnostic Study70 conducted by the World Bank in selected refugee-hosting districts to address behavioral challenges for refugees and host communities in switching from traditional fuels to off-grid and clean cooking technologies. The study led to the identification of several psychological and social bottlenecks for the uptake of off-grid solar products and clean cooking.", + "type": "study", + "explanation": "However, it is not a dataset as it is described as a study that informs design rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'study', which often relates to data collection.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a study that informs design rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 28, + "text": "The World Bank Enhancing Connectivity and Resilience in the Far North of Cameroon for Inclusiveness Project ( P178207 ) Page 29 of 82 facilitating social cohesion in the communities. Financing the community \u2019 s infrastructure along with the rural roads is expected to maximize the project \u2019 s benefits to the population and build local community engagement and ownership in the project \u2019 s influence. These infrastructures will also contribute to closing the spatial disparities in access to basic socioeconomic services in the Far North of Cameroon. 56. Component 3: Transport Sector Institutional Strengthening ( IDA: US $ 3. 0 million equivalent ). 57. This component will finance capacity building and institutional-strengthening activities of the transport sector, among other things, in the following areas: 3 ( a ) Building capacity for the inclusion of climate resilience in the planning and management of road infrastructure. 3 ( b ) Supporting main public engineering universities on climate adaptation, civil engineering, transportation planning, and digital technology. 3 ( c ) Developing and operationalizing a road accident database management system disaggregated by gender, refugee, and host community. 3 ( d ) Undertaking training and awareness campaigns on road safety in the project area. 3 ( e ) Strengthening the capacity of the Ministry of Transport ( MINT ) and other public road safety stakeholders in road safety management. 3 ( f )", + "ner_text": [ + [ + 1124, + 1164, + "named" + ], + [ + 4, + 14, + "road accident database management system <> publisher" + ], + [ + 60, + 81, + "road accident database management system <> data geography" + ], + [ + 1165, + 1217, + "road accident database management system <> data description" + ] + ], + "validated": true, + "empirical_context": "3 ( b ) Supporting main public engineering universities on climate adaptation, civil engineering, transportation planning, and digital technology. 3 ( c ) Developing and operationalizing a road accident database management system disaggregated by gender, refugee, and host community. 3 ( d ) Undertaking training and awareness campaigns on road safety in the project area.", + "type": "database", + "explanation": "This is indeed a dataset as it is described as a 'road accident database management system' which indicates it is used to manage and analyze data on road accidents.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'database' which often implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a 'road accident database management system' which indicates it is used to manage and analyze data on road accidents.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 77, + "text": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 71 of 95 MEMC \u2019 s FM team will handle the FM activities for the project \u2019 s activities implemented by MINEFID until then, based on a partnership agreement to be signed between MEMC and MINEFID prior to effectiveness date. 146. Accounting information systems. A computerized financial management system is in place within MEMC, which parameters would be migrated within three months after project effectiveness, to take into consideration the specificity of the new project. For this project, MINEFID will acquire and install its computerized financial management system within three months after the project effectiveness date. The project FM teams within MINEFID and MEMC will keep records on Excel spreadsheet until then. Any new FM and accounting staff recruited for the proposed project will be trained to be conversant with the accounting software. The accounting software to be procured would include the following modules to be integrated: budgeting, general accounting, cost accounting, reporting, M & E, fixed assets management, preparation of withdrawal applications, and tracking of disbursements by donors. 147. Accounting standards. The MINEFID PIT and MEMC PIT will use SYSCOHADA64 accounting standards which are commonly used among West and Central African Francophone countries.", + "ner_text": [ + [ + 353, + 393, + "named" + ] + ], + "validated": false, + "empirical_context": "Accounting information systems. A computerized financial management system is in place within MEMC, which parameters would be migrated within three months after project effectiveness, to take into consideration the specificity of the new project. For this project, MINEFID will acquire and install its computerized financial management system within three months after the project effectiveness date.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a financial management system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data handling.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a financial management system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 944, + 985, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5.", + "type": "document", + "explanation": "However, it is not functioning as a data source in this context, as it is described as training records rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'records' which often implies data collection.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it is described as training records rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 43, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0. 00 0. 00 5. 00 10. 00 15. 00 20. 00 20. 00 Rapid student assessment mechanism developed and piloted ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Tutoring / accelerated learning program for disadvantaged students developed and implemented ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Students benefiting from direct interventions to 0. 00 0. 00", + "ner_text": [ + [ + 314, + 322, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0.", + "type": "data", + "explanation": "In the context, 'MoF data' is explicitly referenced as a source of information used for assessments and reports, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'MoF data' is a dataset because it is mentioned in the context of providing information for education sector management.", + "contextual_reason_agent": "In the context, 'MoF data' is explicitly referenced as a source of information used for assessments and reports, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for assessments and reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 34, + "text": "Transport vouchers and cash support for incidentals will be provided to refugees and very poor individuals with select conditions, especially for chronic and complex conditions ( ii ) The recruitment of a firm / organization to implement integrated SBCC campaigns, including topics on the medical risks of FGM, climate-related mitigation and adaptation and other recommendations from the CHVA at national, regional, and facility levels ( including in refugee villages ) ( iii ) Implementation of social accountability and transparency mechanisms such as: a ) \u201c Community scorecards \u201d linked to counter-verification of PBF activities at the facility comparing and measuring a community \u2019 s progress in achieving higher standards, including on climate vulnerability and adaptation b ) Patient charters, service standard displays at health facilities, etc. c ) Initiatives on citizen engagement and empowerment to usher in a social movement in health, including an annual Regional Health Forum of all relevant stakeholders which channel their findings upward to an Annual National Health Forum ( iv ) Data systems to create feedback loops from health system users, including on climate vulnerability / sensitivity, using ongoing user-experience surveys; the integration of user experience data into administrative systems \u2019 facility self-evaluations to inform and influence decision makers ( v ) Grievance redress mechanism at different levels. o Sub-component 2. 3: Project management \u2013 This subcomponent will support project management, including project M & E. Component 3: Contingent Emergency Response Component - CERC ( US $ 0 million ). 49. This contingent emergency response component is included under the project in accordance with World Bank \u2019 s Investment Project Financing Policy, paragraphs 12, for situations of urgent need of assistance. This will allow for rapid reallocation of project proceeds in the event of a natural or man-made disaster or health outbreak or crisis that has caused or is likely to imminently cause a major adverse economic and / or social impact.", + "ner_text": [ + [ + 1226, + 1249, + "named" + ] + ], + "validated": false, + "empirical_context": "Transport vouchers and cash support for incidentals will be provided to refugees and very poor individuals with select conditions, especially for chronic and complex conditions ( ii ) The recruitment of a firm / organization to implement integrated SBCC campaigns, including topics on the medical risks of FGM, climate-related mitigation and adaptation and other recommendations from the CHVA at national, regional, and facility levels ( including in refugee villages ) ( iii ) Implementation of social accountability and transparency mechanisms such as: a ) \u201c Community scorecards \u201d linked to counter-verification of PBF activities at the facility comparing and measuring a community \u2019 s progress in achieving higher standards, including on climate vulnerability and adaptation b ) Patient charters, service standard displays at health facilities, etc. c ) Initiatives on citizen engagement and empowerment to usher in a social movement in health, including an annual Regional Health Forum of all relevant stakeholders which channel their findings upward to an Annual National Health Forum ( iv ) Data systems to create feedback loops from health system users, including on climate vulnerability / sensitivity, using ongoing user-experience surveys; the integration of user experience data into administrative systems \u2019 facility self-evaluations to inform and influence decision makers ( v ) Grievance redress mechanism at different levels. o Sub-component 2.", + "type": "survey", + "explanation": "However, it is not functioning as a structured collection of data in this context, but rather as a method for gathering feedback.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'user-experience surveys' imply a collection of data from respondents.", + "contextual_reason_agent": "However, it is not functioning as a structured collection of data in this context, but rather as a method for gathering feedback.", + "contextual_signal": "mentioned only as a method for gathering feedback, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 10, + "text": "The World Bank Education Infrastructure for Resilience ( EU Facility for SuTP ) ( P162004 ) Page 9 of 86 Turkey \u2019 s total population of 76. 6 million; however, this population represents a significant percentage of the population in border provinces such as Hatay, Gaziantep, \u015eanl\u0131urfa, and Mardin. In Kilis, there are as many Syrians as Turks and in Istanbul a non-negligent ( and growing ) percentage of the population is Syrian. Most of the provinces hosting a high concentration of Syrians are already more vulnerable or disadvantaged cities in Turkey, which exacerbates the development challenges for Turkey. Figure 1 provides a visual depiction of the growing presence of Syrians throughout Turkey. Moreover, data collected by the World Bank jointly with the Government of Turkey2 ( Muhtar3 survey ) not only validates the information shown in figure 1, but it also provides more precise information on the location of Syrians at the municipal level and the ratios to the local population. Figure 1. Provincial Breakdown of Syrian Refugees in Turkey ( as of November 2016 ) Source: UNHCR, DGMM, November 20164 5. Economically disadvantaged regions in the country now host large numbers of Syrians. As shown before, provinces with large numbers of SuTP are mostly located in the southern and southeastern regions of Turkey.", + "ner_text": [ + [ + 789, + 803, + "named" + ], + [ + 4, + 14, + "Muhtar3 survey <> publisher" + ], + [ + 105, + 111, + "Muhtar3 survey <> data geography" + ], + [ + 258, + 263, + "Muhtar3 survey <> data geography" + ], + [ + 265, + 274, + "Muhtar3 survey <> data geography" + ], + [ + 276, + 285, + "Muhtar3 survey <> data geography" + ], + [ + 291, + 297, + "Muhtar3 survey <> data geography" + ], + [ + 327, + 334, + "Muhtar3 survey <> reference population" + ], + [ + 351, + 359, + "Muhtar3 survey <> data geography" + ], + [ + 549, + 555, + "Muhtar3 survey <> data geography" + ], + [ + 697, + 703, + "Muhtar3 survey <> data geography" + ], + [ + 737, + 747, + "Muhtar3 survey <> publisher" + ], + [ + 913, + 955, + "Muhtar3 survey <> data description" + ], + [ + 1049, + 1055, + "Muhtar3 survey <> data geography" + ], + [ + 1073, + 1077, + "Muhtar3 survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Figure 1 provides a visual depiction of the growing presence of Syrians throughout Turkey. Moreover, data collected by the World Bank jointly with the Government of Turkey2 ( Muhtar3 survey ) not only validates the information shown in figure 1, but it also provides more precise information on the location of Syrians at the municipal level and the ratios to the local population. Figure 1.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is mentioned to provide precise information and validate data shown in a figure.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is mentioned to provide precise information and validate data shown in a figure.", + "contextual_signal": "described as a survey that collects data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 52, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 47 committee and the World Bank. Beneficiaries in targeted areas included in the Unified Social Registry - - Female Beneficiaries in targeted areas included in the Unified Social Registry - - Refugees Eligible refugees with identity documents issued by CNARR Every six months. CNARR CNARR, in consultation with UNHCR, will provide a bi-annual report to CFS on the number of eligible refugees receiving an ID, to be measured against total number of refugees. CNARR and CFS. Eligible refugees with identity documents issued by CNARR - - Female ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Geo-referenced health and education sector facility mapping in targeted areas completed Yearly. CFS - CNARR - Ministry of Health - Ministry of Education - CFS will hire a firm to carry out a geo - referencing of all facilities in project areas in the first year of CFS", + "ner_text": [ + [ + 166, + 189, + "named" + ], + [ + 4, + 14, + "Unified Social Registry <> publisher" + ], + [ + 106, + 116, + "Unified Social Registry <> publisher" + ], + [ + 194, + 200, + "Unified Social Registry <> reference population" + ], + [ + 277, + 285, + "Unified Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 47 committee and the World Bank. Beneficiaries in targeted areas included in the Unified Social Registry - - Female Beneficiaries in targeted areas included in the Unified Social Registry - - Refugees Eligible refugees with identity documents issued by CNARR Every six months. CNARR CNARR, in consultation with UNHCR, will provide a bi-annual report to CFS on the number of eligible refugees receiving an ID, to be measured against total number of refugees.", + "type": "registry", + "explanation": "The Unified Social Registry is explicitly mentioned as a source that includes data on beneficiaries, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that includes beneficiaries.", + "contextual_reason_agent": "The Unified Social Registry is explicitly mentioned as a source that includes data on beneficiaries, indicating it functions as a structured collection of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "034_Lebanon-Beirut-Housing-Rehabilitation-and-Cultural-and-Creative-Industries-Recovery", + "page": 68, + "text": "Socio - economic vulnerability The project will prioritize the poorest and the most vulnerable households affected by the blast ( e. g., low-income, FHH, refugees ), based on a socio - economic field survey. The vulnerability criteria will consider social ( i. e., presence of the elderly, female headed households, people with disabilities, refugees, and building located in an area Presence of the elderly, female headed households. 3 = Social vulnerability in more than 50 percent of multi-storied apartment buildings. 2 = Between 50 percent and 10 percent. 1 = Less than 10 percent. Presence of people with disabilities. 3 = Present. 1 = Not present.", + "ner_text": [ + [ + 177, + 206, + "named" + ], + [ + 154, + 162, + "socio - economic field survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "g. , low-income, FHH, refugees ), based on a socio - economic field survey. The vulnerability criteria will consider social ( i.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data gathered from a field survey.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on socio-economic factors.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data gathered from a field survey.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 61, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 56 productive measures grants are participated by refugees and members of host communities. The reports are then consolidated by the CFS. If needed, indicator can be based on survey of representative sample Beneficiaries that have been selected through a combined PMT / community validation process Twice a year CFS - Management Information System CFS regional offices produce two reports a year ( June and December ), indicating how many ( percentage ) Beneficiaries have been selected through a combined PMT ( Proxy Means Testing ) / community validation process. The target is 90 percent. The reports are then consolidated by the CFS. If needed, indicator can be based on a survey of a representative sample. CFS", + "ner_text": [ + [ + 397, + 432, + "named" + ] + ], + "validated": false, + "empirical_context": "The reports are then consolidated by the CFS. If needed, indicator can be based on survey of representative sample Beneficiaries that have been selected through a combined PMT / community validation process Twice a year CFS - Management Information System CFS regional offices produce two reports a year ( June and December ), indicating how many ( percentage ) Beneficiaries have been selected through a combined PMT ( Proxy Means Testing ) / community validation process. The target is 90 percent.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 8, + "text": "With the exception of early grade primary teachers, most teachers enter the classroom without any sustained training in pedagogical approaches or basic classroom skills. \u2022 Access and Equity: Household survey data indicate that public financing of basic schooling is more pro-poor than that of secondary schooling. Progress has been made in reducing gender differences in access to basic education, and to a lesser extent in secondary education. Subject specialization in secondary and vocational education still tends to reflect traditional gender roles. Completion rates and transition rates to tertiary education are highly correlated with family income: three times as many students in university come from the upper two income quintiles. \u2022 Physical facilities: A recent school utilization study indicates that the number of MoE students is expected to increase by 124, 634 between 2008 and 2013. It is estimated that there will be a need to provide an additional 3, 360 classrooms over this time period. The same study also reveals a dichotomy in the provision of educational infrastructure in the Kingdom.", + "ner_text": [ + [ + 774, + 798, + "named" + ] + ], + "validated": false, + "empirical_context": "Completion rates and transition rates to tertiary education are highly correlated with family income: three times as many students in university come from the upper two income quintiles. \u2022 Physical facilities: A recent school utilization study indicates that the number of MoE students is expected to increase by 124, 634 between 2008 and 2013. It is estimated that there will be a need to provide an additional 3, 360 classrooms over this time period.", + "type": "study", + "explanation": "However, the term 'school utilization study' is mentioned as a study and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'study' which often implies data collection.", + "contextual_reason_agent": "However, the term 'school utilization study' is mentioned as a study and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 33, + "text": "The local ACCs will review the training delivery reports and the training and employment timesheets and approve the summary payment request to be sent to the ACC PIU. The ACC-CU will contract an independent firm that will independently verify that the project is being carried out according to the POM. B. Results Monitoring and Evaluation Arrangements 78. The development and intermediate indicators will be tracked using the following instruments, sources, and methodologies: ( a ) administrative data collected through the information system, enhanced and optimized by the project ( including the registry of certified beneficiaries and the database of vacancies ); ( b ) regular information and data collection on program processes; ( c ) process evaluations and independent verification; ( d ) impact evaluation to measure results attributable to the project; ( e ) other studies and assessments carried out during the implementation of the project; and ( f ) progress reports prepared by the PIU. The POM will further describe the agreed monitoring processes and protocols for each indicator, including source, instrument, institutional responsibility, and frequency.", + "ner_text": [ + [ + 600, + 635, + "named" + ], + [ + 484, + 503, + "registry of certified beneficiaries <> data type" + ] + ], + "validated": true, + "empirical_context": "Results Monitoring and Evaluation Arrangements 78. The development and intermediate indicators will be tracked using the following instruments, sources, and methodologies: ( a ) administrative data collected through the information system, enhanced and optimized by the project ( including the registry of certified beneficiaries and the database of vacancies ); ( b ) regular information and data collection on program processes; ( c ) process evaluations and independent verification; ( d ) impact evaluation to measure results attributable to the project; ( e ) other studies and assessments carried out during the implementation of the project; and ( f ) progress reports prepared by the PIU. The POM will further describe the agreed monitoring processes and protocols for each indicator, including source, instrument, institutional responsibility, and frequency.", + "type": "registry", + "explanation": "This is a dataset as it is explicitly mentioned as part of the administrative data collected through the information system.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of data related to beneficiaries.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as part of the administrative data collected through the information system.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 15, + "text": "According to data from the International Labor Organization, a lack of safe transport reduces the probability of women accessing economic opportunities by 16. 5 percent. 15 In 2020, the regional rate was 18. 7 fatalities per 100, 000 inhabitants, which is higher than the national rate ( 15. 5 ) ( DATASUS data ). 16 Looking at the formal jobs of the Region, 69 percent of the people that receive more than ten minimum wages, on average, are men. Meanwhile, 94 percent of indigenous or black women receive less than 3 minimum wages. Data Source: RAIS, 2019. 17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents. 18 Data derived from PNAD-Cont\u00ednua ( 2019 ) State of Santa Catarina. 19 Hayd\u00e9e Svab, Marina Kohler Harkot, and Beatriz Moura Dos Santos, A Baseline Study of Gender and Transport in Sao Paulo, Brazil: Present Initiatives to Improve Women \u2019 s Mobility ( English ) ( Washington, DC: World Bank, 2021 ). 20 Brazil - Improving Mobility and Urban Inclusion in the Amazonas Corridor in Belo Horizonte Project ( Washington, DC: World Bank ). 21 Data reflect cases of harassment in public spaces without disaggregation. However, given the trend in Brazil and the Region, it can be inferred that in the Santa Catarina Region, more women are also survivors of sexual harassment.", + "ner_text": [ + [ + 546, + 550, + "named" + ], + [ + 472, + 497, + "RAIS <> reference population" + ], + [ + 552, + 556, + "RAIS <> publication year" + ], + [ + 561, + 595, + "RAIS <> data type" + ], + [ + 600, + 617, + "RAIS <> data geography" + ], + [ + 772, + 795, + "RAIS <> data geography" + ], + [ + 800, + 811, + "RAIS <> author" + ], + [ + 813, + 833, + "RAIS <> author" + ], + [ + 839, + 863, + "RAIS <> author" + ], + [ + 1321, + 1342, + "RAIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Meanwhile, 94 percent of indigenous or black women receive less than 3 minimum wages. Data Source: RAIS, 2019. 17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents.", + "type": "dataset", + "explanation": "RAIS is indeed a dataset as it is explicitly mentioned as a data source for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a data source in the context.", + "contextual_reason_agent": "RAIS is indeed a dataset as it is explicitly mentioned as a data source for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 33, + "text": "The key gaps identified in the MTR assessment are: ( a ) fewer women access loans from financial institutions to start their own businesses; ( b ) fewer women are starting businesses due to time poverty and capacity gaps; and ( c ) fewer women are in employment than men. Social gender norms and household dynamics play a critical role in causing these gaps. According to a 2022 study by Access to Finance Rwanda, deeply ingrained societal expectations shape how women participate in economic life and influence their capacity to leverage assets \u2014 particularly land and property \u2014 as collateral. 34 One pervasive norm is that women should prioritize family and caregiving responsibilities over business activities. This norm restricts their time and engagement in income-generating pursuits and weakens their perceived legitimacy as entrepreneurs, reducing their chances of qualifying for credit. Additionally, women are often expected to rely on family support, especially from spouses, instead of seeking independent financial solutions, distancing them from formal financial institutions and financial products. 31 Baseline failure rates by segment are extremely difficult to assess. As such, the analysis uses conservative assumptions based on extensive discussions with key stakeholders and potential beneficiaries. These assumptions have been tested for sensitivity. 32 CEIC data 33 In the absence of available survey data, the analysis uses proxies for these losses and repair costs from survey data from neighboring Burundi. Based on this data, annual flooding losses amount to ~ US $ 89 per household and repair costs amount to US $ 25. 60 per household. 34 Gendered Social Norms Diagnostic and their Impact on Women \u2019 s Financial Inclusion in Rwanda, 2022, Access to Finance Rwanda", + "ner_text": [ + [ + 1495, + 1506, + "named" + ], + [ + 153, + 158, + "survey data <> reference population" + ], + [ + 374, + 378, + "survey data <> publication year" + ], + [ + 388, + 412, + "survey data <> publisher" + ], + [ + 626, + 631, + "survey data <> reference population" + ], + [ + 1524, + 1531, + "survey data <> data geography" + ], + [ + 1553, + 1609, + "survey data <> data description" + ], + [ + 1761, + 1765, + "survey data <> publication year" + ] + ], + "validated": true, + "empirical_context": "These assumptions have been tested for sensitivity. 32 CEIC data 33 In the absence of available survey data, the analysis uses proxies for these losses and repair costs from survey data from neighboring Burundi. Based on this data, annual flooding losses amount to ~ US $ 89 per household and repair costs amount to US $ 25.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of information used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey data' typically refers to structured collections of data collected through surveys.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of information used for analysis.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 13, + "text": "The Coronavirus disease 2019 ( COVID-19 ) crisis has reversed much of the progress in monetary poverty reduction that had been achieved in recent years, as it is estimated that the international poverty rate increased by 0. 82 percentage points between 2019 and 2020, for the first time in more than a decade. Poverty projections suggest that the rate of extreme poverty will remain high, at nearly 25 percent owing to job and income losses. The number of poor households has continued to increase, with an additional 166, 000 people falling into extreme poverty in 2021. Poverty rates are expected to remain above pre-pandemic estimates in the medium term. 5. In Cameroon, gender equality has progressed slightly, although gender gaps and disparities exist between rural and urban areas. The country recognizes the importance of women \u2019 s empowerment both for its intrinsic value and for its contribution to economic development. Overall labor force participation has remained steady since 2010, with increasing participation of women, although their unemployment and informality remain higher. Antenatal care overall has significantly improved, especially in the regions most in need. However, the lowest rates of care continue to be reported in the rural northern and eastern regions. 5 Women in rural areas 1 United Nations Population Division 2019. 2 IMF country report, Cameroon third reviews under the extended Fund Facility Arrangements. 3 World Bank projection based on the latest Cameroon Household Survey conducted in 2014. 4 UNDP ( United Nations Development Programme ). 2022. Human Development Report. 5 World Bank. ( 2022 ). Cameroon Systematic Country Diagnostic: and update.", + "ner_text": [ + [ + 1490, + 1515, + "named" + ], + [ + 24, + 28, + "Cameroon Household Survey <> publication year" + ], + [ + 456, + 471, + "Cameroon Household Survey <> reference population" + ], + [ + 664, + 672, + "Cameroon Household Survey <> data geography" + ], + [ + 991, + 995, + "Cameroon Household Survey <> reference year" + ], + [ + 1290, + 1310, + "Cameroon Household Survey <> reference population" + ], + [ + 1376, + 1384, + "Cameroon Household Survey <> data geography" + ], + [ + 1448, + 1458, + "Cameroon Household Survey <> publisher" + ], + [ + 1490, + 1498, + "Cameroon Household Survey <> data geography" + ], + [ + 1529, + 1533, + "Cameroon Household Survey <> publication year" + ], + [ + 1537, + 1541, + "Cameroon Household Survey <> publisher" + ], + [ + 1584, + 1588, + "Cameroon Household Survey <> publication year" + ], + [ + 1618, + 1628, + "Cameroon Household Survey <> publisher" + ], + [ + 1640, + 1648, + "Cameroon Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "2 IMF country report, Cameroon third reviews under the extended Fund Facility Arrangements. 3 World Bank projection based on the latest Cameroon Household Survey conducted in 2014. 4 UNDP ( United Nations Development Programme ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data for projections.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data for projections.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 20, + "text": "The data will be collected and used to: ( i ) supervise the performance of PHCCs; ( ii ) monitor beneficiary accessibility progress; and ( iii ) improve the response of the project and provision of services based on intermediate output and outcome data. 47. An HIS system will be developed by the MoPH by the effectiveness to support the implementation and monitoring of the program. This includes developing the system at the central level as well as updating the current systems at the facility level in order to meet the new requirements for program implementation. Assistance would be provided to the PHCCs to upgrade their recording and reporting systems and strengthen their capacity to implement the program. 48. The Bank will conduct regular implementation support missions during which implementation progress, outputs and work plan updates, will be assessed and adjustments made as necessary. Project M & E will consist of four parts: ( i ) internal oversight by MoPH of the PHCCs; ( ii ) independent project evaluation; ( iii ) beneficiary assessment; and ( iv ) evaluation by the Bank. C. Sustainability 49. The project \u2019 s sustainability is reinforced through three elements: ( a ) alignment with GOL priorities and the national health sector strategy: the GOL strategy emphasizes short - term stabilization, medium-term resilience, while the strategic direction of the MoPH focuses at laying the foundation for Universal Health Coverage with special emphasis on the poor. These will be achieved as follows: i ) short-term stabilization will be achieved through targeting the", + "ner_text": [ + [ + 261, + 271, + "named" + ] + ], + "validated": false, + "empirical_context": "47. An HIS system will be developed by the MoPH by the effectiveness to support the implementation and monitoring of the program. This includes developing the system at the central level as well as updating the current systems at the facility level in order to meet the new requirements for program implementation.", + "type": "system", + "explanation": "However, the context indicates it is a system for implementation and monitoring, not a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'HIS system' suggests a structured collection of health information.", + "contextual_reason_agent": "However, the context indicates it is a system for implementation and monitoring, not a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 93, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 86 of 174 This is not covered by \" People provided with new or improved electricity service \" indicator. of which, refugee beneficiaries Number of refugees ( specifically ) living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data.. Estimated based on collected data about refugees and project - level data. NIGELEC / ANPER / ANERS OL. Public facilities electrified under the project The indicator captures the number of public institutions ( health facilities, schools, administrative buildings, market places, religious sites, etc. ) provided with an electricity connection under the project. The connection can come from any type of solution ( grid, mini-grid, standalone systems ). Semi - annually. NIGELEC / ANP ER / ANERSOL project databases. Determined based on the reporting of the contractors responsible for connections and Project databases. NIGELEC / ANPER / ANERS OL. People provided with clean and efficient cooking solutions under the project The indicator captures the number of people directly benefiting from clean cooking solutions that were acquired through the project including refugee and host population. Semi - annually.", + "ner_text": [ + [ + 560, + 580, + "named" + ], + [ + 4, + 14, + "project - level data <> publisher" + ], + [ + 489, + 499, + "project - level data <> publisher" + ] + ], + "validated": true, + "empirical_context": ". Estimated based on collected data about refugees and project - level data. NIGELEC / ANPER / ANERS OL.", + "type": "data", + "explanation": "In this context, 'project - level data' is used to refer to data collected specifically for projects, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'project - level data' suggests a structured collection of information related to specific projects.", + "contextual_reason_agent": "In this context, 'project - level data' is used to refer to data collected specifically for projects, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 55, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 44 Indicator Name National Water information systems established and operationalized under the project Definition / Description National water information systems established to track water data. Operationalized is defined as the systems are functional for data collection, monitoring and reporting. Frequency Bi-annually Data Source MWE / DWRM progress reports, annual Water & environment sector performance report and Water Supply Atlas. Methodology for Data Collection Responsibility for Data Collection MWE and DWRM Indicator Name State of water resources reports produced to inform decision making Definition / Description State of water resources management reports developed to inform decision making. Frequency Every two years Data Source MWE / DWRM Annual reports, annual water and environment sector performance reports. Methodology for Data Collection Responsibility for Data Collection MWE / DWDRM", + "ner_text": [ + [ + 499, + 517, + "named" + ], + [ + 614, + 646, + "Water Supply Atlas <> data description" + ] + ], + "validated": true, + "empirical_context": "Operationalized is defined as the systems are functional for data collection, monitoring and reporting. Frequency Bi-annually Data Source MWE / DWRM progress reports, annual Water & environment sector performance report and Water Supply Atlas. Methodology for Data Collection Responsibility for Data Collection MWE and DWRM Indicator Name State of water resources reports produced to inform decision making Definition / Description State of water resources management reports developed to inform decision making.", + "type": "atlas", + "explanation": "The Water Supply Atlas is explicitly listed as a data source, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned as a data source for reporting.", + "contextual_reason_agent": "The Water Supply Atlas is explicitly listed as a data source, indicating it functions as a structured collection of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 61, + "text": "Consolidat ed informatio Multiple sources - community mobilization company reports, PMU data, self - reported data, reports of target utilities, survey data. Methodology for each CE channel will be reported separately as prescribed in the POM. MEWR, KMK, PMU", + "ner_text": [ + [ + 94, + 114, + "named" + ], + [ + 145, + 156, + "self - reported data <> data type" + ] + ], + "validated": true, + "empirical_context": "Consolidat ed informatio Multiple sources - community mobilization company reports, PMU data, self - reported data, reports of target utilities, survey data. Methodology for each CE channel will be reported separately as prescribed in the POM.", + "type": "data", + "explanation": "In this context, 'self-reported data' is used as a source of information for analysis, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'self-reported data' suggests a collection of data provided by individuals.", + "contextual_reason_agent": "In this context, 'self-reported data' is used as a source of information for analysis, indicating it functions as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "007_BOSIB-e8e37b29-1d61-491d-8aad-23a07cf57740", + "page": 30, + "text": "Page 26 of 88 The World Bank Chad Agribusiness and Rural Transformation Project ( P179238 ) activities and will be assisted by a team of M & E specialists in the four regional coordination units. The project will use the Geo-Enabling Monitoring System ( GEMS ) developed by the World Bank using geo-enabled methods to undertake M & E, particularly data collection in areas difficult to reach due to insecurity or conflict. It will also use third party monitoring where needed, through UN agencies, national NGOs, or firms hired by the PCU, to collect just-in-time information via mobile apps / tablets, building on geo-tagging of activities. As part of the legacy from ProPAD, provision has also been made for the project to use the toll-free number which permits collecting feedback directly from beneficiaries. 49. A baseline survey will be conducted during the first year of the project to establish the RF reference data and verify targets. Beneficiaries will be surveyed subsequently in year 3 ( mid \u2010 term ) and year 6 ( project end ) as part of surveys covering both reference and treatment samples, to track changes in their livelihood conditions attributable to project performance. M & E reports will be issued every six months on physical implementation and results monitoring. C. Sustainability 50. Sustainability considerations have been integrated into all project components.", + "ner_text": [ + [ + 221, + 251, + "named" + ] + ], + "validated": false, + "empirical_context": "Page 26 of 88 The World Bank Chad Agribusiness and Rural Transformation Project ( P179238 ) activities and will be assisted by a team of M & E specialists in the four regional coordination units. The project will use the Geo-Enabling Monitoring System ( GEMS ) developed by the World Bank using geo-enabled methods to undertake M & E, particularly data collection in areas difficult to reach due to insecurity or conflict. It will also use third party monitoring where needed, through UN agencies, national NGOs, or firms hired by the PCU, to collect just-in-time information via mobile apps / tablets, building on geo-tagging of activities.", + "type": "system", + "explanation": "However, it is described as a monitoring system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data collection methods.", + "contextual_reason_agent": "However, it is described as a monitoring system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 46, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 35 Indicator Name People provided with access to improved sanitation services Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name People provided with access to improved sanitation services - Female ( RMS requirement ) Definition / Description Frequency Bi-annually. Data Source MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC.", + "ner_text": [ + [ + 241, + 257, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 35 Indicator Name People provided with access to improved sanitation services Definition / Description Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 33, + "text": "The DEP will build upon and create connections among socio - economic and ecological data made available through various development and scientific efforts that are taking place in the country and the project area. Indicators related to capacity building will be integrated into an overall planning, monitoring and evaluation system ( SPSE ) maintained by the DEP. Specialized forest management and forest product tracking systems, as well as data related to participatory zoning will be maintained and made available through the Forest Inventory and Management Service ( SPIAF ) of the MECNT. 81. Most baselines for tracking progress have been produced within current development efforts that the project will build upon, collaborate with, or finance directly. These baselines are presented in Annex 3. New, original baselines for parameters such as sources of income or rural and indigenous peoples ' knowledge of the Forest Code will be obtained through new, project-funded baseline studies. These studies will be conducted in the first year and repeated sampling will be done over the life of the project. 82. The set of project indicators will serve a dual purpose. They will give project managers information useful for adaptive management, and they will give policy makers clear benchmarks for evaluating the project ' s effectiveness. Output and process indicators will include among others: ( i ) statistics on office rehabilitation, equipment, and staff training at central and field offices; ( ii ) remote-sensing-based statistics on deforestation, illegal logging, and land-use changes; ( iii ) statistics on water sanitation, feeder roads, and other small participatory infrastructure projects implemented with project financing; ( iv ) trends in the abundance of key bioindicator species; ( v ) number and area covered by various types of forest land management plans \" plans agreed upon by MECNT and other l1 Timber concessions, community forests, protected areas, conservation concessions, community hunting zones, and others. 21", + "ner_text": [ + [ + 530, + 569, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicators related to capacity building will be integrated into an overall planning, monitoring and evaluation system ( SPSE ) maintained by the DEP. Specialized forest management and forest product tracking systems, as well as data related to participatory zoning will be maintained and made available through the Forest Inventory and Management Service ( SPIAF ) of the MECNT. 81.", + "type": "service", + "explanation": "However, it is described as a service, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Inventory' and 'Management' in its name, suggesting data-related functions.", + "contextual_reason_agent": "However, it is described as a service, not a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a service, not as a data source", + "tags": [] + }, + { + "filename": "191_multi-page", + "page": 12, + "text": "Project Appraisal Document Page 7 Burundi - Second Social Action Project Poverty monitoring. Another strategic choice is for smaller scale statistical monitoring. The BURSAP I is financing a relatively large-scale survey that will yield detailed baseline information on poverty. In contrast, the proposed project would finance less expensive surveys that can be repeated to provide up-to-date follow-up information on the evolution of poverty. Furthermore, these surveys would be oriented toward action to reduce poverty rather than simply gathering analytical infor: mation about poverty. To that end, the surveys would provide the information on the conditions of roads, water supply, health and education services, and local institutions that is necessary for planning reconstruction programs, and provide information on health and nutrition status. The relevant indicators would be selected through a survey of statistics users in the Government, donor, and NGO sectors. C: Project Description Summary 1. Project components: ( see Annex 2 for a detailed description and Annex 3 for a detailed cost breakdown ) The following are total costs of the project in current US dollars and include contributions from the Central Govemment and from beneficiaries. 1. Community investments Poverty 10. 7 81. 4 10. 0 93. 0 Of which alleviation a. Community participation c; 1. 00 7. 6 1. 0 100. 0 b.", + "ner_text": [ + [ + 905, + 931, + "named" + ] + ], + "validated": false, + "empirical_context": "To that end, the surveys would provide the information on the conditions of roads, water supply, health and education services, and local institutions that is necessary for planning reconstruction programs, and provide information on health and nutrition status. The relevant indicators would be selected through a survey of statistics users in the Government, donor, and NGO sectors. C: Project Description Summary 1.", + "type": "survey", + "explanation": "However, it is not a dataset itself but rather a method of gathering information from users about statistics.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often relates to data collection.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a method of gathering information from users about statistics.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 65, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 53. Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "ner_text": [ + [ + 995, + 1000, + "named" + ], + [ + 15, + 23, + "DHIS2 <> data geography" + ], + [ + 1062, + 1065, + "DHIS2 <> author" + ], + [ + 1145, + 1148, + "DHIS2 <> author" + ] + ], + "validated": true, + "empirical_context": "Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "type": "database", + "explanation": "DHIS2 is explicitly referenced as a data source for collecting health-related data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because DHIS2 is mentioned as a source for data collection in the context of health indicators.", + "contextual_reason_agent": "DHIS2 is explicitly referenced as a data source for collecting health-related data, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "186_multi-page", + "page": 54, + "text": "In addition, a Social Assessment is currently underway with Bank assistance to identify issues related to disadvantaged groups, especially orphans and child-headed households. The information from the household survey and social assessment will be used to develop intervention strategies to widen access to education and training, especially for targeted groups, as well as identify other areas of analysis which need further investigation. Development of the MIS, already begun during project preparation ( with PPF financing ), will be an ongoing and important feature of the project. Relevant education and financial indicators, including school-level information indicators, will be agreed for inclusion in RHRDP efforts to develop the MIS. Finally, the AIDS epidemic and its impact on the education system will be further analyzed. Changes in demand for education and in the supply of labor need to be factored into the planning for human resource capacity of the sector. Projections that take these changes into account are needed to provide the basis for this planning process.", + "ner_text": [ + [ + 740, + 743, + "named" + ] + ], + "validated": false, + "empirical_context": "Development of the MIS, already begun during project preparation ( with PPF financing ), will be an ongoing and important feature of the project. Relevant education and financial indicators, including school-level information indicators, will be agreed for inclusion in RHRDP efforts to develop the MIS. Finally, the AIDS epidemic and its impact on the education system will be further analyzed.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it relates to information management.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 14, + "text": "Sectoral and Institutional Context * 1 Kenya Economic Update, June 2023 2 World Bank, Climate Change Knowledge Portal 3 World Bank Climate Change Knowledge Portal - Kenya. https: / / climateknowledgeportal. worldbank. org / country / kenya / vulnerability 4 United Nations Environment Program: \u201c Climate change could spark floods in world \u2019 s largest desert lake: new study \u201d, 2021. 5 World Bank data. Prevalence of food insecurity in the population \u2013 KenyaError! Hyperlink reference not valid. 6 Kenya: IPC Acute Food Insecurity and Acute Malnutrition Analysis ( July - December 2022 ) 7 For a detailed map see: https: / / data2. unhcr. org / en / country / ken 8 UNHCR Statistics package. Kenya registered refugees and asylum seekers ( 31 July 2023 ) 9 The Shirika Plan is a Government of Kenya socioeconomic development plan outlining the transition from refugee encampment to integrated settlements.", + "ner_text": [ + [ + 86, + 117, + "named" + ] + ], + "validated": false, + "empirical_context": "Sectoral and Institutional Context * 1 Kenya Economic Update, June 2023 2 World Bank, Climate Change Knowledge Portal 3 World Bank Climate Change Knowledge Portal - Kenya. https: / / climateknowledgeportal.", + "type": "portal", + "explanation": "However, it is mentioned as a portal and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Knowledge Portal', which suggests a collection of information.", + "contextual_reason_agent": "However, it is mentioned as a portal and not explicitly as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a portal, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "118_PAD1199-PAD-P144637-IDA-R2015-0247-1-Box393201B-OUO-9", + "page": 74, + "text": "The objectives of the M & E activities are to provide PNDP III staff and stakeholders with regular information on project implementation and outputs; identify bottlenecks and impediments in the project implementation; ensure that all the activities under PNDP III are implemented in compliance with the PIM; determine to what extent the NCU achieves its goals and objectives, and how it affects the intended beneficiaries \u2019 social conditions and capacities; and maintain acceptable performance standards for environmental and social impacts. The arrangements for M & E are critical given the multitude of capacity building and subproject activities that will take place under the project. 89. The project management information system ( MIS ) set up during PNDP II will be adjusted to the needs of the proposed project. Indeed, the new MIS will include the following adjustments: ( i ) complete on-line connection with RCUs and NCU to strengthen decentralized supervision; ( ii ) extend MIS to allow comparison of planned versus actual performance ( i. e., physical and financial ) in a format that can also be used in reports to be presented to government and Bank; ( iii ) integrate financial management system, and ( iv ) launching of the MIS on the internet for public access with the aim of promoting transparency. 90. The monitoring and evaluation system of PNDP III is designed to consolidate and improve the system used during the first and second phase of the program. It will be organized at four levels: communal, departmental, regional, and national. It will use the PRO-ADP software ( Progiciel d \u2019 Appui au D\u00e9veloppement Participatif ) developed during phase 2 to facilitate the monitoring of the implementation of CDP at the communal level. 91. The communal level will be the operational level of the system. Communal development agents will be trained to monitor the implementation of CDPs and to collect data and transmit them to the departmental level. These data will include feedback from communes on PNDP implementation as well as environmental monitoring indicators to determine the effectiveness of environmental mitigation measures implemented under subprojects and the extent to which the", + "ner_text": [ + [ + 697, + 734, + "named" + ] + ], + "validated": false, + "empirical_context": "89. The project management information system ( MIS ) set up during PNDP II will be adjusted to the needs of the proposed project. Indeed, the new MIS will include the following adjustments: ( i ) complete on-line connection with RCUs and NCU to strengthen decentralized supervision; ( ii ) extend MIS to allow comparison of planned versus actual performance ( i.", + "type": "system", + "explanation": "However, it is described as a project management information system, which indicates it is a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information system' which often relates to data handling.", + "contextual_reason_agent": "However, it is described as a project management information system, which indicates it is a system rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 155, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 151 of 159 Figure 5. 1. Education Levels and Wage, Nonwage and Agriculture, 2007 Sources: Third Cameroon Household Survey ( ECAM3 ), 2007 of the NIS; World Bank staff estimates. Note: The wage category includes those employed in the public sector receiving wages plus those working in the private, nonagricultural sector receiving wages. The nonwage category includes those working in the nonfarm sector ( small enterprises and informal sector ). The agriculture category includes those employed in the private agriculture sector receiving wages plus those working in the farm sector ( small and family farms ). Figure 5. 2. Education Levels and Wage, Nonwage, and Agriculture, 2014 Source: Fourth Cameroon Household Survey ( ECAM4 ) 2014 of the NIS; World Bank staff estimates. Note: The wage category includes those employed in the public sector receiving wages plus those working in the private, nonagricultural sector receiving wages. The nonwage category includes those working in the nonfarm sector ( small enterprises, informal sector ).", + "ner_text": [ + [ + 173, + 204, + "named" + ], + [ + 4, + 14, + "Third Cameroon Household Survey <> publisher" + ], + [ + 159, + 163, + "Third Cameroon Household Survey <> reference year" + ], + [ + 233, + 243, + "Third Cameroon Household Survey <> publisher" + ], + [ + 834, + 844, + "Third Cameroon Household Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "1. Education Levels and Wage, Nonwage and Agriculture, 2007 Sources: Third Cameroon Household Survey ( ECAM3 ), 2007 of the NIS; World Bank staff estimates. Note: The wage category includes those employed in the public sector receiving wages plus those working in the private, nonagricultural sector receiving wages.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of information for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of information for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "056_Niger-COVID-19-Emergency-Response-Project", + "page": 27, + "text": "Large volumes of personal data, personally identifiable information and sensitive data are likely to be collected and used in connection with the management of the COVID-19 outbreak under circumstances where measures to ensure the legitimate, appropriate and proportionate use and processing of that data may not feature in national law or data governance regulations, or be routinely collected and managed in health information systems. In order to guard against abuse of that data, the Project will incorporate best international practices for dealing with such data in such circumstances. Such measures may include, by way of example, data minimization ( collecting only data that is necessary for the purpose ); data accuracy ( correct or erase data that are not necessary or are inaccurate ), use limitations ( data are only used for legitimate and related purposes ), data retention ( retain data only for as long as they are necessary ), informing data subjects of use and processing of data, and allowing data subjects the opportunity to correct information about them, etc.", + "ner_text": [ + [ + 17, + 30, + "named" + ] + ], + "validated": false, + "empirical_context": "Large volumes of personal data, personally identifiable information and sensitive data are likely to be collected and used in connection with the management of the COVID-19 outbreak under circumstances where measures to ensure the legitimate, appropriate and proportionate use and processing of that data may not feature in national law or data governance regulations, or be routinely collected and managed in health information systems. In order to guard against abuse of that data, the Project will incorporate best international practices for dealing with such data in such circumstances.", + "type": "data", + "explanation": "'Personal data' is not a structured collection of data or a dataset, but rather refers to types of information that can be collected.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'personal data' refers to a dataset due to its mention in the context of data collection and management.", + "contextual_reason_agent": "'Personal data' is not a structured collection of data or a dataset, but rather refers to types of information that can be collected.", + "contextual_signal": "mentioned only as a type of information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 36, + "text": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 32 of 44 component 1. 2 e data Community counselors trained to lead community sessions Number of community counselors ( m\u00e8res conseill\u00e8res ) trained to deliver community sessions as part of the accompanying measures of the project Quarterly Project administrativ e data Routine monitoring SEAS Beneficiaries satisfied with community sessions Percentage of beneficiaries of the cash transfer program who participate in community sessions as part of the accompanying measures that are satisfied with the sessions Twice Survey Survey at middle and end of project SEAS PNSF beneficiary households with biometric data in the social registry Percentage of PNSF beneficiary households with biometric data either directly in the social registry or in a database linked to the social registry Quarterly Project administrativ e data Routine monitoring SEAS PNSF beneficiary households paid within 15 days of scheduled payment date Percentage of beneficiaries paid within 15 days of date specified in POM Quarterly Project administrativ e data Routine monitoring SEAS People in the social registry that received national identity cards with the support of the project Number of people for whom the project facilitated obtaining a national identity card or birth certificate Quarterly Project administrativ e Routine monitoring SEAS PNSF complaints registered electronically and resolved by the time of the next cash transfer payment Percentage of complaints i ) registered in PNSF MIS and ii ) with a resolution Quarterly Project administrativ e data Routine monitoring SEAS", + "ner_text": [ + [ + 703, + 718, + "named" + ], + [ + 1184, + 1207, + "social registry <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Integrated Cash Transfer and Human Capital Project ( P166220 ) Page 32 of 44 component 1. 2 e data Community counselors trained to lead community sessions Number of community counselors ( m\u00e8res conseill\u00e8res ) trained to deliver community sessions as part of the accompanying measures of the project Quarterly Project administrativ e data Routine monitoring SEAS Beneficiaries satisfied with community sessions Percentage of beneficiaries of the cash transfer program who participate in community sessions as part of the accompanying measures that are satisfied with the sessions Twice Survey Survey at middle and end of project SEAS PNSF beneficiary households with biometric data in the social registry Percentage of PNSF beneficiary households with biometric data either directly in the social registry or in a database linked to the social registry Quarterly Project administrativ e data Routine monitoring SEAS PNSF beneficiary households paid within 15 days of scheduled payment date Percentage of beneficiaries paid within 15 days of date specified in POM Quarterly Project administrativ e data Routine monitoring SEAS People in the social registry that received national identity cards with the support of the project Number of people for whom the project facilitated obtaining a national identity card or birth certificate Quarterly Project administrativ e Routine monitoring SEAS PNSF complaints registered electronically and resolved by the time of the next cash transfer payment Percentage of complaints i ) registered in PNSF MIS and ii ) with a resolution Quarterly Project administrativ e data Routine monitoring SEAS", + "type": "registry", + "explanation": "In this context, 'social registry' is indeed a dataset as it refers to a structured collection of data about beneficiary households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'social registry' is a dataset because it is mentioned in the context of biometric data and beneficiary households.", + "contextual_reason_agent": "In this context, 'social registry' is indeed a dataset as it refers to a structured collection of data about beneficiary households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 94, + "text": "Providing additional support to these communities under the project will reduce the pressure on the education system that would be expected in receiving an increase in refugee students. Disaggregated data by refugee / host community status will be collected and reported under the project. Efforts will be made to integrate gradually existing refugee-related data into the integrated EMIS developed under the project. 50. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees. The objective of this result area is to: ( a ) promote refugee welfare and inclusion in host communities \u2019 socio-economic structure; ( b ) help ensure access to and quality of services and basic infrastructure to refugees and host communities; and ( c ) strengthen Government finances where these have been strained by expenditures related to their hosting responsibilities. These objectives are consistent with the IDA18 RSW resource allocation framework implementation guidelines. 51. Activities supported under this result area will include: ( a ) promoting refugee welfare and inclusion in host communities: given the project specific profile of refugees, having a very low enrollment rate in their origin country ( around 20 percent ), the project will support an awareness program for refugees on sociocultural obstacles that influence school attendance: TA will be provided in the areas of", + "ner_text": [ + [ + 384, + 388, + "named" + ] + ], + "validated": false, + "empirical_context": "Disaggregated data by refugee / host community status will be collected and reported under the project. Efforts will be made to integrate gradually existing refugee-related data into the integrated EMIS developed under the project. 50.", + "type": "system", + "explanation": "'EMIS' is mentioned as a system developed under the project, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is associated with data collection and reporting.", + "contextual_reason_agent": "'EMIS' is mentioned as a system developed under the project, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 3: Enhanced digital literacy / skills. DLR 5. 4: Utilization of the HRMIS and other digital platforms for recording competitive recruitments, performance-based promotions, and digital upskilling. Data source / Agency DLR 5. 0: SPAC DLR 5. 1 and 2: SPAC Assessment Center HR interview and evaluation records, as well as HRMIS data records. DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5. 4: HRMIS data reports. Verification Entity KACE. Procedure DLR 5. 0: Verification of adopted SPAC by-law and subsidiary regulation. DLR 5. 1: Verification of recruitment records and competency assessment reports shared biannually by the SPAC and competence-based recruitments recorded by the HRMIS. DLR 5. 2: Verification of promotion records and competency assessment reports shared biannually by the SPAC and performance-based promotions recorded by the HRMIS and other digital platforms. DLR 5. 3: Verification of digital skill certifying training records shared annually by IPA and digital skill credentials recorded by the HRMIS and other digital platforms. DLR 5. 4: Cross-validation of HRMIS and other digital platforms data reports with SPAC Assessment Center interview and evaluation records and IPA training assessment reports. DLI 6 on the establishment of digital health standards across a national Health Information Exchange ( HIE ) Formula Scalable. The DLI disburses up to US $ 29 million across the three following DLRs:", + "ner_text": [ + [ + 400, + 405, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR 5. 3: Training registry maintained by the IPA and HRMIS data records. DLR 5.", + "type": "system", + "explanation": "HRMIS is not explicitly described as a data source but rather as a system that maintains records.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it is mentioned alongside a training registry and data records.", + "contextual_reason_agent": "HRMIS is not explicitly described as a data source but rather as a system that maintains records.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "182_multi0page", + "page": 48, + "text": "On the data production side, the plan calls for institutionalizing the regular collection of household-level information by introducing a multi-topic panel household survey in the Strategic Plan of Statistics of INSTAT. As a result of the Project ' s support and the consensus reached among stakeholders, the next 5-year plan calls for the implementation of a Living Standard Measurement Survey ( LSMS ) in 2002, to be repeated after 3 years. The system also includes annual panel surveys on a sub-sample of LSMS households to monitor trends in areas like poverty, unemployment, migration and social services on a more frequent basis than the LSMS allows for. An important component of the assistance will be to promote activities to foster a closer dialogue between data producers and the various data users from governmental and non-governmental institutions, the donor community, and the civil society at large. This consultation process will be encouraged since the early stages of the Project ' s cycle and is expected to increase the relevance of the information produced, and so augment its role for policy-making purposes. On the data use side, the strategy calls for building analytical capacity within the Government to take full advantage of the statistical information being generated by INSTAT. Towards this end, the Project will provide continuous support both in the analysis and dissemination of policy-relevant information primarily through technical assistance, training and the provision of the necessary equipment and software for the Poverty Unit to be established in MOLSA. Sub-Component 2: Social Services Policy Development: The objectives of the sub-component are to assist the MOLSA to: a ) strengthen its policy formulation capacity, b ) further develop legislative and institutional framework for social services, c ) develop capacity for policy monitoring, evaluation and program improvement, d ) design and implement a national public awareness campaign on social exclusion, including - 45 -", + "ner_text": [ + [ + 360, + 394, + "named" + ], + [ + 212, + 218, + "Living Standard Measurement Survey <> publisher" + ], + [ + 397, + 401, + "Living Standard Measurement Survey <> acronym" + ], + [ + 407, + 411, + "Living Standard Measurement Survey <> publication year" + ], + [ + 1300, + 1306, + "Living Standard Measurement Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "On the data production side, the plan calls for institutionalizing the regular collection of household-level information by introducing a multi-topic panel household survey in the Strategic Plan of Statistics of INSTAT. As a result of the Project ' s support and the consensus reached among stakeholders, the next 5-year plan calls for the implementation of a Living Standard Measurement Survey ( LSMS ) in 2002, to be repeated after 3 years. The system also includes annual panel surveys on a sub-sample of LSMS households to monitor trends in areas like poverty, unemployment, migration and social services on a more frequent basis than the LSMS allows for.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey designed to collect and analyze data on living standards.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects household-level information.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey designed to collect and analyze data on living standards.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 42, + "text": "While the elements discussed earlier support the overall sustainability of the project \u2019 s objective, the limited fiscal space and the volatility of the security context may pose challenges for the sustainability of some of the activities in the long term. The aim of the project is to improve the quality of teaching and learning and strengthen education planning and management. The majority of the project \u2019 s funding is allocated to quality improvement and capacity building, which has minor implications on the fiscal space of the Government. Some of the activities, however, may imply incremental costs whose management by the government budget remains uncertain, such as the financing of school grants. It is expected that being able to demonstrate measurable impact of these activities on the improvement of teaching practices and learning outcomes will help mobilize additional domestic resources and attract further external financing needed to improve the education system in Niger. IV. PROJECT APPRAISAL SUMMARY A. Technical, Economic and Financial Analysis 85. The design of project activities was informed by international and national evidence of good practices, which ensures its technical viability. The technical design of the project took into consideration capacity constraints and lessons learned from previous operations in Niger, particularly the GPE-PAEQ, and was supported by analytical work, such as the World Development Report 2018 and the Ending Learning Poverty Report, as well as relevant TA such as the Advisory Services and Analytics on Makaranta schools ( P168795 ), whose findings informed a project subcomponent. 86. The proposed project uses the 2014 household survey National Survey on Household Living Conditions and Agriculture ( Enqu\u00eate nationale sur les Conditions de Vie des M\u00e9nages et l \u2019 Agriculture, ECVMA ) and recent administrative data to identify target beneficiaries ( both in terms of poverty profile and geographic areas ) and estimate the economic and social benefits of the project. Econometric methods", + "ner_text": [ + [ + 1705, + 1767, + "named" + ], + [ + 987, + 992, + "National Survey on Household Living Conditions and Agriculture <> data geography" + ], + [ + 1346, + 1351, + "National Survey on Household Living Conditions and Agriculture <> data geography" + ], + [ + 1683, + 1687, + "National Survey on Household Living Conditions and Agriculture <> publication year" + ], + [ + 1688, + 1704, + "National Survey on Household Living Conditions and Agriculture <> data type" + ], + [ + 1865, + 1884, + "National Survey on Household Living Conditions and Agriculture <> data type" + ], + [ + 1897, + 1917, + "National Survey on Household Living Conditions and Agriculture <> reference population" + ], + [ + 2073, + 2091, + "National Survey on Household Living Conditions and Agriculture <> usage context" + ] + ], + "validated": true, + "empirical_context": "86. The proposed project uses the 2014 household survey National Survey on Household Living Conditions and Agriculture ( Enqu\u00eate nationale sur les Conditions de Vie des M\u00e9nages et l \u2019 Agriculture, ECVMA ) and recent administrative data to identify target beneficiaries ( both in terms of poverty profile and geographic areas ) and estimate the economic and social benefits of the project. Econometric methods", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly used in the project to identify target beneficiaries and estimate benefits.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly used in the project to identify target beneficiaries and estimate benefits.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 672, + 676, + "named" + ] + ], + "validated": false, + "empirical_context": "Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data reporting and integration.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "104_PAD2161-PUBLIC-P162004", + "page": 21, + "text": "Therefore, MoNE has acquired the experience and capacity to implement the project and scale up its infrastructure capacity. ( b ) Turkey \u2019 s regulations and codes for structural design and seismic safety are well developed and applying those would avoid the creation of new risks and improve resilience in education facilities. ( c ) Project design and location selection is based on thorough analysis of quality education needs of SuTP at the provincial and community levels. Increasing education facilities \u2019 capacity is a major expressed need, as well as the support mechanisms to encourage SuTP children and youth to remain in school. ( d ) The prioritization of education facilities to be constructed was done by MoNE during project preparation and is in line with the findings of the World Bank-Government \u2019 s joint Muhtar survey ( formally titled \u2018 Mapping Location and Living Conditions of Syrians under Temporary Protection in Turkey: Results from National Muhtar Survey \u2019 ), which identifies localities where access to education is most stringent. ( e ) As learned from other country examples ( for example, the Syrian response in Jordan and Lebanon ), the geographical proximity of construction sites to communities experiencing fast growth due to intake of displaced populations provides useful criteria for education access expansion. Thus, the design and prioritization of investment packages seek to maximize efficient usage by the careful selection of locations to construct new or expanded education facilities.", + "ner_text": [ + [ + 822, + 835, + "named" + ], + [ + 130, + 136, + "Muhtar survey <> data geography" + ], + [ + 432, + 436, + "Muhtar survey <> reference population" + ], + [ + 790, + 811, + "Muhtar survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Increasing education facilities \u2019 capacity is a major expressed need, as well as the support mechanisms to encourage SuTP children and youth to remain in school. ( d ) The prioritization of education facilities to be constructed was done by MoNE during project preparation and is in line with the findings of the World Bank-Government \u2019 s joint Muhtar survey ( formally titled \u2018 Mapping Location and Living Conditions of Syrians under Temporary Protection in Turkey: Results from National Muhtar Survey \u2019 ), which identifies localities where access to education is most stringent. ( e ) As learned from other country examples ( for example, the Syrian response in Jordan and Lebanon ), the geographical proximity of construction sites to communities experiencing fast growth due to intake of displaced populations provides useful criteria for education access expansion.", + "type": "survey", + "explanation": "The Muhtar survey is explicitly mentioned as a source of findings that inform project preparation, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides findings relevant to education access.", + "contextual_reason_agent": "The Muhtar survey is explicitly mentioned as a source of findings that inform project preparation, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "046_Cameroon-COVID-19-Preparedness-and-Response-Project", + "page": 26, + "text": "Reporting: The MoPH will produce a periodic report each 90 days based on agreed targets and the progress made of implementation of critical project activities. This report will contain tables of performance against indicators for the proposed project. 54. For real-time data collection and analysis, the project will implement the Geo-enabling Method for Monitoring and Supervision ( GEMS ). The GEMS method was developed by the Fragility, Conflict and Violence ( FCV ) Group of the World Bank and enables project teams to use open source ICT tools for in - field collection of structured digital data from the field that automatically feeds into a centralized M & E system and MIS. The integrated data can include any kind of indicators, based on tailor-made forms; photos, audio, videos; time and date stamps; and GPS coordinates that allow for automated geo-mapping of the information. Using these tools systematically allows the project to enhance the transparency and accuracy of project planning as well as M & E and third-party monitoring throughout the project cycle. Moreover, GEMS allow to establish a digital platform for remote supervision, real-time safeguards monitoring, and portfolio mapping for coordination across project components as well as with other operations in the country. 55. GEMS use in the context of COVID-19 monitoring. While GEMS can support operational monitoring of the COVID-19 response in a variety of ways, it is not intended to be used for surveillance activities", + "ner_text": [ + [ + 578, + 601, + "named" + ] + ], + "validated": false, + "empirical_context": "For real-time data collection and analysis, the project will implement the Geo-enabling Method for Monitoring and Supervision ( GEMS ). The GEMS method was developed by the Fragility, Conflict and Violence ( FCV ) Group of the World Bank and enables project teams to use open source ICT tools for in - field collection of structured digital data from the field that automatically feeds into a centralized M & E system and MIS. The integrated data can include any kind of indicators, based on tailor-made forms; photos, audio, videos; time and date stamps; and GPS coordinates that allow for automated geo-mapping of the information.", + "type": "data", + "explanation": "'Structured digital data' is described as a type of information collected rather than a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'structured digital data' refers to a dataset due to the term 'structured' suggesting organization.", + "contextual_reason_agent": "'Structured digital data' is described as a type of information collected rather than a specific dataset or data source.", + "contextual_signal": "mentioned only as a type of information, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 651, + 656, + "named" + ], + [ + 347, + 355, + "UNISE <> data geography" + ], + [ + 764, + 782, + "UNISE <> data type" + ], + [ + 1240, + 1263, + "UNISE <> data description" + ] + ], + "validated": true, + "empirical_context": "71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs.", + "type": "system", + "explanation": "UNISE is indeed a data management system that collects and manages data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UNISE is a dataset because it is mentioned alongside other data collection systems.", + "contextual_reason_agent": "UNISE is indeed a data management system that collects and manages data, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 3, + "validated": 1, + "not_validated": 2 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 33, + "text": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ). 52. While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to monitor and evaluate data effectively on refugee-related education challenges. Learning assessments will include modules to analyze the learning outcomes of refugees and efforts will be made toward including disaggregated data on refugees in statistical yearbooks. Priority Area 4: Supporting the educational needs of public schools in host communities affected by refugees. 53. Refugee-related project activities, estimated at US $ 36 million, fall under Results Areas 1, 3, and 7.", + "ner_text": [ + [ + 346, + 350, + "named" + ] + ], + "validated": false, + "empirical_context": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ).", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "089_UGANDA-PAD-04272018", + "page": 66, + "text": "Only Kasese MLG has the position of the Town Clerk, Principal Treasurer and Senior Environment Officer substantively filled. Moreover, even the staff substantively appointed do not have previous experience in handling activities to the magnitude expected under USMID AF. 42. An analysis of the staffing levels for a sample of 7 USMID and 4 coming MLGs indicates that there are gaps in both USMID and 4 additional MLIGs, see the summary table below. The sample shows that the coming 4 additional USMID MLGs have significant gaps in especially procurement and engineering whereas the Finance and IA positions are filled with gaps as per the current 14 MLGs. Table 8: Overview of Required Staffing Positions Filled of required positions ( % ) Finance Department Internal Audit Procurement Planning Engineering 7 USMID ( average ) 66 % 62 % 92 % 79 % 54 % 4 \u201c additional \u201d ULGs 66 % 71 % 50 % 63 % 49 % Source: Self-reported data during field level collections, October 2017. 43. The capacity gaps identified across all the 18 municipal LGs assessed still falls into three broad categories, namely: ( i ) gaps in numbers of key positions filled, ( ii ) operation skills to backup academic qualifications, and ( iii ) inadequate tools, equipment and facilities. The USMID Program will contribute to addressing the last two gaps. The first gap is structural and can only be addressed with the involvement of Ministry of Finance, Ministry of Public Service, and Ministry of LGs. Although the municipal LGs can use part of the Program fund for investment servicing cost ( procurement of technical support for engineering design, preparation of bidding documents and supervision ), there is need to continue building their technical and managerial capacity to handle the significant increase in development funds. For the additional", + "ner_text": [ + [ + 907, + 925, + "named" + ] + ], + "validated": false, + "empirical_context": "The sample shows that the coming 4 additional USMID MLGs have significant gaps in especially procurement and engineering whereas the Finance and IA positions are filled with gaps as per the current 14 MLGs. Table 8: Overview of Required Staffing Positions Filled of required positions ( % ) Finance Department Internal Audit Procurement Planning Engineering 7 USMID ( average ) 66 % 62 % 92 % 79 % 54 % 4 \u201c additional \u201d ULGs 66 % 71 % 50 % 63 % 49 % Source: Self-reported data during field level collections, October 2017. 43.", + "type": "data", + "explanation": "However, 'self-reported data' is not a structured collection or dataset in this context, but rather a description of the type of information collected.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'self-reported data' refers to a dataset because it implies a collection of information gathered from individuals.", + "contextual_reason_agent": "However, 'self-reported data' is not a structured collection or dataset in this context, but rather a description of the type of information collected.", + "contextual_signal": "mentioned only as a type of information collected, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 25, + "text": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 22 of 93 publication of public finance data and the setup of the BOOST25 database on public budget and spending recently developed with World Bank support to ensure it is updated and published. \uf0b7 Subcomponent 1. 3: Deploying new payroll management. This subcomponent will support the deployment of the recently developed integrated HR / payroll management system ( SIGIPES II ) in MINEDUB and MINSANTE. This will require the completion of the ongoing update of the HR database for the selected ministries, acquisition of additional IT equipment, and training of the respective units in charge of HR / payroll in these ministries including change management support ( possibly using the RRA ). Finally, the project will support the use of SIGIPES II for HR analysis, the update of the Organizational Framework ( cadres organiques ), and strategic staffing planning for MINEDUB and MINSANTE. \uf0b7 Subcomponent 1. 4: Strengthening SOE portfolio management. This subcomponent will support the Government \u2019 s efforts to strengthen the corporate governance framework and Government oversight of the public enterprise sector.", + "ner_text": [ + [ + 129, + 148, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Strengthening Public Sector Effectiveness and Statiscal Capacity Project ( P151155 ) Page 22 of 93 publication of public finance data and the setup of the BOOST25 database on public budget and spending recently developed with World Bank support to ensure it is updated and published. \uf0b7 Subcomponent 1.", + "type": "data", + "explanation": "However, 'public finance data' is not explicitly described as a structured collection or dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'public finance data' refers to a dataset due to its mention in the context of a project focused on data publication.", + "contextual_reason_agent": "However, 'public finance data' is not explicitly described as a structured collection or dataset in this context.", + "contextual_signal": "mentioned only as data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 27, + "text": "Page | 18 \u2022 World Bank diagnostics and international benchmarks also warrant the Program \u2019 s sectoral focus on education and health. According to the Global Digital Health Monitor 202313, the digital landscape in the health sector is considered more mature in Jordan compared to neighboring countries in the MENA region, especially in developing digital services and applications ( see the technical assessment for further detail ). However, due to limited governance and multiple services and systems designed and operated in silos, health information systems have been fragmented with inconsistent data standards and quality. The education sector has also been advancing on the digital front. Jordan was one of the first countries in the region to respond to the COVID-19 pandemic and school closures by developing an online learning platform called Darsak covering the curriculum \u2019 s core subjects of Arabic, English, math, and science for grades 1 through 12. In addition, a newly launched platform for teacher training offers courses on distance learning tools, blended learning, and educational technology.", + "ner_text": [ + [ + 852, + 858, + "named" + ] + ], + "validated": false, + "empirical_context": "The education sector has also been advancing on the digital front. Jordan was one of the first countries in the region to respond to the COVID-19 pandemic and school closures by developing an online learning platform called Darsak covering the curriculum \u2019 s core subjects of Arabic, English, math, and science for grades 1 through 12. In addition, a newly launched platform for teacher training offers courses on distance learning tools, blended learning, and educational technology.", + "type": "program", + "explanation": "'Darsak' is mentioned as an online learning platform, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Darsak' is a dataset because it is related to education and digital learning.", + "contextual_reason_agent": "'Darsak' is mentioned as an online learning platform, not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 764, + 782, + "named" + ] + ], + "validated": false, + "empirical_context": "DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level.", + "type": "data", + "explanation": "'Disaggregated data' is mentioned in a general sense and does not refer to a specific structured collection or dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'disaggregated data' refers to a dataset because it implies a structured collection of data broken down into subcategories.", + "contextual_reason_agent": "'Disaggregated data' is mentioned in a general sense and does not refer to a specific structured collection or dataset in this context.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 40, + "text": "Through BHRIMS a national M & E curriculum has been developed and training o f stakeholders has been initiated and i s ongoing at the Institute of Development Management. Through support from development partners ( Le. ACHAP and BOTUSA ), M & E personnel have either been placed at the sectoral and district levels or plans are underway to have them placed at these levels. 5. While data collection and reporting at national, aggregate level i s already well advanced in Botswana, monitoring o f activities and results at local level is, as in most countries, less well established. However, changes at local and community level are critical to significantly change the course o f the epidemic. The project, through a results-based design o f the civil society and private sector component, has built in the measuring and reporting o f baseline, progress and project completion data. This design also permits assessing the effectiveness o f specific Calls for Proposals to bring about change through social mobilization o f civil society and private sector organizations. These changes at community level are captured through the same indicators as those used in the BAIS. They also contribute to changes at aggregate level as reflected in the BAIS reports but cannot be compared as such since the denominators are different. 31", + "ner_text": [ + [ + 1167, + 1171, + "named" + ], + [ + 471, + 479, + "BAIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "This design also permits assessing the effectiveness o f specific Calls for Proposals to bring about change through social mobilization o f civil society and private sector organizations. These changes at community level are captured through the same indicators as those used in the BAIS. They also contribute to changes at aggregate level as reflected in the BAIS reports but cannot be compared as such since the denominators are different.", + "type": "report", + "explanation": "BAIS is indeed used as a source of information for assessing changes, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed BAIS is a dataset because it is referenced in the context of capturing indicators and reporting changes.", + "contextual_reason_agent": "BAIS is indeed used as a source of information for assessing changes, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source of indicators and reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "114_PAD860-PAD-P147854-R2016-0133-1-Box396255B-OUO-9", + "page": 25, + "text": "The WQI summarizes a large amount of water quality data scores, reported as a total number between 1 and 100, with ( i ) 90-100 as excellent, ( ii ) 75-90 as good, 34 Ministries of Environment, Energy and Water, Industry, Agriculture, Public Health, Interior, and Municipalities; the Council for Development and Reconstruction; the BWE; the LRA; the National Council for Scientific Research; and the Municipalities of Zahl\u00e9, Baalbeck, Ferzol, Marj, Anjar, and Jeb Jennin. 35 Ammonia ( NH3 ), Chlorine ( Cl - ), Conductivity, Dissolved Oxygen ( DO ), Nitrate ( NO3 - ), Nitrite ( NO2 - ), pH, Phosphate ( PO4 3 - ), Sulfate ( SO4 2 - ), and Total Dissolved Solids ( TDS ).", + "ner_text": [ + [ + 37, + 62, + "named" + ] + ], + "validated": false, + "empirical_context": "The WQI summarizes a large amount of water quality data scores, reported as a total number between 1 and 100, with ( i ) 90-100 as excellent, ( ii ) 75-90 as good, 34 Ministries of Environment, Energy and Water, Industry, Agriculture, Public Health, Interior, and Municipalities; the Council for Development and Reconstruction; the BWE; the LRA; the National Council for Scientific Research; and the Municipalities of Zahl\u00e9, Baalbeck, Ferzol, Marj, Anjar, and Jeb Jennin. 35 Ammonia ( NH3 ), Chlorine ( Cl - ), Conductivity, Dissolved Oxygen ( DO ), Nitrate ( NO3 - ), Nitrite ( NO2 - ), pH, Phosphate ( PO4 3 - ), Sulfate ( SO4 2 - ), and Total Dissolved Solids ( TDS ).", + "type": "data", + "explanation": "However, it is not a structured collection of data but rather a summary of scores derived from various measurements.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of scores related to water quality.", + "contextual_reason_agent": "However, it is not a structured collection of data but rather a summary of scores derived from various measurements.", + "contextual_signal": "described as scores rather than a dataset", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 45, + "text": "The World Bank Emergency Food Security Project ( P178936 ) Page 41 of 54 livestock producers in Jordan system representative sample of project beneficiaries Team Female beneficiaries reporting adequate access to affordable animal feed Level of access to affordable animal feed reported by female livestock producers in Jordan Quarterly Project monitoring system Animal feed access survey of a representative sample of project beneficiaries, with data disaggregated by gender Project Coordination Team Monitoring tool for access to bread developed and accessible to the public External users able to access a web-based and spatially explicit monitoring tool for access to bread Twice per year Project monitoring system Internal validation of accessibility of monitoring tool Project Coordination Team Value chain assessments for basic commodities carried out and discussed with stakeholders Reports outlining value chain assessments and related stakeholder engagement activities Twice per year Project monitoring system Progress status report of analytical and stakeholder engagement work Project Coordination Team Financial instruments for commodity risk management identified and discussed with stakeholders Report outlining financial instruments for commodity risk management and related stakeholder engagement activities Twice per year Project monitoring system Progress status report of analytical and stakeholder engagement work Project Management Team Fiscally sustainable policy options for food security identified and discussed with stakeholders Report outlining fiscally sustainable policy options for food security and related stakeholder engagement activities Twice per year Project monitoring system Progress status report of analytical and stakeholder engagement work Project Coordination Team Platforms for grain storage built and equipped by", + "ner_text": [ + [ + 362, + 387, + "named" + ], + [ + 4, + 14, + "Animal feed access survey <> publisher" + ], + [ + 73, + 92, + "Animal feed access survey <> reference population" + ], + [ + 96, + 102, + "Animal feed access survey <> data geography" + ], + [ + 162, + 182, + "Animal feed access survey <> reference population" + ], + [ + 289, + 315, + "Animal feed access survey <> reference population" + ], + [ + 319, + 325, + "Animal feed access survey <> data geography" + ], + [ + 418, + 439, + "Animal feed access survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Emergency Food Security Project ( P178936 ) Page 41 of 54 livestock producers in Jordan system representative sample of project beneficiaries Team Female beneficiaries reporting adequate access to affordable animal feed Level of access to affordable animal feed reported by female livestock producers in Jordan Quarterly Project monitoring system Animal feed access survey of a representative sample of project beneficiaries, with data disaggregated by gender Project Coordination Team Monitoring tool for access to bread developed and accessible to the public External users able to access a web-based and spatially explicit monitoring tool for access to bread Twice per year Project monitoring system Internal validation of accessibility of monitoring tool Project Coordination Team Value chain assessments for basic commodities carried out and discussed with stakeholders Reports outlining value chain assessments and related stakeholder engagement activities Twice per year Project monitoring system Progress status report of analytical and stakeholder engagement work Project Coordination Team Financial instruments for commodity risk management identified and discussed with stakeholders Report outlining financial instruments for commodity risk management and related stakeholder engagement activities Twice per year Project monitoring system Progress status report of analytical and stakeholder engagement work Project Management Team Fiscally sustainable policy options for food security identified and discussed with stakeholders Report outlining fiscally sustainable policy options for food security and related stakeholder engagement activities Twice per year Project monitoring system Progress status report of analytical and stakeholder engagement work Project Coordination Team Platforms for grain storage built and equipped by", + "type": "survey", + "explanation": "This is a dataset as it is explicitly described as a survey collecting data on access to animal feed from a representative sample of project beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly described as a survey collecting data on access to animal feed from a representative sample of project beneficiaries.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "090_Djibouti-Public-Admin-PAD-PAD2604-04062018", + "page": 37, + "text": "The World Bank Public Administration Modernization Project ( P162904 ) Page 34 of 69 VII. RESULTS FRAMEWORK AND MONITORING Results Framework COUNTRY: Djibouti Public Administration Modernization Project Project Development Objectives The PDO is to enable access to e-government and promote efficiency of selected revenue administration services. Project Development Objective Indicators Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Name: Percentage of population with unique ID Percentage 0. 00 50. 00 Quarterly ANSIE / PCU will create and maintain a e - ID registry which will initially be populated via a mass enrollment campaign and by leveraging the work and data gathered by the national social security fund and the social registry. The percentage of population with unique ID is obtain by dividing total recipients of e-ID to total population. ANSIE / PCU and the General Directorate of the Population at the Ministry of Interior.", + "ner_text": [ + [ + 633, + 648, + "named" + ], + [ + 150, + 158, + "e - ID registry <> data geography" + ], + [ + 520, + 559, + "e - ID registry <> data description" + ], + [ + 594, + 605, + "e - ID registry <> author" + ] + ], + "validated": true, + "empirical_context": "00 50. 00 Quarterly ANSIE / PCU will create and maintain a e - ID registry which will initially be populated via a mass enrollment campaign and by leveraging the work and data gathered by the national social security fund and the social registry. The percentage of population with unique ID is obtain by dividing total recipients of e-ID to total population.", + "type": "registry", + "explanation": "This is a dataset as it is described as a registry that will be populated with data and used to obtain population statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a registry that collects and maintains data on unique IDs.", + "contextual_reason_agent": "This is a dataset as it is described as a registry that will be populated with data and used to obtain population statistics.", + "contextual_signal": "described as a registry that collects and maintains data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 66, + "text": "The private sector: ( a ) inquired about the type of the planned interventions under the UDAP-GovNet, the locations, the possibility of private sector investment, and whether small start-ups or big enterprises or both could participate; ( b ) showed readiness to participate in the delivery of equipment, and in the build aspect in case the \u2018 design and build \u2019 approach is adopted for the backbone and last mile under one contract though a CBA was also recommended; and ( c ) recommended a study of the consumers of Uganda \u2019 s digital services, such as citizens, businesses, government, to aid decision making and ensure commercial viability and subsequent incorporation in the project design. The activities under the UDAP-GovNet are like those under RCIP-5, except the scope is much larger though the staff skills requirements are the same. There is limited domestic consulting / contracting IT capacity to conduct projects of this nature, complexity, and scope. Individual consultants will be hired to enhance the technical in - house capacity of NITA-U to support effective project implementation. 2. The project will use the Systematic Tracking of Exchanges in Procurement ( STEP ), the World Bank \u2019 s online planning and tracking system, which will provide data on procurement activities and delays and measure procurement performance.", + "ner_text": [ + [ + 1131, + 1178, + "named" + ] + ], + "validated": false, + "empirical_context": "2. The project will use the Systematic Tracking of Exchanges in Procurement ( STEP ), the World Bank \u2019 s online planning and tracking system, which will provide data on procurement activities and delays and measure procurement performance.", + "type": "system", + "explanation": "However, it is described as a system rather than a structured collection of data, indicating it is not a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'tracking system' which suggests data collection.", + "contextual_reason_agent": "However, it is described as a system rather than a structured collection of data, indicating it is not a dataset.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 33, + "text": "The key gaps identified in the MTR assessment are: ( a ) fewer women access loans from financial institutions to start their own businesses; ( b ) fewer women are starting businesses due to time poverty and capacity gaps; and ( c ) fewer women are in employment than men. Social gender norms and household dynamics play a critical role in causing these gaps. According to a 2022 study by Access to Finance Rwanda, deeply ingrained societal expectations shape how women participate in economic life and influence their capacity to leverage assets \u2014 particularly land and property \u2014 as collateral. 34 One pervasive norm is that women should prioritize family and caregiving responsibilities over business activities. This norm restricts their time and engagement in income-generating pursuits and weakens their perceived legitimacy as entrepreneurs, reducing their chances of qualifying for credit. Additionally, women are often expected to rely on family support, especially from spouses, instead of seeking independent financial solutions, distancing them from formal financial institutions and financial products. 31 Baseline failure rates by segment are extremely difficult to assess. As such, the analysis uses conservative assumptions based on extensive discussions with key stakeholders and potential beneficiaries. These assumptions have been tested for sensitivity. 32 CEIC data 33 In the absence of available survey data, the analysis uses proxies for these losses and repair costs from survey data from neighboring Burundi. Based on this data, annual flooding losses amount to ~ US $ 89 per household and repair costs amount to US $ 25. 60 per household. 34 Gendered Social Norms Diagnostic and their Impact on Women \u2019 s Financial Inclusion in Rwanda, 2022, Access to Finance Rwanda", + "ner_text": [ + [ + 1376, + 1385, + "named" + ], + [ + 1524, + 1531, + "CEIC data <> data geography" + ] + ], + "validated": true, + "empirical_context": "These assumptions have been tested for sensitivity. 32 CEIC data 33 In the absence of available survey data, the analysis uses proxies for these losses and repair costs from survey data from neighboring Burundi. Based on this data, annual flooding losses amount to ~ US $ 89 per household and repair costs amount to US $ 25.", + "type": "data", + "explanation": "In the context, 'CEIC data' is used as a source of information for analysis, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'CEIC data' is a dataset because it is referenced in the context of analysis and sensitivity testing.", + "contextual_reason_agent": "In the context, 'CEIC data' is used as a source of information for analysis, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 24, + "text": "In Burundi, of the 80. 2 percent of the population that owns land, 62. 5 percent are men and 17. 7 percent are women. 25 Facing discrimination in customary inheritance laws and challenges in accessing formal lines of credit due to lack of collateral, female-owned businesses tend to have slower growth and lower profits than male - owned businesses. Female entrepreneurs need training that allows them to overcome social gender roles and an aversion to risk and develop an \u2018 entrepreneurial mindset \u2019 to, for example, look for ways to differentiate their business from others, anticipate problems, overcome setbacks, and foster planning skills to create opportunities. 24 Support includes providing devices and digital content and conducting basic digital skills training. The BSF has three main platforms for using education technology including \u2018 Ideas Box \u2019 ( consisting of a digital and physical library in a box ); \u2018 Ideas Cube \u2019 ( an offline server with digital content to which any wifi-enabled device can connect ); and \u2018 Kajou \u2019 ( an SD card and mobile app pre-loaded with digital content, Kajou is owned by the BSF ). 25 General Population and Housing Census 2008, as reported in Ndikumana, Alain. 2015. \u201c Gender Equality in Burundi: Why Does Support not Extend to Women \u2019 s Right to Inherit Land? \u201d Afrobarometer Policy Paper. 22.", + "ner_text": [ + [ + 1131, + 1168, + "named" + ], + [ + 3, + 10, + "General Population and Housing Census <> data geography" + ], + [ + 1169, + 1173, + "General Population and Housing Census <> reference year" + ], + [ + 1190, + 1206, + "General Population and Housing Census <> author" + ], + [ + 1208, + 1212, + "General Population and Housing Census <> publication year" + ], + [ + 1235, + 1242, + "General Population and Housing Census <> data geography" + ] + ], + "validated": true, + "empirical_context": "The BSF has three main platforms for using education technology including \u2018 Ideas Box \u2019 ( consisting of a digital and physical library in a box ); \u2018 Ideas Cube \u2019 ( an offline server with digital content to which any wifi-enabled device can connect ); and \u2018 Kajou \u2019 ( an SD card and mobile app pre-loaded with digital content, Kajou is owned by the BSF ). 25 General Population and Housing Census 2008, as reported in Ndikumana, Alain. 2015.", + "type": "census", + "explanation": "This is indeed a dataset as it is a census that provides structured data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is a census that provides structured data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "015_BOSIB0e7334a5d0570a3e40f8ae4d0c1266", + "page": 30, + "text": "es Paid internship program targe ng women Development of a road accident database management system, training, and awareness campaign. Road safety audits 7 km of the ousseri bypass road rehabilitated to bituminous paved road standard 200 km of roads maintained and rehabilitated with safety and climate resilient features in refugees and host communi es Construc on or rehabilita on of markets, water points, storage facili es and vaccina on parks for ca le Construc on of improved schools, health centers, and rural market infrastructures Crea on of community mul media and youth centers 20 km of the MD road and the Tilde Bridge ( 180 meters ) rehabilitated with road safety and climate resilient features Reduc on of travel me to refugees and host communi es and within a 30 km bu er of the MD road roject Development bjec ve D Enhance connec vity and climate resilience along the MD road sec on and Improve access to basic socioeconomic infrastructure in selected districts of the Far North of Cameroon Improved access to services, jobs and economic opportuni es Economic growth Improved living condi ons ImprovedMD road vulnerability to iden ed climate hazards ( to oods and heat ) Improvedroad accessto schools, healthcenters and markets Increasedpopula on with accessto an all wheather passable road within ve kilometersof the MD sec on Improved users sa sfac on with the socio economic infrastructures Reducedtravel mes Improved access to educa on, health, and market in refugee and host communi es Improved residents, refugee, and host communi es markets condi ons Safer roads and sensi zed popula on Improved ins tu onal capacity and women integra on in the transport sector Reduc on of travel me along the MD road Improvedemployment opportuni es C N N C U U U C mmediate D ND C R igher evel Improved access to educa on and health services Construc on of Tilde Bridge Par cipa on of students ( including women ) to nanced internships in the transport sector and at MINTP Priori za on of roads regarding bene ciary communi es in the Mobility plans Road safety management and educa on", + "ner_text": [ + [ + 59, + 99, + "named" + ], + [ + 36, + 41, + "road accident database management system <> reference population" + ], + [ + 985, + 1006, + "road accident database management system <> data geography" + ], + [ + 1644, + 1649, + "road accident database management system <> reference population" + ], + [ + 1914, + 1919, + "road accident database management system <> reference population" + ] + ], + "validated": true, + "empirical_context": "es Paid internship program targe ng women Development of a road accident database management system, training, and awareness campaign. Road safety audits 7 km of the ousseri bypass road rehabilitated to bituminous paved road standard 200 km of roads maintained and rehabilitated with safety and climate resilient features in refugees and host communi es Construc on or rehabilita on of markets, water points, storage facili es and vaccina on parks for ca le Construc on of improved schools, health centers, and rural market infrastructures Crea on of community mul media and youth centers 20 km of the MD road and the Tilde Bridge ( 180 meters ) rehabilitated with road safety and climate resilient features Reduc on of travel me to refugees and host communi es and within a 30 km bu er of the MD road roject Development bjec ve D Enhance connec vity and climate resilience along the MD road sec on and Improve access to basic socioeconomic infrastructure in selected districts of the Far North of Cameroon Improved access to services, jobs and economic opportuni es Economic growth Improved living condi ons ImprovedMD road vulnerability to iden ed climate hazards ( to oods and heat ) Improvedroad accessto schools, healthcenters and markets Increasedpopula on with accessto an all wheather passable road within ve kilometersof the MD sec on Improved users sa sfac on with the socio economic infrastructures Reducedtravel mes Improved access to educa on, health, and market in refugee and host communi es Improved residents, refugee, and host communi es markets condi ons Safer roads and sensi zed popula on Improved ins tu onal capacity and women integra on in the transport sector Reduc on of travel me along the MD road Improvedemployment opportuni es C N N C U U U C mmediate D ND C R igher evel Improved access to educa on and health services Construc on of Tilde Bridge Par cipa on of students ( including women ) to nanced internships in the transport sector and at MINTP Priori za on of roads regarding bene ciary communi es in the Mobility plans Road safety management and educa on", + "type": "database management system", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a 'road accident database management system' used for managing data related to road accidents.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'database management system', which suggests a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a 'road accident database management system' used for managing data related to road accidents.", + "contextual_signal": "described as a database management system that stores records", + "tags": [] + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 39, + "text": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 34 of 43 Description Total number of deliveries attended by skilled health personnel Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of community enrolled health nurses trained in Garissa and Turkana ( Number ) Description Total number of community enrolled health nurses trained in Garissa and Turkana Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection MoH Number of refugee health facilities supported under the project, as part of the transition to county management, in Garissa and Turkana ( Number ) Description Total number of refugee health facilities supported under the project, as part of the transition to county management, in Garissa and Turkana ( registration in the master facility list, provision of non-program HPTs from KEMSA, staffing ). Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection MoH People in Garissa and Turkana who have received essential health, nutrition, and population ( HNP ) services ( Number ) Description Total number of deliveries attended by skilled health personnel and total number of children immunized among the host community and refugees in Garissa and Turkana.", + "ner_text": [ + [ + 263, + 267, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Building Resilient and Responsive Health Systems ( P179698 ) Feb 21, 2024 Page 34 of 43 Description Total number of deliveries attended by skilled health personnel Frequency Every six months Data source KHIS Methodology for Data Collection Routine HMIS data collection Responsibility for Data Collection MoH Number of community enrolled health nurses trained in Garissa and Turkana ( Number ) Description Total number of community enrolled health nurses trained in Garissa and Turkana Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection MoH Number of refugee health facilities supported under the project, as part of the transition to county management, in Garissa and Turkana ( Number ) Description Total number of refugee health facilities supported under the project, as part of the transition to county management, in Garissa and Turkana ( registration in the master facility list, provision of non-program HPTs from KEMSA, staffing ). Frequency Every six months Data source Project report Methodology for Data Collection Project monitoring Responsibility for Data Collection MoH People in Garissa and Turkana who have received essential health, nutrition, and population ( HNP ) services ( Number ) Description Total number of deliveries attended by skilled health personnel and total number of children immunized among the host community and refugees in Garissa and Turkana.", + "type": "system", + "explanation": "However, HMIS is described as a system for data collection, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "However, HMIS is described as a system for data collection, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 65, + "text": "In other sectors, investments may expand and be retained in Jordan thanks to improved aftercare services dispensed by the JIC. Moreover, foreign investments will increase led by the Syrian diaspora, regional investors, and investors \u2019 goodwill ( corporate social responsibility ). 31. Literature review by Mi\u0161kinis and Byrka14 highlights that a 10 percent increase in the investment promotion budget will lead to a 2. 5 percent increase in foreign direct investment and that US $ 1 spent on investment promotion increases foreign direct investment inflows by US $ 189. 32. It is assumed that accompanied with the JIC \u2019 s investment promotion and aftercare efforts will lead to an average 3 percent increase in investment each year for the next 10 years. Technical Assessments Political Economy Issues 33. Jordan has received millions of refugees during its history. According to some estimates, approximately half of the Jordanian population is made up of Palestinians and their descendants. An estimated 131, 000 Iraqis remain in Jordan after seeking refuge during the first Gulf War. The recent population census in Jordan indicates that Jordan hosts 2. 9 million non-Jordanians out of a total population of 9. 5 million. 15 The census puts the number of Syrians refugees in Jordan at 1. 3 million, while the UNHCR has registered about 655, 217.", + "ner_text": [ + [ + 1097, + 1114, + "named" + ], + [ + 60, + 66, + "population census <> data geography" + ], + [ + 805, + 811, + "population census <> data geography" + ], + [ + 1031, + 1037, + "population census <> data geography" + ], + [ + 1118, + 1124, + "population census <> data geography" + ], + [ + 1140, + 1146, + "population census <> data geography" + ], + [ + 1257, + 1273, + "population census <> reference population" + ] + ], + "validated": true, + "empirical_context": "An estimated 131, 000 Iraqis remain in Jordan after seeking refuge during the first Gulf War. The recent population census in Jordan indicates that Jordan hosts 2. 9 million non-Jordanians out of a total population of 9.", + "type": "census", + "explanation": "This is a dataset as it provides empirical data about the population in Jordan, specifically regarding non-Jordanians.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population census' typically refers to a structured collection of demographic data.", + "contextual_reason_agent": "This is a dataset as it provides empirical data about the population in Jordan, specifically regarding non-Jordanians.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + }, + "term_stats": { + "total": 6, + "validated": 5, + "not_validated": 1 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 90, + "text": "The World Bank Social Entrepreneurship, Empowerment and Cohesion in Refugee and Host Communities in Turkey Project ( P171456 ) 81 who also have limitations. Syrians under Temporary Protection ( SuTP ) have the right to stay in Turkey until safe return conditions are established in Syria. 103 SuTP have access to social benefits and services including health and education and can enter the labor market, subject to certain conditions. 104 6. Refugees who work are also better off than those who do not work. Although the data is limited, reliable data is mostly available for SuTP and especially those receiving the ESSN. In a 2018 representative survey of households receiving the ESSN, 84 percent of refugee households had at least one person who was working, but only 3 percent had a work permit. Of those that were working, 20 percent were working in unskilled services, with others working in the textile industry ( 19 percent ), construction ( 12 percent ), and artisanship ( 10 percent ). Of the 18 percent of refugees that had graduated from university or high school, about 20 percent were unemployed. Twenty percent of those without any formal education were also unemployed; and overall, over half of refugees were working irregularly.", + "ner_text": [ + [ + 628, + 654, + "named" + ] + ], + "validated": true, + "empirical_context": "Although the data is limited, reliable data is mostly available for SuTP and especially those receiving the ESSN. In a 2018 representative survey of households receiving the ESSN, 84 percent of refugee households had at least one person who was working, but only 3 percent had a work permit. Of those that were working, 20 percent were working in unskilled services, with others working in the textile industry ( 19 percent ), construction ( 12 percent ), and artisanship ( 10 percent ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is a representative survey providing empirical data on refugee households.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on households.", + "contextual_reason_agent": "This is indeed a dataset as it is a representative survey providing empirical data on refugee households.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 19, + "text": "The proposed project will also support the Social Registry Directorate to scale-up its communication and outreach, while strengthening data protection. Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9. In addition, knowledge about the objectives, modalities and strengths of the Social Registry is limited, within the government and among partners. These gaps will be addressed by intensifying the Social Registry \u2019 s communication and outreach efforts. User agreements will specify authorized uses of the data and protection of personal data. 30. The project will also encourage the use of the Social Registry as a tool for program coordination. The objective of this activity is to increase the SCAPP Directorate, Taazour and sectorial ministries \u2019 ownership of the Social Registry, by fostering its use for integrated service provision to poor and vulnerable households, and for better planning of investments according to needs. 31. This sub-component will finance the costs associated with: ( a ) the pilot Social Registry card and its evaluation, potential scale-up to 200, 000 households; ( b ) the development of the outreach strategy and related tools and communication campaign; and ( c ) workshops, analytical and coordination capacity building.", + "ner_text": [ + [ + 43, + 58, + "named" + ], + [ + 259, + 269, + "Social Registry <> publisher" + ], + [ + 915, + 945, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The proposed project will also support the Social Registry Directorate to scale-up its communication and outreach, while strengthening data protection. Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9.", + "type": "registry", + "explanation": "The Social Registry is mentioned as being used by line ministries and linked to projects, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry, which typically implies a structured collection of data.", + "contextual_reason_agent": "The Social Registry is mentioned as being used by line ministries and linked to projects, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "124_PAD8220PAD0P12010Box385319B00OUO090", + "page": 117, + "text": "To date however, the application of gender and development in infrastructure projects has been very limited in Lebanon. 3. Through in-depth analysis of the gender and poverty profiles of the targeted population groups in the GBML, the Project integrates specific measures that respond to the needs of both women and men, especially among the poor, in an equitable and inclusive manner. Data gathered was used to integrate gender sensitive features for the anticipated public outreach and awareness campaigns on metering, water quality and demand management. As such, activities will be tailored based on the socio-economic profiles of the various population groups in the GBML identified as part of project \u2019 s analysis. Objectives 4. Mainstreaming gender in the Water Supply Augmentation Project will focus on the following objectives: ( i ) inform the differentiated impact of the burdens and benefits of improved water supply; and ( ii ) identify areas of engagement by men and women during the operationalization of the project. 5. In order to achieve these objectives, a qualitative study in the form of semi-structured focus groups was carried out in the GBML area to provide richer analysis of the gendered dimension and inform the design of gender-responsive indicators for measuring how the project is performing in this particular area. 34 6. In the Bisri region, women comprise a sizeable percentage of the population living in project-affected areas: approximately 50 percent of the residents are women whose livelihoods 33 Republic of Lebanon MoSA Social Development Strategy, 2011. 34 Gender-responsive indicators can encapsulate gender-specific or gender-inclusive performance outcomes. The former measures specific needs of men and women whereas the latter focuses on relative benefits and provides comparable information. 106", + "ner_text": [ + [ + 1249, + 1277, + "named" + ] + ], + "validated": false, + "empirical_context": "5. In order to achieve these objectives, a qualitative study in the form of semi-structured focus groups was carried out in the GBML area to provide richer analysis of the gendered dimension and inform the design of gender-responsive indicators for measuring how the project is performing in this particular area. 34 6.", + "type": "concept", + "explanation": "However, 'gender-responsive indicators' are described as a concept for measurement rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'indicators' can imply measurable data points.", + "contextual_reason_agent": "However, 'gender-responsive indicators' are described as a concept for measurement rather than a structured collection of data.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 64, + "text": "On an annual basis the health service quality assessment will include direct observation of health service process quality measures at hospitals and health centers. ( iv ) Bi-annual patient feedback using exit surveys. ( v ) Bi-annual visits to a sample of BHTs to measure service outputs and quality. ( b ) Periodic TPM data collection: ( i ) Biennial household coverage surveys as baseline / endline surveys in the project \u2019 s three-year timeframe. 35 ( ii ) Citizen engagement survey collected at the household level, with the coverage survey every other year. 2. Data Analysis and Visualization Platform. The platform will emphasize development of an integrated, institutionalized, and sustainable system. The platform will include analysis of health service delivery in refugee and host community areas. The platform will include the following: ( a ) Interactive data visualization platform presenting Results Framework and core indicators. The platform will use data from DHIS2 and the TPM and will include BHI data. It will be updated at least on a quarterly basis. ( b ) Presentation of data in data visualization platform in the form of static and interactive maps, charts and graphs, tailored to project monitoring needs and partner priorities. ( c ) Incorporation of HSF data within the data visualization platform through a link or page within the platform. ( d ) Overlay of disease and health service delivery data with meteorologic data to better understand seasonal patterns in service delivery and infectious disease data. 35 Given the planned project length of three years, this is a baseline and an endline survey. Potential timeframe changes would include interim surveys, which are planned to be light surveys focusing on key indicators.", + "ner_text": [ + [ + 461, + 486, + "named" + ], + [ + 504, + 519, + "Citizen engagement survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "( b ) Periodic TPM data collection: ( i ) Biennial household coverage surveys as baseline / endline surveys in the project \u2019 s three-year timeframe. 35 ( ii ) Citizen engagement survey collected at the household level, with the coverage survey every other year. 2.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that collects data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data at the household level.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that collects data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 38, + "text": "The assessments concluded that the WB can rely on the existing ICRC FM arrangements to implement part of the Project, complemented by the project-specific risk mitigation measures. In addition, the WB shall rely on ICRC Internal control procedures, that will be complemented by the WB \u2019 s periodic FM implementation support supervision. B. Results Monitoring and Evaluation 74. Monitoring and Evaluation. The ICRC and FAO will be independently responsible for their own program monitoring and evaluation ( M & E ) of the Project, using the Project \u2019 s results framework to issue quarterly updates on the overall project implementation and results. The ICRC and FAO will be responsible to set up a Results Monitoring System that will allow to report on the Results Framework and any related outcome and impact information for the project. 75. The Results Monitoring System for ICRC will include data from each sector: Economic Security ( EcoSec ), Water / Sanitation, and Health.", + "ner_text": [ + [ + 697, + 722, + "named" + ] + ], + "validated": false, + "empirical_context": "The ICRC and FAO will be independently responsible for their own program monitoring and evaluation ( M & E ) of the Project, using the Project \u2019 s results framework to issue quarterly updates on the overall project implementation and results. The ICRC and FAO will be responsible to set up a Results Monitoring System that will allow to report on the Results Framework and any related outcome and impact information for the project. 75.", + "type": "system", + "explanation": "However, it is described as a system for reporting rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Monitoring System' which suggests data collection.", + "contextual_reason_agent": "However, it is described as a system for reporting rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42. Support for implementation of national and international student assessments includes the following activities: ( a ) analysis of PISA 2022 results in student learning and Moldova \u2019 s participation in PISA 2025 and 2028; ( b ) preparation activities conducted by NACE, including the development, translation, and editing of test materials, education, training, and preparatory workshops, and piloting of assessments; ( c ) test implementation, including printing, distribution, and supervision of assessments, data processing and analysis, and dissemination of results; and ( d ) strengthening of the capacity of the national implementing agency responsible for carrying out national and international student assessments, including for refugee children from Ukraine. Years of administration of PISA are indicative based on the frequency of previous cycles ( pending decision about the frequency of future cycles ). 43. This subcomponent will also support TA for: ( a ) revision of the teacher and manager professional standards, ( b ) nationwide education infrastructure investment planning for modern school", + "ner_text": [ + [ + 120, + 124, + "named" + ] + ], + "validated": false, + "empirical_context": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data reporting and integration.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 13, + "text": "Therefore, ECAM \u2019 s poverty estimates for the northern regions should be considered lower bounds, since the impact of an influx of refugees and the closure of markets, roads, and frontiers was not captured. According to the 2014 ECAM, approximately 2. 4 million people are deemed food-insecure, and 250, 000 people are estimated to be suffering from acute malnutrition, because of the impact of interrupted agricultural activities and trade, population displacement, increased vulnerability, and food insecurity in the Far North region. 4. The impact of the refugee crisis has reinforced existing patterns of spatial inequity. As of November 2017, Cameroon was host to 338, 505 refugees, over 70 percent of whom originated in the Central African Republic ( CAR ), 28 percent in Nigeria, and 0. 5 percent in Chad, with the remainder 1 World Bank. 2016. Priorities for Ending Poverty and Boosting Shared Prosperity. Cameroon Systematic Country Diagnostic. Washington, DC: World Bank. 2 INS ( Institut National de Statistique ). 2014. \u201c Fourth Cameroon Household Survey, Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages ( ECAM 4 ) \u201d. Yaounde, Cameroon. 3 INS ( Institut National de Statistique ). 2014. \u201c Fourth Cameroon Household Survey, Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages ( ECAM 4 ) \u201d. Yaounde, Cameroon.", + "ner_text": [ + [ + 1034, + 1066, + "named" + ], + [ + 224, + 228, + "Fourth Cameroon Household Survey <> publication year" + ], + [ + 519, + 535, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 648, + 656, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 914, + 922, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 984, + 987, + "Fourth Cameroon Household Survey <> publisher" + ], + [ + 1122, + 1129, + "Fourth Cameroon Household Survey <> data geography" + ], + [ + 1131, + 1139, + "Fourth Cameroon Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "2014. \u201c Fourth Cameroon Household Survey, Enqu\u00eate Camerounaise Aupr\u00e8s des M\u00e9nages ( ECAM 4 ) \u201d. Yaounde, Cameroon.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly named as a household survey, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly named as a household survey, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 19, + "text": "Cameroon has participated in several rounds of the PASEC and has recently carried out a national Early Grade Reading Assessment. The Government is also keen to carry out an Early Grade Mathematics Assessment in the near future. These different assessments are not anchored to a more systematic national assessment framework. This would allow for tracking student learning on a national scale, providing diagnostics and identifying solutions for improving, the education system, and equipping decision-makers with reliable and timely information to guide policy decisions, adjustments, or reforms. There is currently limited technical capacity to conduct large-scale assessments, disseminate assessment results, and provide evidence-based guidance to those responsible for curriculum reform and teacher training. ( c ) The availability and quality of education data are poor. The Education Management Information System ( EMIS ) in Cameroon is complex, with each of the four ministries of education collecting data for the subsector for which they are responsible in the absence of institutional coordination mechanisms. Despite some progress, there is still considerable variability in the quality ( completeness, timeliness, and reliability ) of data collected by each ministry. Comparability is difficult as methods for data compilation differ from one ministry to another. The EMIS for primary education, managed by MINEDUB, is perhaps the most advanced. The United Nations Educational, Scientific, and Cultural Organization ( UNESCO ) is supporting the Government \u2019 s efforts to develop and link regionally comparable data and the United Nations Children \u2019 s Fund ( UNICEF ) is undertaking a pilot data collection and school mapping exercise in ZEPs to capture information on various school-level indicators. With the World Bank \u2019 s technical support and financing, the Government has prepared school report cards across the country, but these are yet to be mainstreamed. 9 MINEDUB. Statistical Yearbook 2014 data.", + "ner_text": [ + [ + 879, + 918, + "named" + ] + ], + "validated": false, + "empirical_context": "( c ) The availability and quality of education data are poor. The Education Management Information System ( EMIS ) in Cameroon is complex, with each of the four ministries of education collecting data for the subsector for which they are responsible in the absence of institutional coordination mechanisms. Despite some progress, there is still considerable variability in the quality ( completeness, timeliness, and reliability ) of data collected by each ministry.", + "type": "system", + "explanation": "However, it is described as a system without being explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a system without being explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 69, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | 60 20. Targeted results may be impacted by the exogenous risks identified in the Program \u2019 s integrated risk assessment. Increasing security concerns triggered by the ongoing conflict may distract government attention and fiscal resources, while climate change is not expected to impact the achievement of results. M & E Capacity 21. The Program \u2019 s M & E framework leverages government monitoring systems, which ensures adequate measuring and reporting of progress toward the targeted results. Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "ner_text": [ + [ + 1174, + 1213, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicators to measure process, output and outcome level results are deemed specific, measurable, attainable, relevant, and timely ( SMART ). Across the Result Areas, reporting on progress will be based on information and data generated by the very same IT infrastructure mobilized for the achievement of results, as follows: Sanad for DLIs 1, 2, and 3 on access to e-services, digital identity, and trusted, people-centric data sharing; the EMR system for DLI 4 on the management of electronic medical records; the HRMIS for DLI 5 on the professionalization of the civil service; the Education Management Information System ( EMIS ) for DLI 7 on the digitalization of general secondary education exam; the PMDU internet portal for DLI 8 on e-participation; and the DOS interactive internet portal for DLI 9 on interactive statistical data.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 20, + "text": "To increase efficiency and transparency of land management, two web-based platforms aimed at assessing land and real estate value ( SIEBI ) and managing cadastral and land information ( SYCAD ) were recently developed by the Government with support from the Danish development cooperation. These platforms remain to be tested, integrated and rolled out nationwide, and an efficient network and server infrastructure must be set up for their effective performance. In parallel, the Government has started to lay the core foundations of a National Spatial Data Infrastructure ( NSDI ) in collaboration with the UN Global Geospatial Information Management. The adoption of a geospatial roadmap will help government agencies to agree on common data standards, mutualize investments in geospatial services and infrastructure, and eventually make land and mining information interoperable with other common territorial datasets for better natural resources and land use management, thereby enhancing the possibilities for a climate resilient development. Currently, additional resources and technical expertise are required to advance this agenda. The Mining Sector 16. Mining brought the most important structural change to Burkina Faso \u2019 s economy over the last decade. Its share of GDP increased from 0. 8 percent in 2008 to 10. 4 percent in 2019; and of export from an insignificant number to about 70 percent for the same period.", + "ner_text": [ + [ + 537, + 573, + "named" + ] + ], + "validated": false, + "empirical_context": "These platforms remain to be tested, integrated and rolled out nationwide, and an efficient network and server infrastructure must be set up for their effective performance. In parallel, the Government has started to lay the core foundations of a National Spatial Data Infrastructure ( NSDI ) in collaboration with the UN Global Geospatial Information Management. The adoption of a geospatial roadmap will help government agencies to agree on common data standards, mutualize investments in geospatial services and infrastructure, and eventually make land and mining information interoperable with other common territorial datasets for better natural resources and land use management, thereby enhancing the possibilities for a climate resilient development.", + "type": "framework", + "explanation": "However, it is described as a framework for geospatial data management rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' in its name.", + "contextual_reason_agent": "However, it is described as a framework for geospatial data management rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 65, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 53. Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "ner_text": [ + [ + 631, + 635, + "named" + ], + [ + 4, + 14, + "EMIS <> publisher" + ], + [ + 15, + 23, + "EMIS <> data geography" + ], + [ + 608, + 611, + "EMIS <> author" + ], + [ + 673, + 697, + "EMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "type": "system", + "explanation": "In the context, 'EMIS' is explicitly referenced as a datasource for collecting data on girls' survival rates in secondary school, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is mentioned as a source of data collection in the context.", + "contextual_reason_agent": "In the context, 'EMIS' is explicitly referenced as a datasource for collecting data on girls' survival rates in secondary school, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 16, + "text": "Constrained by such costs, SMEs may resort to hiring workers informally or might exit completely from the registered formal sector and continue operations informally. 17. Another major challenge affecting the capacity of firms to create jobs and expand is the capacity to find skilled workers. An inadequately educated labor force is perceived to be among the top five constraints to doing business in Turkey. The analysis of data on more than 7 million job postings at the public employment agency ( \u0130\u015eKUR ) between 2016 and 2018 and from \u0130\u015eKUR \u2019 s Labor Market Needs Assessment Survey and the top nine online job search portals shows that the most critical skills sought by employers across provinces are behavioral, socioemotional, and software-related skills. 14 13 Enterprise Surveys ( database ), International Finance Corporation and World Bank, Washington, DC, https: / / www. enterprisesurveys. org /. 14 https: / / media. \u0130\u015eKUR. gov. tr / 33412 / istihdamda-3i-30-sayi-ek1-2019-yili-isgucu-piyasasi-arastirmasi-sonuclari. pdf", + "ner_text": [ + [ + 550, + 586, + "named" + ], + [ + 402, + 408, + "Labor Market Needs Assessment Survey <> data geography" + ], + [ + 454, + 466, + "Labor Market Needs Assessment Survey <> data type" + ], + [ + 501, + 506, + "Labor Market Needs Assessment Survey <> publisher" + ], + [ + 517, + 521, + "Labor Market Needs Assessment Survey <> reference year" + ], + [ + 526, + 530, + "Labor Market Needs Assessment Survey <> publication year" + ], + [ + 540, + 545, + "Labor Market Needs Assessment Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "An inadequately educated labor force is perceived to be among the top five constraints to doing business in Turkey. The analysis of data on more than 7 million job postings at the public employment agency ( \u0130\u015eKUR ) between 2016 and 2018 and from \u0130\u015eKUR \u2019 s Labor Market Needs Assessment Survey and the top nine online job search portals shows that the most critical skills sought by employers across provinces are behavioral, socioemotional, and software-related skills. 14 13 Enterprise Surveys ( database ), International Finance Corporation and World Bank, Washington, DC, https: / / www.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on labor market needs.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 54, + "text": "FRIT MSP will also rely on the same systems and the accounting and reporting for the project will also be fully integrated into the IL_BIS system. ILBANK will conduct the necessary modifications / additions to the IL_BIS system and these arrangements are expected to be in place before project effectiveness. 16. ILBANK has robust systems, manuals and guidelines regulating the internal controls environment. The accounting and reporting systems at ILBANK are geared toward producing statements and information as required by Turkish laws and regulations. Additionally, lLBANK has developed and executed specific internal control procedures for the implementation of the foreign financed projects including the SCP Program and these procedures are clearly defined in the project financial management manual which is available in the ILBANK web-site. 17. FRIT MSP will disburse through sub-loans and grant agreements that will be made between ILBANK and qualifying municipalities and utilities. The municipalities will submit the payment requests to the PMU after verifying completeness of all documentation is complete will prepare the payment order through its financial management department. The payment will be made directly from the designated accounts to the constructer \u2019 s bank account. 18. The PMU has been utilizing detailed checklists that are completed and signed by the relevant staff before processing the payments.", + "ner_text": [ + [ + 132, + 145, + "named" + ] + ], + "validated": false, + "empirical_context": "FRIT MSP will also rely on the same systems and the accounting and reporting for the project will also be fully integrated into the IL_BIS system. ILBANK will conduct the necessary modifications / additions to the IL_BIS system and these arrangements are expected to be in place before project effectiveness.", + "type": "system", + "explanation": "However, the context indicates that it is a system used for accounting and reporting, not explicitly a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'system' can imply a structured collection of data.", + "contextual_reason_agent": "However, the context indicates that it is a system used for accounting and reporting, not explicitly a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 19, + "text": "More than two out of five individuals ( 41. 7 percent ) 2 lived below the poverty line in 2020 ( less than US $ 1. 9 per day ). This corresponds to an absolute number of 10 million poor individuals ( out of 23 million ). This was an increase by 0. 1 percentage point compared to 2019 following a 0. 1 percent drop in per capita income in 2020. As a consequence, an additional 400, 000 people were drawn into extreme poverty. Based on current projections, the number of extreme poor is expected to increase by an additional 200, 000 people in 2021, due mainly to population growth. The country will not be able to reduce its poverty rate to pre-COVID-19 levels before end - 2023. The poverty incidence in rural areas is 46. 8 percent, compared to only 11. 8 percent in urban areas. There are also striking disparities in poverty at the subnational level. Estimates from the 2018 / 19 household survey3 suggest that a Nigerien inhabitant in Niamey has only a 7 percent chance of being poor while this probability for a person living in Dosso Region is 48 percent. Such wealth disparities between regions can be a source of social tensions and can potentially affect the country \u2019 s future growth and security situation. 1 World Bank - Niger Spring 2021 Economic Update: Maximizing Public Expenditure Efficiency for Rebuilding Better. 2 World Bank Macro Poverty Outlook 2021. 3 Survey on the Living Conditions of Households [ Enqu\u00eate Harmonis\u00e9e sur les Conditions de Vie des M\u00e9nages ] ( 2018 \u2013 19 ).", + "ner_text": [ + [ + 1375, + 1420, + "named" + ], + [ + 90, + 94, + "Survey on the Living Conditions of Households <> reference year" + ], + [ + 873, + 882, + "Survey on the Living Conditions of Households <> publication year" + ], + [ + 916, + 935, + "Survey on the Living Conditions of Households <> reference population" + ], + [ + 939, + 945, + "Survey on the Living Conditions of Households <> data geography" + ], + [ + 1034, + 1046, + "Survey on the Living Conditions of Households <> data geography" + ], + [ + 1220, + 1230, + "Survey on the Living Conditions of Households <> publisher" + ], + [ + 1246, + 1250, + "Survey on the Living Conditions of Households <> publication year" + ], + [ + 1334, + 1344, + "Survey on the Living Conditions of Households <> publisher" + ], + [ + 1367, + 1371, + "Survey on the Living Conditions of Households <> publication year" + ], + [ + 1512, + 1530, + "Survey on the Living Conditions of Households <> usage context" + ] + ], + "validated": true, + "empirical_context": "2 World Bank Macro Poverty Outlook 2021. 3 Survey on the Living Conditions of Households [ Enqu\u00eate Harmonis\u00e9e sur les Conditions de Vie des M\u00e9nages ] ( 2018 \u2013 19 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly identified as a survey that collects data on living conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly identified as a survey that collects data on living conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 23, + "text": "To facilitate data sharing and use, the subcomponent will develop a data visualization and use platform ( software ) focusing on visual representations of TPM and routine data, inclusive of BHI data. Linking of platforms, including DHIS2 and the HSF platform will be integral to the work. The data visualization platform will include visualization of Results Framework data and other core indicators from the HSSP, linking TPM and DHIS2 data using maps, charts, and graphs and will incorporate HSF data along with the overlay of health and meteorologic data to better understand the impact of climatic patterns on health. The platform will include analysis of health service delivery in refugee and host community areas to facilitate improved health service delivery among the critical underserved populations. The development of an integrated, institutionalized, and sustainable platform which will strengthen MoH systems will be emphasized. Annex 2 provides further details on the platform. 43. Subcomponent 3. 3: Contract and Program Management Capacity Development ( PMU; US $ 3. 44 million: US $ 1. 54 million equivalent IDA [ WHR ] and US $ 1. 9 million Trust Funds [ US $ 0. 20 million SDTF and US $ 1. 7 million MDTF ] ).", + "ner_text": [ + [ + 232, + 237, + "named" + ] + ], + "validated": false, + "empirical_context": "To facilitate data sharing and use, the subcomponent will develop a data visualization and use platform ( software ) focusing on visual representations of TPM and routine data, inclusive of BHI data. Linking of platforms, including DHIS2 and the HSF platform will be integral to the work. The data visualization platform will include visualization of Results Framework data and other core indicators from the HSSP, linking TPM and DHIS2 data using maps, charts, and graphs and will incorporate HSF data along with the overlay of health and meteorologic data to better understand the impact of climatic patterns on health.", + "type": "system", + "explanation": "However, DHIS2 is described as a platform, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data sharing and visualization.", + "contextual_reason_agent": "However, DHIS2 is described as a platform, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 22, + "text": "It will be more efficient in the long term given the rapid changes taking place in the demand for labor in Djibouti and beyond, and the significant advances being made in technology that is likely to make skills acquisition through such means a standard for future learners. 30. Integration with System Partners. There is a need to build close working partnerships between the training system and institutions, and representatives of the private sector, employers, chamber of commerce, and other relevant stakeholders including youth organization, Nongovernmental Organizations ( NGOs ) working vulnerable population as well as organizations supporting refugees \u2019 education and training in Djibouti such as UNHCR and United Nations Children ' s Fund ( UNICEF ). These partnerships should be developed and strengthened in the coming years to ensure that the supply side for training is able to cater to the needs of local businesses, employers, and the private sector. Partnerships can support the development of mandatory training programs, internship opportunities, and apprenticeships to support school to work transition and on the job training ( OJT ) and in general help prepare trainees for the world of work25, 26. Furthermore, such partnerships can also support direct training opportunities for those already in employment through Work-Based Learning ( WBL ) opportunities. 24 Preferably a comprehensive framework that covers the entire education and training system, but which can be constructed in parts. This will help ensure articulation across levels. 25 https: / / data. worldbank. org / indicator / IT. NET. USER. ZS? locations = SZ 26 Labour Force Survey, 2016.", + "ner_text": [ + [ + 1652, + 1671, + "named" + ], + [ + 107, + 115, + "Labour Force Survey <> data geography" + ], + [ + 690, + 698, + "Labour Force Survey <> data geography" + ], + [ + 1673, + 1677, + "Labour Force Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "ZS? locations = SZ 26 Labour Force Survey, 2016.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is explicitly referenced alongside a specific year and location, indicating its use in empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Labour Force Survey' is a recognized term for a structured collection of data related to employment statistics.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is explicitly referenced alongside a specific year and location, indicating its use in empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 19 of 68 37. Subcomponent 2. 3: Health Service Quality Improvement ( implemented by WHO; US $ 2. 5 million: US $ 0. 83 million equivalent IDA [ including US $ 0. 53 million WHR ] and US $ 1. 67 million Trust Funds [ US $ 0. 17 million SDTF and US $ 1. 50 million MDTF ] ). This subcomponent focuses on improving health service quality in South Sudan by addressing the challenges of remote health facilities, shortage of qualified health workers, and a long history of low health service quality. This subcomponent will: ( a ) develop an HRH policy, strategy, and manual; ( b ) implement the national Human Resources for Health Information System; ( c ) review and update the health worker training curriculum; ( d ) review and update the essential medicines list and standard treatment guidelines, including rational use of medicines; strengthen the capacity of the Drug and Food Control Authority ( DFCA ) at the State and National levels through training, development of tools and guidelines, and operational support for testing and supervision; ( e ) review and update the national quality of care policy and strategy; ( f ) review and update the BPHNS; and ( g", + "ner_text": [ + [ + 690, + 735, + "named" + ], + [ + 4, + 14, + "Human Resources for Health Information System <> publisher" + ], + [ + 15, + 26, + "Human Resources for Health Information System <> data geography" + ], + [ + 174, + 177, + "Human Resources for Health Information System <> author" + ], + [ + 428, + 439, + "Human Resources for Health Information System <> data geography" + ] + ], + "validated": true, + "empirical_context": "This subcomponent focuses on improving health service quality in South Sudan by addressing the challenges of remote health facilities, shortage of qualified health workers, and a long history of low health service quality. This subcomponent will: ( a ) develop an HRH policy, strategy, and manual; ( b ) implement the national Human Resources for Health Information System; ( c ) review and update the health worker training curriculum; ( d ) review and update the essential medicines list and standard treatment guidelines, including rational use of medicines; strengthen the capacity of the Drug and Food Control Authority ( DFCA ) at the State and National levels through training, development of tools and guidelines, and operational support for testing and supervision; ( e ) review and update the national quality of care policy and strategy; ( f ) review and update the BPHNS; and ( g", + "type": "system", + "explanation": "This is indeed a dataset as it is mentioned in the context of implementing a national system for health information, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned in the context of implementing a national system for health information, indicating its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 677, + 682, + "named" + ], + [ + 51, + 62, + "DHIS2 <> data geography" + ], + [ + 194, + 197, + "DHIS2 <> publisher" + ], + [ + 688, + 691, + "DHIS2 <> publisher" + ], + [ + 703, + 719, + "DHIS2 <> data type" + ], + [ + 764, + 782, + "DHIS2 <> data type" + ] + ], + "validated": true, + "empirical_context": "71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs.", + "type": "system", + "explanation": "DHIS2 is indeed a data management system that regularly collects and manages data from service delivery units, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "DHIS2 is indeed a data management system that regularly collects and manages data from service delivery units, confirming its role as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 58, + "text": "48 implementation, and the monitoring and evaluation of the cash transfer program, as a template for other social safety net interventions. 34. Building on the database of eligible households, these modules will include: ( i ) program beneficiary lists with an eventual registration of complementary activities, ( ii ) payment modules ( payroll and the reconciliation from the payment provider ( s ) ), ( iii ) operational tracking of program, and ( iv ) basic monitoring and evaluation, including beneficiary feedback and grievance redress mechanisms when operational. The program beneficiary lists will start with the cash transfer beneficiary list and track beneficiaries \u2019 participation in the complementary activities set-up by the program. While initially, participation will be required but payments will not be conditional on participation, the system will provide the functionality to set up conditionalities in the future. The payment system will include the quarterly / monthly payroll based on beneficiary lists, the amounts transferred to the payment agency ( ies ), the beneficiary receipts and the reconciliation of accounts. The operational tracking module would provide an operational dashboard to enable program managers to plan and track activities, human and material resources and other inputs at the central, provincial and communal levels.", + "ner_text": [ + [ + 620, + 650, + "named" + ], + [ + 227, + 252, + "cash transfer beneficiary list <> data type" + ], + [ + 661, + 674, + "cash transfer beneficiary list <> reference population" + ] + ], + "validated": true, + "empirical_context": "Building on the database of eligible households, these modules will include: ( i ) program beneficiary lists with an eventual registration of complementary activities, ( ii ) payment modules ( payroll and the reconciliation from the payment provider ( s ) ), ( iii ) operational tracking of program, and ( iv ) basic monitoring and evaluation, including beneficiary feedback and grievance redress mechanisms when operational. The program beneficiary lists will start with the cash transfer beneficiary list and track beneficiaries \u2019 participation in the complementary activities set-up by the program. While initially, participation will be required but payments will not be conditional on participation, the system will provide the functionality to set up conditionalities in the future.", + "type": "list", + "explanation": "This is a dataset as it is explicitly mentioned as a beneficiary list used for tracking participation in the program.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a list of beneficiaries that is structured and used for program tracking.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a beneficiary list used for tracking participation in the program.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 89, + "text": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 84 of 87 ANNEX 6: Outcomes of a Survey on Ability and Willingness of Rural Households to Pay for Electricity Services 1. To inform the design of the project subcomponent aiming to electrify households through SHSs, a survey on ability and willingness of Chad rural households to pay for electricity services was conducted in the first half of 2021. Due to time and budget limitations, as well as security constraints, the survey was implemented in the rural areas of three Chadian provinces that were selected with the objective of obtaining representative data that can be extrapolated to the rest of the rural areas of the country. The poverty incidence, together with homogeneity / differences between provinces, played a key role in the stratification of the sample. Table 6. 1. summarizes information on the three selected provinces and sample size, while figure 6. 1. shows a Chad map with the names of provinces. Table 6. 1. Sample Size by Province Province Poverty Incidence ( % ) Sample Size Gu\u00e9ra 60. 0 248 Kanem 27. 7 241 Logone Occidental 43. 5 239 Total 728 Figure 6. 1. Map of Chad Source: Cartography Unit, the World Bank. 2.", + "ner_text": [ + [ + 285, + 375, + "named" + ], + [ + 4, + 14, + "survey on ability and willingness of Chad rural households to pay for electricity services <> publisher" + ], + [ + 15, + 19, + "survey on ability and willingness of Chad rural households to pay for electricity services <> data geography" + ], + [ + 411, + 415, + "survey on ability and willingness of Chad rural households to pay for electricity services <> publication year" + ], + [ + 541, + 558, + "survey on ability and willingness of Chad rural households to pay for electricity services <> data geography" + ], + [ + 706, + 723, + "survey on ability and willingness of Chad rural households to pay for electricity services <> data description" + ], + [ + 950, + 954, + "survey on ability and willingness of Chad rural households to pay for electricity services <> data geography" + ], + [ + 1194, + 1204, + "survey on ability and willingness of Chad rural households to pay for electricity services <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 84 of 87 ANNEX 6: Outcomes of a Survey on Ability and Willingness of Rural Households to Pay for Electricity Services 1. To inform the design of the project subcomponent aiming to electrify households through SHSs, a survey on ability and willingness of Chad rural households to pay for electricity services was conducted in the first half of 2021. Due to time and budget limitations, as well as security constraints, the survey was implemented in the rural areas of three Chadian provinces that were selected with the objective of obtaining representative data that can be extrapolated to the rest of the rural areas of the country.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a structured collection of data obtained from a survey conducted to inform project design.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it describes a survey that collects data on households' ability and willingness to pay for electricity services.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a structured collection of data obtained from a survey conducted to inform project design.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 44, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 40 of 47 measles vaccination in Balochistan who are fully vaccinated. 74 Data on under \u2010 five mortality comes from PDHS 2017 \u2013 18. The impact of additional family planning visits on maternal mortality is based on the maternal mortality effect of modern contraceptive use estimated by Ahmed et al. ( 2012 ). 75 It is assumed that 50 percent of family planning visits to the project facilities result in modern contraceptive use. An adjustment factor is applied throughout to account for crowding out of formal private health care services, assuming that 95 percent of the additional benefits in project facilities would have occurred in the absence of the project. Table 1. 3. Estimated Lives Saved over the Project Cycle Years Neonatal Under \u2010 5 Maternal Total 2020 0 0 0 0 2021 7 17 5 29 2022 8 22 7 36 2023 9 28 9 45 2024 10 37 11 58 Total 33 103 32 168 11. Lives saved by the project are converted into monetary terms using the concept of VSL, that is, individuals \u2019 willingness to pay for small changes in their likelihood of dying in a certain time frame, considering their budgetary constraints. These are then converted into a population \u2010 level value of a decrease in expected number of deaths \u2014 the so \u2010 called VSL.", + "ner_text": [ + [ + 192, + 206, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 40 of 47 measles vaccination in Balochistan who are fully vaccinated. 74 Data on under \u2010 five mortality comes from PDHS 2017 \u2013 18. The impact of additional family planning visits on maternal mortality is based on the maternal mortality effect of modern contraceptive use estimated by Ahmed et al. ( 2012 ).", + "type": "survey", + "explanation": "It is indeed a dataset as it provides structured data used for empirical analysis regarding under-five mortality.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data on under-five mortality.", + "contextual_reason_agent": "It is indeed a dataset as it provides structured data used for empirical analysis regarding under-five mortality.", + "contextual_signal": "follows 'Data on under-five mortality comes from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 14, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda Project ( P176747 ) Page 10 of 77 owned enterprises without children. 19 With total fertility rates in Uganda still very high at 4. 7 children per woman, care burdens are compounded for women. 13. Social norms and risks of violence against women also influence the choices of Ugandan women for businesses sectors and sizes. Women can feel discouraged from entering or expanding in more profitable ( male-dominated ) sectors, as doing so may signal their transgression of gender norms about men being the main income providers in households. Risk of violence also constitutes a significant barrier to women \u2019 s entrepreneurship in Uganda. A 2020 national survey of violence against women reports that almost all ( 95 percent ) of Ugandan women between 15 \u2013 49 years old have experienced physical or sexual violence from either an intimate partner or a non-partner during their lifetime. 20 This is more than three times the global average ( 27 percent lifetime, ) and the averages for Sub-Saharan Africa ( 33 percent lifetime ). 21 More than half reported that their partners insisted on knowing where they were at all times ( 54 percent ) and control how they spend their money ( 29 percent ). 14.", + "ner_text": [ + [ + 733, + 779, + "named" + ] + ], + "validated": true, + "empirical_context": "Risk of violence also constitutes a significant barrier to women \u2019 s entrepreneurship in Uganda. A 2020 national survey of violence against women reports that almost all ( 95 percent ) of Ugandan women between 15 \u2013 49 years old have experienced physical or sexual violence from either an intimate partner or a non-partner during their lifetime. 20 This is more than three times the global average ( 27 percent lifetime, ) and the averages for Sub-Saharan Africa ( 33 percent lifetime ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is a national survey that provides empirical data on violence against women in Uganda.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a national survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is a national survey that provides empirical data on violence against women in Uganda.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "183_multi0page", + "page": 26, + "text": "Key Perfonnance Hierarchy of Objectives Indicators Monitoring & Evaluation Critical Assumptions Output from each Output Indicators: Project reports: ( from Outputs to Objective ) Component: 1. 1 Improved policy making, * Annual workplans that are * Supervision reports * Incentives for behavioral planning, and financing increasingly based on agreed changes take place among MOE criteria are used to prioritize * Annual work plan and District staff enabling them donor funded activities and PA to use data appropriately. education program. * Workshop evaluations from * Difference between budgeted local and national level, s Trained staff stay in MOE amounts and actual donorlNGO committee meeting service in light of low salaries expenditures is within 1O % minutes range * EMIS updated and utilized at central and district offices in annual and multi-year planning and monitoring plan implementation * Financial management system computerized and operational by January 2002 * Lessons leamed from planning process are assessed, disseminated, and discussed at workshops and regularly held planning commnittee meetings and sector working group ( Donor / NGO ) meetings 1. 2 PST established and oTimeliness of Project * Supervision reports * PST adequately staffed operational procurement; * Timeliness of Project implementation reports; oTimeliness of preparation of necessary Project documentation. 2 Targeted activities * Detailed planning for selected * Annual work plans * Coordination between implemented according to annual pilot activities is based on * Supervision reports departments at the MOE", + "ner_text": [ + [ + 776, + 780, + "named" + ] + ], + "validated": false, + "empirical_context": "education program. * Workshop evaluations from * Difference between budgeted local and national level, s Trained staff stay in MOE amounts and actual donorlNGO committee meeting service in light of low salaries expenditures is within 1O % minutes range * EMIS updated and utilized at central and district offices in annual and multi-year planning and monitoring plan implementation * Financial management system computerized and operational by January 2002 * Lessons leamed from planning process are assessed, disseminated, and discussed at workshops and regularly held planning commnittee meetings and sector working group ( Donor / NGO ) meetings 1. 2 PST established and oTimeliness of Project * Supervision reports * PST adequately staffed operational procurement; * Timeliness of Project implementation reports; oTimeliness of preparation of necessary Project documentation.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data utilization and planning.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 63, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 59 of 74 installment of livelihood grant. Number of beneficiaries receiving Economic Opportunities who are female youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth and female, defined as women between the ages of 18 and 35 years, and have received at least 1 installment of livelihood grant. This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "ner_text": [ + [ + 555, + 564, + "named" + ], + [ + 4, + 14, + "SNSOP MIS <> publisher" + ], + [ + 565, + 581, + "SNSOP MIS <> data type" + ], + [ + 663, + 675, + "SNSOP MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "Number of beneficiaries receiving Economic Opportunities who are female youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth and female, defined as women between the ages of 18 and 35 years, and have received at least 1 installment of livelihood grant. This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection.", + "type": "management information system", + "explanation": "It is indeed a dataset as it functions as a management information system that collects and maintains data on beneficiaries.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that collects and updates beneficiary data.", + "contextual_reason_agent": "It is indeed a dataset as it functions as a management information system that collects and maintains data on beneficiaries.", + "contextual_signal": "described as a management information system that stores records", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 21, + "text": "This sub-component will also support the background analysis for the implementation of the database and the targeting process \u2013 development of poverty and malnutrition maps, community-based targeting criteria and processes, registry questionnaire, proxy-means test analysis; the organization of and support to the different committees involved in the targeting and registration, the implementation of the PMT survey, storage and analysis of the data as well as the preparation of beneficiary lists for the cash transfer program. It will also support the development of the corresponding database and management information systems related to the registry and the targeting ( hardware, software, back-up equipment ). Finally, it will support potential additional activities to ensure that transfer recipients are aware of the requirements and supporting documents to obtain national ID cards. 12 Based on the experience of the Terintanbwe pilot, it is expected that up to 30 percent of expected transfer recipients may not have an ID.", + "ner_text": [ + [ + 405, + 415, + "named" + ] + ], + "validated": false, + "empirical_context": "This sub-component will also support the background analysis for the implementation of the database and the targeting process \u2013 development of poverty and malnutrition maps, community-based targeting criteria and processes, registry questionnaire, proxy-means test analysis; the organization of and support to the different committees involved in the targeting and registration, the implementation of the PMT survey, storage and analysis of the data as well as the preparation of beneficiary lists for the cash transfer program. It will also support the development of the corresponding database and management information systems related to the registry and the targeting ( hardware, software, back-up equipment ).", + "type": "survey", + "explanation": "However, the context indicates that it is mentioned as a survey, not as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PMT survey' implies a structured collection of data from a survey.", + "contextual_reason_agent": "However, the context indicates that it is mentioned as a survey, not as a data source or dataset.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 129, + "text": "Availability of key administration and monitoring data that has been validated through ground surveys in key protected areas. SyGIAP data system operationalized for this purpose.... \u2018 tected Areas and Strengthening of ICCN ( GEF US $ 4. 1 million )..... Central and local staff trained on the use of SyGIAP. Significant proportion of ICCN field staff in Maiko NP adequately trained, increased number of man - - days performed in the field and staff performance management systems in place; Increase in total park area regularly patrolled and monitored;. 36 A workshop with potential partners was held at Chatham House in London in December 2007 to identify potential alternative finance models for SFM in DRC. Discussions are ongoing with CI for a proposed Bonobo Conservation Concession in Equateur Province; i. e. conversion of a cancelled timber concession to a conservation contract. 117", + "ner_text": [ + [ + 126, + 144, + "named" + ], + [ + 20, + 54, + "SyGIAP data system <> data type" + ], + [ + 354, + 362, + "SyGIAP data system <> data geography" + ], + [ + 640, + 644, + "SyGIAP data system <> publication year" + ], + [ + 705, + 708, + "SyGIAP data system <> data geography" + ], + [ + 791, + 808, + "SyGIAP data system <> data geography" + ] + ], + "validated": true, + "empirical_context": "Availability of key administration and monitoring data that has been validated through ground surveys in key protected areas. SyGIAP data system operationalized for this purpose. .", + "type": "data system", + "explanation": "It is indeed a dataset as it is described as a data system operationalized for the purpose of monitoring and administration.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'data system' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a data system operationalized for the purpose of monitoring and administration.", + "contextual_signal": "mentioned as a data system operationalized for this purpose", + "tags": [] + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 59, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 55 of 64 resident of the commune of Balbala. Public health facilities with safe disposal of sharps waste Quarterly IVA Quarterly IVA and PBF report MOH Facilities receiving PBF payment within 30 days of submission of verification of results Semi - annual Accounting system Semi-annual internal project review by DGP MOH Average user experience score among PBF / DFF participating facilities User experience score is a zero if not collected. Quarterly IVA Quarterly IVA and PBF report MOH Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities with functioning community health workers per the norms set by the National Community Health Strategy Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities that have formally declared the abandonment of the practice of FGM Annual Official act by community leadership Annual internal project review by DGP ( combined with semi - annual review ) MOH Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) Semi - annual DHIS2 Semi-annual internal project review by DGP MOH Facilities that are connected to the National Logistics Management Information System ( eLMIS ) Semi - annual eLMIS Semi-annual internal project review by DGP MOH Health surveys conducted and results made public Survey results will lead to action plans after every Semi - annual DGP Semi-annual", + "ner_text": [ + [ + 1186, + 1192, + "named" + ] + ], + "validated": false, + "empirical_context": "Public health facilities with safe disposal of sharps waste Quarterly IVA Quarterly IVA and PBF report MOH Facilities receiving PBF payment within 30 days of submission of verification of results Semi - annual Accounting system Semi-annual internal project review by DGP MOH Average user experience score among PBF / DFF participating facilities User experience score is a zero if not collected. Quarterly IVA Quarterly IVA and PBF report MOH Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities with functioning community health workers per the norms set by the National Community Health Strategy Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities that have formally declared the abandonment of the practice of FGM Annual Official act by community leadership Annual internal project review by DGP ( combined with semi - annual review ) MOH Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) Semi - annual DHIS2 Semi-annual internal project review by DGP MOH Facilities that are connected to the National Logistics Management Information System ( eLMIS ) Semi - annual eLMIS Semi-annual internal project review by DGP MOH Health surveys conducted and results made public Survey results will lead to action plans after every Semi - annual DGP Semi-annual", + "type": "system", + "explanation": "However, DHIS-2 is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS-2 is a dataset because it is mentioned in the context of data entry and health facilities.", + "contextual_reason_agent": "However, DHIS-2 is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 59, + "text": "These programs will be defined in Annual Enrolment records at sector training hubs Administrative data M & E Specialist within the PIU", + "ner_text": [ + [ + 34, + 58, + "named" + ] + ], + "validated": true, + "empirical_context": "These programs will be defined in Annual Enrolment records at sector training hubs Administrative data M & E Specialist within the PIU", + "type": "records", + "explanation": "In the context, it is indicated as part of administrative data, which implies it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Annual Enrolment records' suggests a structured collection of data related to enrolment.", + "contextual_reason_agent": "In the context, it is indicated as part of administrative data, which implies it is used as a data source.", + "contextual_signal": "described as part of administrative data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 5, + "validated": 3, + "not_validated": 2 + } + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 24, + "text": "Results Monitoring and Verification 43. The results monitoring framework assesses progress towards the PDO through key indicators, focusing on expanding the coverage and social assistance of the NPTP. Specifically, the project will monitor the number of direct project beneficiaries of education, health and e-card food vouchers. All data will be collected disaggregating by gender to be able to monitor participation by women and girls. In addition, intermediate indicators will monitor program awareness and efficiency in terms of timing between application and eligibility notification, over the life of the project. 44. A computerized modular MIS, developed under the first phase of the NPTP, is the central piece of the monitoring and evaluation ( M & E ) system and includes a module to register applicant households in the NPTP database, record the results of their eligibility assessment", + "ner_text": [ + [ + 626, + 650, + "named" + ] + ], + "validated": false, + "empirical_context": "44. A computerized modular MIS, developed under the first phase of the NPTP, is the central piece of the monitoring and evaluation ( M & E ) system and includes a module to register applicant households in the NPTP database, record the results of their eligibility assessment", + "type": "system", + "explanation": "However, it is described as a management information system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'MIS' which often relates to data management.", + "contextual_reason_agent": "However, it is described as a management information system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "172_multi0page", + "page": 44, + "text": "Annex 1: Project Design Summary SIERRA LEONE: REHABILITATION OF BASIC EDUCATION Key Performance Data Collection Strategy Hierarchy of Objectives Indicators. Critical AIsumptions Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) To rehabilitate the education 70 % completion rate for Annual sector performance Political and social stability sector in order to support the primary education in target report by the MEST. remains in order to carry out provision of education districts and a 20 % increase in the program. Continuous services as a key instrument in enrollment on average for girls dedication and support of establishing normalcy in the in primary schools in the MEST decision makers and country. target districts of Northem key operational personnel. and Eastem regions is achieved by end of Project. 20 % increase in enrollment in JSS is achieved by the end of Project 20 % increase in passing rate of students taking the BECE in the target districts in 2007 as compared to 2001 Project Development Outcome I Impact Project reports: ( from Objective to Goal ) Objective: Indicators: To assist primary and JSS to At least 50 % of primary Annual school surveys and Schools achieving basic achieve BOL standards, schools in targeted districts EMIS reports. operational standards will be defined in terms of trained meet BOL standards by the more effective in providing teachers and headmasters, end of", + "ner_text": [ + [ + 1303, + 1307, + "named" + ], + [ + 32, + 44, + "EMIS <> data geography" + ], + [ + 1017, + 1021, + "EMIS <> publication year" + ], + [ + 1037, + 1041, + "EMIS <> reference year" + ], + [ + 1200, + 1221, + "EMIS <> data type" + ] + ], + "validated": true, + "empirical_context": "and Eastem regions is achieved by end of Project. 20 % increase in enrollment in JSS is achieved by the end of Project 20 % increase in passing rate of students taking the BECE in the target districts in 2007 as compared to 2001 Project Development Outcome I Impact Project reports: ( from Objective to Goal ) Objective: Indicators: To assist primary and JSS to At least 50 % of primary Annual school surveys and Schools achieving basic achieve BOL standards, schools in targeted districts EMIS reports. operational standards will be defined in terms of trained meet BOL standards by the more effective in providing teachers and headmasters, end of", + "type": "system", + "explanation": "EMIS is indeed a dataset as it refers to an Education Management Information System that collects and reports data on schools.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of reports and indicators related to education outcomes.", + "contextual_reason_agent": "EMIS is indeed a dataset as it refers to an Education Management Information System that collects and reports data on schools.", + "contextual_signal": "mentioned as a source of data in project reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 61, + "text": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia ( P177233 ) Page 57 of 104 psychosocial, police / security, and legal support, including referrals. The result will be further disaggregated by point of entry for services. management tools and service providers ' reports. be calculated by comparing the number of GBV cases that receive at least two services ( including referrals ) to the total number of GBV cases that access services. Share of reported GBV cases who receive access to multi-sectoral response services - female Baseline and semi-annual starting in Year 2 Gender disaggregate d data from available information management tools and service providers ' reports. Aggregation and averaging of female results. MoWSA FPIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Financed sub-projects that are functioning or delivering services to communities six months after completion Sub-projects are further disaggregated by type Semi - Annual starting in YR2 Kebeles and Woredas Aggregation of community-level data.", + "ner_text": [ + [ + 660, + 688, + "named" + ] + ], + "validated": false, + "empirical_context": "be calculated by comparing the number of GBV cases that receive at least two services ( including referrals ) to the total number of GBV cases that access services. Share of reported GBV cases who receive access to multi-sectoral response services - female Baseline and semi-annual starting in Year 2 Gender disaggregate d data from available information management tools and service providers ' reports. Aggregation and averaging of female results.", + "type": "tool", + "explanation": "However, it is not a dataset as it refers to tools rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'information management tools' could imply a structured way to handle data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to tools rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 59, + "text": "Rural communities of pastoralists and agro-pastoralists that suffered major ( over 50 percent ) or complete losses of their crops and livestock, and are in need of external support to cover their basic food needs. d. Resident and IDP Households of caretakers ( pregnant and lactating women-PLW ) of malnourished children at the ICRC supported Stabilization Centers in Kismayo and Baidoa. 9. Priority will be given to women and children as these groups are identified as particularly vulnerable categories. For instance, needs assessments will include separate focus group discussions and household interviews with women, including local ' women groups ' where these exist; prioritization of woman-headed households and those with high number of children for assistance; nutrition surveys of children ' under 5 '; adjustments of the food ration and hygiene kit to the specific needs of women and children. Similarly, the process of defining the assistance type and delivery modality involves the analysis of the gender based risks and mitigation measures. For instance, delivery of food in kind as a preferred option in situations where", + "ner_text": [ + [ + 770, + 787, + "named" + ], + [ + 368, + 375, + "nutrition surveys <> data geography" + ], + [ + 380, + 386, + "nutrition surveys <> data geography" + ], + [ + 791, + 809, + "nutrition surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "Priority will be given to women and children as these groups are identified as particularly vulnerable categories. For instance, needs assessments will include separate focus group discussions and household interviews with women, including local ' women groups ' where these exist; prioritization of woman-headed households and those with high number of children for assistance; nutrition surveys of children ' under 5 '; adjustments of the food ration and hygiene kit to the specific needs of women and children. Similarly, the process of defining the assistance type and delivery modality involves the analysis of the gender based risks and mitigation measures.", + "type": "survey", + "explanation": "In this context, 'nutrition surveys' are explicitly mentioned as part of the needs assessments, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'nutrition surveys' imply a structured collection of data related to nutritional assessments.", + "contextual_reason_agent": "In this context, 'nutrition surveys' are explicitly mentioned as part of the needs assessments, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "103_Kenya-Hon-Africa-PAD-04072017", + "page": 16, + "text": "The World Bank Development Response to Displacement Impacts Project ( DRDIP ) in the Horn of Africa ( P161067 ) Page 14 of 120 To curb poverty, growth must take place in sectors where the majority of the poor depend on their livelihoods. Investment must be redirected to services targeting the poor, including improving agricultural productivity in rural areas, expanding and targeting unified social protection programs that keep people from slipping into poverty, attracting private sector investment, and enhancing human capital through improved access to quality education and health services at the local level. 16. Historically, the north and northeast regions in Kenya have experienced significant deficits in service delivery, infrastructure, and economic opportunities. These are also areas that are disproportionately affected by environmental degradation, climate change impacts, and insecurity. The Commission on Revenue Allocation identifies 14 counties as marginalized \u2014 Turkana, Mandera, Wajir, Marsabit, Samburu, West Pokot, Tana River, Narok, Kwale, Garissa, Kilifi, Taita Taveta, Isiolo, and Lamu \u2014 based on the county development index ( Commission on Revenue Allocation 2012 ) which uses indicators that measure the state of a county \u2019 s health and education systems, infrastructure, and poverty levels to identify marginalized areas for the allocation of equalization funds.", + "ner_text": [ + [ + 1130, + 1154, + "named" + ] + ], + "validated": false, + "empirical_context": "These are also areas that are disproportionately affected by environmental degradation, climate change impacts, and insecurity. The Commission on Revenue Allocation identifies 14 counties as marginalized \u2014 Turkana, Mandera, Wajir, Marsabit, Samburu, West Pokot, Tana River, Narok, Kwale, Garissa, Kilifi, Taita Taveta, Isiolo, and Lamu \u2014 based on the county development index ( Commission on Revenue Allocation 2012 ) which uses indicators that measure the state of a county \u2019 s health and education systems, infrastructure, and poverty levels to identify marginalized areas for the allocation of equalization funds.", + "type": "index", + "explanation": "However, the 'county development index' is mentioned as a measure rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'index' which often refers to a collection of data points.", + "contextual_reason_agent": "However, the 'county development index' is mentioned as a measure rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "described as a measure, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 22, + "text": "12 Sub-component 2. 2. Core modules of the Management Information System ( US $ 1. 5 million equivalent ) 40. This sub-component will support the development of basic core modules of the Management Information System to support the delivery mechanisms of a basic social safety net system. These modules will be developed for the cash transfer program and will use a unique individual identification number for each beneficiary. They will include key social program Box 3: The selection at communes, collines, and household-level Based on the poverty map, the registry will start in the four communes with the highest estimated rural poverty rate in each province ( Gitega: Bugendana, Buraza, Gitega, and Itaba; Karusi: Bugenyuzi, Gihogazi, Mutumba, and Nyabikere; Kirundo: Bugabira, Busoni, Kirundo, and Ntega; and Ruyigi: Butagwanza, Butezi, Bweru, and Gisuru ). The 16 selected communes are sub-divided in 2 to 5 zones and include on average 26 collines ( between 11 and 43 ) with an average of 535 households ( between 130 and 1, 400 ) per colline.", + "ner_text": [ + [ + 542, + 553, + "named" + ] + ], + "validated": false, + "empirical_context": "These modules will be developed for the cash transfer program and will use a unique individual identification number for each beneficiary. They will include key social program Box 3: The selection at communes, collines, and household-level Based on the poverty map, the registry will start in the four communes with the highest estimated rural poverty rate in each province ( Gitega: Bugendana, Buraza, Gitega, and Itaba; Karusi: Bugenyuzi, Gihogazi, Mutumba, and Nyabikere; Kirundo: Bugabira, Busoni, Kirundo, and Ntega; and Ruyigi: Butagwanza, Butezi, Bweru, and Gisuru ). The 16 selected communes are sub-divided in 2 to 5 zones and include on average 26 collines ( between 11 and 43 ) with an average of 535 households ( between 130 and 1, 400 ) per colline.", + "type": "concept", + "explanation": "However, the 'poverty map' is mentioned as a tool for selection rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a map that could imply data representation.", + "contextual_reason_agent": "However, the 'poverty map' is mentioned as a tool for selection rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a tool for selection, not as a data source", + "tags": [] + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 89, + "text": "A baseline customer survey will be carried out during the first months of implementation of the project. Information on project beneficiaries will help to understand project impacts in a disaggregated manner. Data collection will support the establishment of a database for future financial and economic analysis. The PIT will be responsible for collecting and reporting data as agreed in the PM & E Section included in the PIM and reflected in the Results Framework. 182. The PIT will be responsible for implementing the Management Information System in cooperation with the respective regional / technical units in the selected regions. The data and information collection, processing and analysis system will be designed for a country-wide application. In response to the need for adequate information to develop a detailed economic and financial analysis of the project, data collection will also include: ( a ) registration cost of tenure documents ( costs of producing a land title and / or other land tenure records or rights documents ); ( b ) time required for a property transaction ( ownership transfer lease of public land, time to produce a land title ); ( c ) property valuation ( changes in market values of properties with different land tenure rights ); ( d ) difference in productivity of land with different types of tenure security attached to them ( farmland, customary, formalized tenure security, and urban and potentially commercial land ); ( e ) differences in investments in land with different types of security linked to them ( irrigation investment in titled land vs other, choice of land for commercial investments ); and ( f ) use of tenure documents to access credit and loans. 183. Implementation Support Plan ( Strategy and Approach for Implementation Support ) 184. The strategy for Project Implementation Support by the World Bank reflects the nature of the project and its risk profile. The strategy aims at making the Government more efficient while remaining focused on implementation of the risk mitigation measures identified. The strategy is also an indicative and flexible instrument which will be revisited during project implementation and as part of the Implementation Status and", + "ner_text": [ + [ + 522, + 551, + "named" + ] + ], + "validated": false, + "empirical_context": "182. The PIT will be responsible for implementing the Management Information System in cooperation with the respective regional / technical units in the selected regions. The data and information collection, processing and analysis system will be designed for a country-wide application.", + "type": "system", + "explanation": "However, it is described as a system for collecting and processing information, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' in its description.", + "contextual_reason_agent": "However, it is described as a system for collecting and processing information, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 32, + "text": "KNEC to conduct midline and end-line assessments for the PDO indicators on learning outcomes ( NASMLA ). RA 3 1. 4 Beneficiary survey 1. 5 Consultancy services including for the target refugee schools ( scholarships / school kits and mentorship services ). RAs 1 and 2 1. 6 Safeguards and fiduciary actions, including scaling up GBV prevention interventions under SEQIP. PAP 1. 7 Three-year capacity building plan PAP 2. Key TA and capacity building for adequate implementation of the initiated reforms and other systems strengthening activities. 2. 1 Consultancy services, trainings, and workshops including for the target refugee schools ( national survey on child development and pre-primary school quality assurance standards and tools, including tools for appraisal for teachers in pre - school41; two TAs for CBC and CBA; strengthening supply chain system for sanitary towels; develop implementation guidelines for the school meals and nutrition policy; utilization of the online based assessment item portal by teachers; support for the SBTS initiative; and activities for OOSC \u2019 in the 15 Counties ). RA 3 and PAP US $ 7 million42 Procurement: International partner institutions; consultants / firms; trainings and workshops. 39 A detailed Annual Workplan and Budget for the IPF component is part of the POM, which is prepared before Program effectiveness. 40 The SIPs will include aspects of tree planting, rainwater harvesting and disaster mitigation actions. MoE will develop an overall disaster management plan for schools in the regions affected by drought and floods. 41 TSC will develop a custom appraisal tool for nontrained teachers teaching at the camp-based refugee schools. 42 Ibid", + "ner_text": [ + [ + 979, + 1014, + "named" + ] + ], + "validated": false, + "empirical_context": "2. 1 Consultancy services, trainings, and workshops including for the target refugee schools ( national survey on child development and pre-primary school quality assurance standards and tools, including tools for appraisal for teachers in pre - school41; two TAs for CBC and CBA; strengthening supply chain system for sanitary towels; develop implementation guidelines for the school meals and nutrition policy; utilization of the online based assessment item portal by teachers; support for the SBTS initiative; and activities for OOSC \u2019 in the 15 Counties ). RA 3 and PAP US $ 7 million42 Procurement: International partner institutions; consultants / firms; trainings and workshops.", + "type": "tool", + "explanation": "However, it is described as a 'portal' and not as a data source, indicating it functions as a tool rather than a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'assessment' which is often associated with data collection.", + "contextual_reason_agent": "However, it is described as a 'portal' and not as a data source, indicating it functions as a tool rather than a dataset.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 144, + "text": "Altogether, these four regions ( North, Far North, East and Adamawa ) account for 66 percent of the poor households in the country ( even though they are home to only 38 percent of the total population ). Access to basic services is limited, and these regions are relatively isolated from the rest of the country. The presence of large numbers of refugees has exacerbated these pre-existing challenges. 3. The refugee crisis has reinforced existing territorial inequities and a rapid increase in poverty in northern Cameroon had been observed before the heightened insecurity in the region associated with Boko Haram activities. The deterioration of the security environment in northern Cameroon occurred in large part after data were collected for the 2014 ECAM. The poverty estimates for the northern regions should, therefore, be considered lower bounds, since the impact of an influx of refugees, the closure of markets, roads and frontiers, were largely not recorded by the ECAM. Because of the interruption of agricultural activities and trade, as well as population displacement and increased vulnerability and food insecurity in the Far North, approximately 2. 4 million people are considered food insecure and 250, 000 people are estimated to be suffering from acute malnutrition. 4.", + "ner_text": [ + [ + 758, + 762, + "named" + ], + [ + 60, + 67, + "ECAM <> data geography" + ], + [ + 507, + 524, + "ECAM <> data geography" + ], + [ + 753, + 757, + "ECAM <> publication year" + ] + ], + "validated": true, + "empirical_context": "The refugee crisis has reinforced existing territorial inequities and a rapid increase in poverty in northern Cameroon had been observed before the heightened insecurity in the region associated with Boko Haram activities. The deterioration of the security environment in northern Cameroon occurred in large part after data were collected for the 2014 ECAM. The poverty estimates for the northern regions should, therefore, be considered lower bounds, since the impact of an influx of refugees, the closure of markets, roads and frontiers, were largely not recorded by the ECAM.", + "type": "survey", + "explanation": "ECAM is explicitly mentioned as a source of data collection for poverty estimates, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because ECAM is referenced in relation to data collection and poverty estimates.", + "contextual_reason_agent": "ECAM is explicitly mentioned as a source of data collection for poverty estimates, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 65, + "text": "It will describe clearly such implementation aspects as beneficiary targeting, cash transfer procedures, complementary measures, delivery mechanisms, accounting documentation, and information flows throughout the project implementation cycle. No disbursements for cash transfers will be made before the technical component of the PIM is prepared and adopted. 10. In addition to the measures mentioned above, the Project Implementation Unit will outsource several activities to local consulting firms, NGOs to facilitate the implementation of the projects. These arrangements may include: ( a ) Contractual arrangements with the National Statistical Institute ( Institut de Statistiques et \u00c9tudes \u00c9conomiques du Burundi, ISTEEBU ) or other data collection firms, to conduct household-level surveys for the targeting and impact evaluation and individual-level surveys for the beneficiary surveys; ( b ) Contracts with payment agencies to carry out transfer payments; ( c ) Contracts with NGOs or other entities with the required technical expertise, qualifications, and experience to facilitate the work of the targeting committees, the implementation of the complementary measures and the implementation of the grievance redress mechanisms. Financial Management 11. Financial Management Assessment.", + "ner_text": [ + [ + 773, + 796, + "named" + ], + [ + 628, + 658, + "household-level surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "In addition to the measures mentioned above, the Project Implementation Unit will outsource several activities to local consulting firms, NGOs to facilitate the implementation of the projects. These arrangements may include: ( a ) Contractual arrangements with the National Statistical Institute ( Institut de Statistiques et \u00c9tudes \u00c9conomiques du Burundi, ISTEEBU ) or other data collection firms, to conduct household-level surveys for the targeting and impact evaluation and individual-level surveys for the beneficiary surveys; ( b ) Contracts with payment agencies to carry out transfer payments; ( c ) Contracts with NGOs or other entities with the required technical expertise, qualifications, and experience to facilitate the work of the targeting committees, the implementation of the complementary measures and the implementation of the grievance redress mechanisms. Financial Management 11.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to surveys designed to collect data for targeting and impact evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household-level surveys' implies a structured collection of data collected from households.", + "contextual_reason_agent": "This is indeed a dataset as it refers to surveys designed to collect data for targeting and impact evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 13, + "text": "In 2012, only 22 percent of rural health centers ( RHCs ) had most ( more than 75 percent ) of the key RMNCHN drugs, 5 percent had most of the family planning commodities, none had sufficient basic laboratory tests, and 39 percent had a functional ambulance. 18 Funding gaps for essential medicines and maintenance and repairs, inadequate capacity to quantify needs, and suboptimal supply chains are key factors leading to high stockouts of medicines and lack of functional equipment at public HFs. The limited open hours of PHC HFs represent 11 NIPS and ICF International. 2013. PDHS 2012 \u2010 13. UNHCR. 2011. Population Profiling, Verification and Response Survey of Afghans in Pakistan. 12 Wilhelm J. A. and S. Helleringer. 2019. \" Utilization of Non \u2010 Ebola Health Care Services during Ebola Outbreaks: a Systematic Review and Meta \u2010 Analysis. \" Journal of Global Health. 9 ( 1 ). https: / / www. ncbi. nlm. nih. gov / pmc / articles / PMC6344071 /; Chang H. J., N. Huang, C. H.", + "ner_text": [ + [ + 580, + 584, + "named" + ], + [ + 3, + 7, + "PDHS <> reference year" + ], + [ + 555, + 572, + "PDHS <> publisher" + ], + [ + 574, + 578, + "PDHS <> publication year" + ], + [ + 585, + 594, + "PDHS <> reference year" + ], + [ + 691, + 723, + "PDHS <> author" + ] + ], + "validated": true, + "empirical_context": "2013. PDHS 2012 \u2010 13. UNHCR.", + "type": "survey", + "explanation": "In this context, PDHS is explicitly referenced alongside a specific year and organization, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PDHS is often associated with demographic and health surveys that collect structured data.", + "contextual_reason_agent": "In this context, PDHS is explicitly referenced alongside a specific year and organization, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 39, + "text": "These services are offered by health facility staff that received GBV counselling and messaging as part of their regular on-the-job training to support and direct vulnerable women to specific support channels and resources. MOHE, supported by UN agencies, developed a remote and face-to-face GBV counseling flowchart targeting primary health care workers to clarify management methods and referral pathways. These service adaptations were informed by a rapid assessment of available health care options for survivors of GBV during the COVID-19 outbreak. The survey included health care workers from primary health care centers, hospitals, and mobile medical clinics from 16 districts in Iraq. Of those surveyed, 69 percent of health facilities reported that their staff have already been trained on GBV. Following the COVID-19 outbreak, 81 percent of health facilities surveyed have already updated their referral pathways. Among those health facilities, 95 percent included GBV services in their updates. These interventions will be monitored and measured through the project \u2019 s results framework, TPMA reports, and through ESF instruments. 13 Sex-disaggregated data by priority group on vaccination uptake is not available, however, the gender gap in uptake among these groups is likely to be similar to the overall trend. This project will contribute to collection of sex-disaggregated data across priority groups whenever possible.", + "ner_text": [ + [ + 1372, + 1394, + "named" + ], + [ + 687, + 691, + "sex-disaggregated data <> data geography" + ], + [ + 1240, + 1260, + "sex-disaggregated data <> data description" + ] + ], + "validated": true, + "empirical_context": "13 Sex-disaggregated data by priority group on vaccination uptake is not available, however, the gender gap in uptake among these groups is likely to be similar to the overall trend. This project will contribute to collection of sex-disaggregated data across priority groups whenever possible.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to a specific type of data that is being collected for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'sex-disaggregated data' implies a structured collection of data categorized by gender.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific type of data that is being collected for analysis.", + "contextual_signal": "mentioned as a type of data to be collected", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "105_Mali-Reinsertion-of-Ex-combatants-Project", + "page": 48, + "text": "The M & E unit will serve to: ( i ) improve project management; ( ii ) ensure transparency in project data sharing with various stakeholders; ( iii ) ensure efficiency of the activities; ( iv ) provide accurate and timely information to adjust or modify the activities in relation to the evolution of the context during implementation; and ( v ) provide accurate and timely information to help management take the right decisions. The PIU M & E Specialist will be responsible for the overall M & E activities, and the implementing partners will be responsible with providing the necessary information, data, and reports from the field. The PIU will be supported by the coordination mechanisms as outlined in the Peace Agreement with regards to monitoring, implementation and performance of the overall DDR program. 56. The M & E unit of the PIU will produce monthly, quarterly, and annual project activity reports. These reports will be available electronically to facilitate access to the various government partners and donors. Ad hoc assessments and needs-based studies conducted will include formal assessments on the demobilization process and regular qualitative and quantitative tracer beneficiary surveys. A mid-term review and final implementation report will be conducted in collaboration with donors and government. During the project Mid-Term Review, progress towards achieving the PDOs will be evaluated and remedial action will be taken as needed. 57. It is essential that information on all beneficiaries is captured during the cantonment process, securely stored, and subsequently utilized to inform project implementation. To that end, beneficiaries and project implementation progress and effectiveness will be monitored through a well-functioning DDR MIS. The MIS will support M & E activities through several databases to monitor: ( i ) ex-combatants from cantonment to reinsertion; ( ii ) reinsertion support provided by implementing partners; and ( iii ) financial management. The MIS will provide data on", + "ner_text": [ + [ + 1766, + 1773, + "named" + ] + ], + "validated": false, + "empirical_context": "It is essential that information on all beneficiaries is captured during the cantonment process, securely stored, and subsequently utilized to inform project implementation. To that end, beneficiaries and project implementation progress and effectiveness will be monitored through a well-functioning DDR MIS. The MIS will support M & E activities through several databases to monitor: ( i ) ex-combatants from cantonment to reinsertion; ( ii ) reinsertion support provided by implementing partners; and ( iii ) financial management.", + "type": "system", + "explanation": "'DDR MIS' is mentioned as a system that supports monitoring and evaluation activities, but it is not explicitly described as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DDR MIS' is a dataset because it includes 'MIS' which often refers to management information systems that handle data.", + "contextual_reason_agent": "'DDR MIS' is mentioned as a system that supports monitoring and evaluation activities, but it is not explicitly described as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 33, + "text": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "ner_text": [ + [ + 939, + 977, + "named" + ], + [ + 240, + 248, + "national nutrition surveillance system <> reference population" + ], + [ + 1023, + 1031, + "national nutrition surveillance system <> reference population" + ] + ], + "validated": true, + "empirical_context": "( including regulating and engaging private sector in health ), standards, protocols, guidelines, and plans related to RMNCAH-N, PHC, quality of care, pandemic preparedness and response, digitalization and e-health strategy, integration of refugees into the national health system, human resources for health, review of supply chain efficiency and controls to improve efficiency, prevention of the medicalization of FGM, health care waste management, \u201c green \u201d facilities and climate change mitigation and adaptation measures, emergency preparedness and response, including climate-change induced or exacerbated natural disasters and pandemics / epidemics. TA will also address high-leverage initiatives including the rollout of DHIS2 ( which will disaggregate services by patients \u2019 refugee status ), development of and rollout of the national Logistics Management Information System ( eLMIS ), and activation and operationalization of a national nutrition surveillance system; TA will also resolve bottlenecks preventing refugees from receiving free services to which they are entitled 30 Topics on climate resilience will include preventive behavior and recognition of warning signs around water - and vector-borne diseases exacerbated by climate change, multi-risk contingency planning and disease surveillance and response.", + "type": "system", + "explanation": "In the context, it is mentioned as part of the operationalization efforts, indicating it functions as a data source for monitoring nutrition.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'surveillance system,' which often implies a structured collection of data.", + "contextual_reason_agent": "In the context, it is mentioned as part of the operationalization efforts, indicating it functions as a data source for monitoring nutrition.", + "contextual_signal": "mentioned as a data source in the context of operationalization", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 61, + "text": "Consolidat ed informatio Multiple sources - community mobilization company reports, PMU data, self - reported data, reports of target utilities, survey data. Methodology for each CE channel will be reported separately as prescribed in the POM. MEWR, KMK, PMU", + "ner_text": [ + [ + 145, + 156, + "named" + ], + [ + 94, + 114, + "survey data <> data type" + ] + ], + "validated": true, + "empirical_context": "Consolidat ed informatio Multiple sources - community mobilization company reports, PMU data, self - reported data, reports of target utilities, survey data. Methodology for each CE channel will be reported separately as prescribed in the POM.", + "type": "survey", + "explanation": "In this context, 'survey data' is explicitly mentioned as part of the information sources used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey data' typically refers to collected information from surveys.", + "contextual_reason_agent": "In this context, 'survey data' is explicitly mentioned as part of the information sources used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "180_multi0page", + "page": 7, + "text": "B. Strategic Context 1. Sector-related Country Assistance Strategy ( CAS ) goal supported by the project: ( see Annex 1 ) Click here to get to the CAS Document Document number: IDAJR2000-60 Date of latest CAS discussion: May 18, 2000 The proposed project is fully consistent with the Bosnia and Herzegovina CAS which was presented to the Board on May 18, 2000. The project, through its focus on institutional processes for transparent, inclusive and sustainable services and infrastructure in low-income areas, is in keeping with the overall CAS objective of generating sustainable growth and expanding the benefits and opportunities of this growth to all Bosnians, including the most vulnerable. In particular, it supports the CAS priorities of: ( i ) building institutions and strengthening governance; ( ii ) building social sustainability, through promoting access and inclusion; and ( iii ) completing reconstruction. 2. Main sector issues and Government strategy: Four years of reconstruction and growth in Bosnia and Herzegovina have brought dramatic increases in income and well-being. Per capita incomes have more than doubled to about US $ 1, 000 ( from US $ 456 at end 1995 ). Household surveys provide evidence of increasing per capita consumption. Basic services such as water, education and healthcare are now broadly available in most areas ( although with uneven quality and cost-effectiveness ).", + "ner_text": [ + [ + 1188, + 1205, + "named" + ], + [ + 284, + 306, + "Household surveys <> data geography" + ], + [ + 656, + 664, + "Household surveys <> reference population" + ], + [ + 1180, + 1184, + "Household surveys <> reference year" + ], + [ + 1214, + 1259, + "Household surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "Per capita incomes have more than doubled to about US $ 1, 000 ( from US $ 456 at end 1995 ). Household surveys provide evidence of increasing per capita consumption. Basic services such as water, education and healthcare are now broadly available in most areas ( although with uneven quality and cost-effectiveness ).", + "type": "survey", + "explanation": "In this context, 'household surveys' are explicitly mentioned as providing evidence, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household surveys' typically involve structured data collection.", + "contextual_reason_agent": "In this context, 'household surveys' are explicitly mentioned as providing evidence, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 1015, + 1020, + "named" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ], + [ + 829, + 832, + "NEMIS <> author" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 61, + "text": "MoEC \u2010 PIU Percentage of complaints / grievances resolved following agreed EZ \u2010 Kar service standards The number of project related complaints or grievances resolved in accordance with EZ \u2010 Kar Semi \u2010 annually GRM Systems Administrative data collected through GRM systems MoEC \u2010 PIU", + "ner_text": [ + [ + 210, + 221, + "named" + ] + ], + "validated": false, + "empirical_context": "MoEC \u2010 PIU Percentage of complaints / grievances resolved following agreed EZ \u2010 Kar service standards The number of project related complaints or grievances resolved in accordance with EZ \u2010 Kar Semi \u2010 annually GRM Systems Administrative data collected through GRM systems MoEC \u2010 PIU", + "type": "system", + "explanation": "'GRM Systems' is not a dataset but rather a system mentioned in the context of administrative data collection.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GRM Systems' is a dataset because it is associated with data collection.", + "contextual_reason_agent": "'GRM Systems' is not a dataset but rather a system mentioned in the context of administrative data collection.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 22, + "text": "The most prominent is the approval of the Electricity Connections Policy ( ECP ) in 2018. The ECP covers the period 2018-27 and subsidizes connection costs for customers in proximity of the existing network as the means for scaling up access to grid connectivity as well as improved consumption. More specifically, the ECP targets: ( a ) three million new connections by 2027 through fully subsidizing no pole and one-pole connections, addressing affordability of internal wiring through credit and low-cost technologies ( for example, ready boards ), increasing capacity of electricity SPs to meet connection targets, and promoting off-grid solutions through private sector participation; and ( b ) increasing electricity demand through facilitating connection of large-load customers and promoting productive uses of electricity. Another important policy initiative has been the adoption of a quality assurance framework for component-based solar home systems ( SHSs ) in 2019. The GoU has also undertaken two important studies \u2013 national off-grid strategy and diagnostic of distribution sector institutional reforms, which will influence the direction of grid and off-grid programs in the run-up to the 2030 SDG7 targets. The GoU has mainstreamed the role of planning and using a geographical information system ( GIS ) for establishing a spatial development infrastructure ( SDI ) for integrated power sector planning across generation, transmission, and distribution under the auspices of the MEMD.", + "ner_text": [ + [ + 1283, + 1314, + "named" + ] + ], + "validated": false, + "empirical_context": "The GoU has also undertaken two important studies \u2013 national off-grid strategy and diagnostic of distribution sector institutional reforms, which will influence the direction of grid and off-grid programs in the run-up to the 2030 SDG7 targets. The GoU has mainstreamed the role of planning and using a geographical information system ( GIS ) for establishing a spatial development infrastructure ( SDI ) for integrated power sector planning across generation, transmission, and distribution under the auspices of the MEMD.", + "type": "system", + "explanation": "However, it is described as a system for planning rather than a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'geographical information system' suggests a structured collection of spatial data.", + "contextual_reason_agent": "However, it is described as a system for planning rather than a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 30, + "text": "The PDO-level and intermediate results indicators would be monitored using the following sources and methodologies: ( i ) data collected through MISs supported by the project ( registry, payment systems ); ( ii ) regular administrative data collection processes; ( iii ) beneficiary surveys ( spot checks ) supported by the project and conducted by outsourced external firm ( s ); ( iv ) process evaluations of the Social Registry, the cash transfers and the human development ( social promotion ) interventions supported by the project; ( v ) an additional round of the cash transfer impact evaluation conducted by outsourced external firms; ( vi ) other M & E studies conducted by the Client; and ( vii ) progress reports to be prepared by the project implementation team ( especially the M & E specialist ). 78. The Taazour team would be responsible for gathering the relevant reports and information from CSA representatives and other relevant parties involved in project implementation to monitor the PDO and results, and for communicating with the World Bank according to the frequency of reports to be described in the project \u2019 s Results Framework.", + "ner_text": [ + [ + 415, + 430, + "named" + ], + [ + 271, + 290, + "Social Registry <> data description" + ], + [ + 571, + 602, + "Social Registry <> data description" + ], + [ + 1054, + 1064, + "Social Registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "The PDO-level and intermediate results indicators would be monitored using the following sources and methodologies: ( i ) data collected through MISs supported by the project ( registry, payment systems ); ( ii ) regular administrative data collection processes; ( iii ) beneficiary surveys ( spot checks ) supported by the project and conducted by outsourced external firm ( s ); ( iv ) process evaluations of the Social Registry, the cash transfers and the human development ( social promotion ) interventions supported by the project; ( v ) an additional round of the cash transfer impact evaluation conducted by outsourced external firms; ( vi ) other M & E studies conducted by the Client; and ( vii ) progress reports to be prepared by the project implementation team ( especially the M & E specialist ). 78.", + "type": "registry", + "explanation": "In the context, it is mentioned as part of the monitoring indicators and is involved in evaluations, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to beneficiaries.", + "contextual_reason_agent": "In the context, it is mentioned as part of the monitoring indicators and is involved in evaluations, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source in evaluations and monitoring", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 29, + "text": "In India, for example, biometric-based e-KYC contributed to increasing financial inclusion from 35 percent in 2011 to 80 percent in 201743 and reduced customer onboarding costs for firms from US $ 23 per customer to as low as US $ 0. 15. 44 This and other examples are relevant to Ethiopia, given that current validation exercises focus on using demographic rather than biometric verification and are thus prone to imposters using real information ( for example, that of deceased persons ). 45 64. The economic and financial analysis \u2019 model to appraise the project is based on the economic impact of Fayda, following a twofold approach: savings from digitization of service delivery and revenue streams from transaction fees and add-on services made possible by Fayda. To estimate the savings that arise from transitioning from paper-based to digital-enabled service delivery, the model first estimated the savings from using Fayda versus the current Kebele IDs, for 40 World Bank. 2016. World Development Report: Digital Dividends. 41 World Bank. 2018. Public Sector Savings and Revenue from Identification Systems: Opportunities and Constraints. 42 World Bank. 2017. Advancing Electronic Food Security Payments in Ethiopia. 43 World Bank. 2018. Findex Survey. 44 World Bank. 2018. Private Sector Economic Impacts from Identification Systems. 45 World Bank. 2018. Public Sector Savings and Revenue from Identification Systems: Opportunities and Constraints.", + "ner_text": [ + [ + 1248, + 1261, + "named" + ], + [ + 3, + 8, + "Findex Survey <> data geography" + ], + [ + 281, + 289, + "Findex Survey <> data geography" + ], + [ + 971, + 981, + "Findex Survey <> publisher" + ], + [ + 1037, + 1047, + "Findex Survey <> publisher" + ], + [ + 1049, + 1053, + "Findex Survey <> publication year" + ], + [ + 1152, + 1162, + "Findex Survey <> publisher" + ], + [ + 1164, + 1168, + "Findex Survey <> publication year" + ], + [ + 1217, + 1225, + "Findex Survey <> data geography" + ], + [ + 1230, + 1240, + "Findex Survey <> publisher" + ], + [ + 1242, + 1246, + "Findex Survey <> publication year" + ], + [ + 1266, + 1276, + "Findex Survey <> publisher" + ], + [ + 1278, + 1282, + "Findex Survey <> publication year" + ], + [ + 1348, + 1358, + "Findex Survey <> publisher" + ], + [ + 1360, + 1364, + "Findex Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2018. Findex Survey. 44 World Bank.", + "type": "survey", + "explanation": "The Findex Survey is explicitly mentioned as a survey, which is a type of dataset used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Findex Survey' is a structured collection of data collected by the World Bank.", + "contextual_reason_agent": "The Findex Survey is explicitly mentioned as a survey, which is a type of dataset used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 64, + "text": "Specific activities include ( a ) undertaking catchment management measures to promote soil and water conservation, river bank protection and restoration; and ( b ) and providing alternative livelihoods for affected communities. The proposed component will include a TA to prepare a Water Resources Strategy for the Albert WMZ, CMPs and related technical studies for priority sub-catchments in identified hot spot sub catchments, and a national groundwater management study; and ( c ) activities to strengthen water resource monitoring and information systems, including installation of the WIS at the national and WMZ levels, installation of hydrologic monitoring systems, and rehabilitation of the National Water Quality Reference Laboratory. 27. Apart from the national support to IWRM, this component will mainly support activities in the Upper Nile and Kyoga WMZs ( where most of the WSS investments financed under this Project and the WMDP are located ). This component will contribute to national, regional, and local stakeholders \u2019 capacity to apply an IWRM approach to infrastructure development.", + "ner_text": [ + [ + 591, + 594, + "named" + ] + ], + "validated": false, + "empirical_context": "Specific activities include ( a ) undertaking catchment management measures to promote soil and water conservation, river bank protection and restoration; and ( b ) and providing alternative livelihoods for affected communities. The proposed component will include a TA to prepare a Water Resources Strategy for the Albert WMZ, CMPs and related technical studies for priority sub-catchments in identified hot spot sub catchments, and a national groundwater management study; and ( c ) activities to strengthen water resource monitoring and information systems, including installation of the WIS at the national and WMZ levels, installation of hydrologic monitoring systems, and rehabilitation of the National Water Quality Reference Laboratory. 27.", + "type": "system", + "explanation": "However, 'WIS' is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'WIS' is a dataset because it is related to water resource monitoring and information systems.", + "contextual_reason_agent": "However, 'WIS' is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "073_Ethiopia-Consolidated-Water-Supply-Sanitation-and-Hygiene-Account-Project", + "page": 81, + "text": "institutional capacities for gender mainstreaming. The GAP will be refined during implementation, informed by a study on the different constraints men and women face in the WASH sector with proposed actions that need to be put in place and the necessary mitigating measures. The Project will ensure active participation of women in the decision-making processes in the PIUs, the Project Steering Committees, as well as promote the active participation of women in municipal water boards and WASHCOMs. There will be TA for the NWCO to enable them to: ( i ) monitor the implementation of GAP; ( ii ) provide gender training for key stakeholders including the MoWIE and water, health, and education bureau staff to ensure an understanding of women \u2019 s issues; and ( iii ) collect gender - disaggregated information as part of the Project \u2019 s results tracking and monitoring system. Gender-disaggregated information will be collected as part of the routine tracking and monitoring system of the Project. The GAP has been incorporated in the POM. 94. Citizen engagement: The Project will further facilitate citizen \u2019 s empowerment by conducting social M & E surveys with beneficiaries before the midterm review ( after the first year \u2019 s activities are carried out ) and postimplementation ( after all subprojects are carried out ) to evaluate the impact on the ground.", + "ner_text": [ + [ + 1140, + 1160, + "named" + ], + [ + 777, + 811, + "social M & E surveys <> data type" + ], + [ + 1166, + 1179, + "social M & E surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "94. Citizen engagement: The Project will further facilitate citizen \u2019 s empowerment by conducting social M & E surveys with beneficiaries before the midterm review ( after the first year \u2019 s activities are carried out ) and postimplementation ( after all subprojects are carried out ) to evaluate the impact on the ground.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to surveys conducted to collect data for evaluating the project's impact.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'social M & E surveys' implies a structured collection of data gathered from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to surveys conducted to collect data for evaluating the project's impact.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "136_PAD7230P1476890AD0October0100final", + "page": 63, + "text": "52 dissemination workshops will be held in participating municipalities involving local citizens, NGOs, CBOs, the private sector, and so forth. Finally, annual technical audits will track participatory aspects of subproject prioritization, planning, implementation and oversight. Resp: Govt / Ban k Status: Not due Stage: Impl. Recurrent: Yes Due Date: Continuo us Frequency: Periodic 3. 2 Social and Environmental Rating Moderate Risk Description: Municipalities may not have the capacity to consider social and environmental impacts when choosing their priority projects. Risk Management: A dedicated Environmental and Social Safeguards Specialist already engaged by CVDB ( through the RLDP ) will be responsible for safeguards oversight of municipal subprojects. Annual technical audits will cover safeguards compliance. Resp: Govt Status: Not due Stage: Impl. Recurrent: Yes Due Date: Continuo us Frequency: Periodic 3. 3 Program and Donor Rating Substantial Risk Description: As the majority of Project financing stems from donors, any donor decision affecting design or disbursement would pose a risk. Risk Management: Project identification, preparation and appraisal missions have been done jointly with financing partners, leading to a collaboratively designed project. Annual joint project reviews and periodic joint missions led by the Bank will ensure that current collaborative approaches are maintained during project implementation.", + "ner_text": [ + [ + 153, + 176, + "named" + ] + ], + "validated": false, + "empirical_context": "52 dissemination workshops will be held in participating municipalities involving local citizens, NGOs, CBOs, the private sector, and so forth. Finally, annual technical audits will track participatory aspects of subproject prioritization, planning, implementation and oversight. Resp: Govt / Ban k Status: Not due Stage: Impl.", + "type": "audit", + "explanation": "However, the context indicates that 'annual technical audits' are processes for tracking and oversight, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'audits' can imply a collection of data or findings.", + "contextual_reason_agent": "However, the context indicates that 'annual technical audits' are processes for tracking and oversight, not a structured collection of data.", + "contextual_signal": "mentioned only as a process, not as a data source", + "tags": [] + }, + { + "filename": "016_BOSIB029693bc600f090cb0745b2b12ee69", + "page": 13, + "text": "These effects further exacerbate pressures related to population growth, deforestation, and land use change, increasing water scarcity risks throughout the country. 6 Kenya faces minimal seismic hazards in comparison to neighboring countries with hazard levels highest in the north-west and south - west regions ( for example Nakuru and Eldoret have a medium degree of seismic hazard ). 7 4. Over recent decades, there have been substantial inflows of refugees into Kenya, most of whom live in designated camps. About 53 percent of Kenya \u2019 s registered refugee population of about 550, 0008 originate from Somalia, 25 percent from South Sudan and the remainder from other countries. Almost half of the refugees in Kenya ( 43 percent ) reside in Dadaab ( Garissa County ), 41 percent in Kakuma ( Turkana County ) and 16 percent in urban areas ( mainly Nairobi ). The inflow of refugees into the Arid and Semi-Arid Lands ( ASAL ) counties of Garissa and Turkana has significantly changed the population, spatial, and social dynamics of host counties. 1 United Nations World Population Prospects: 2022 Revision 2 World Bank national accounts data. 3 Idem 4 Kenya \u2019 s Gini coefficient stands at 40. 8 ( 2015 ). World Bank, Poverty and Inequality Platform. 5 Kenya Population and Housing Census ( 2019 ); World Bank ( 2016 ). Kenya Urbanization Review. 6 Think Hazard \u2013 Kenya; World Bank Group. 2021. Climate Risk Country Profile: Kenya. 7 GSDRC ( 2013 ) Assessing seismic risk in Kenya 8 UNHCR. Kenya Figures at a Glance. https: / / www. unhcr. org / ke / figures-at-a-glance.", + "ner_text": [ + [ + 1254, + 1289, + "named" + ], + [ + 167, + 172, + "Kenya Population and Housing Census <> data geography" + ], + [ + 466, + 471, + "Kenya Population and Housing Census <> data geography" + ], + [ + 532, + 537, + "Kenya Population and Housing Census <> data geography" + ], + [ + 754, + 768, + "Kenya Population and Housing Census <> data geography" + ], + [ + 795, + 809, + "Kenya Population and Housing Census <> data geography" + ], + [ + 1154, + 1159, + "Kenya Population and Housing Census <> data geography" + ], + [ + 1254, + 1259, + "Kenya Population and Housing Census <> data geography" + ], + [ + 1292, + 1296, + "Kenya Population and Housing Census <> publication year" + ], + [ + 1313, + 1317, + "Kenya Population and Housing Census <> publication year" + ], + [ + 1476, + 1481, + "Kenya Population and Housing Census <> data geography" + ] + ], + "validated": true, + "empirical_context": "World Bank, Poverty and Inequality Platform. 5 Kenya Population and Housing Census ( 2019 ); World Bank ( 2016 ). Kenya Urbanization Review.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a census, which is a structured collection of population data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Census', which typically refers to a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a census, which is a structured collection of population data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 58, + "text": "A key objective of this subcomponent is to disseminate information on the compliance of REGIDESO and the Government of Burundi with the obligations established in the performance contract in order to create social accountability. This sub-component will fund the following activities: ( i ) Public consultation and information campaign. The project will fund consultant services to assist REGIDESO in launching and maintaining a campaign on water and sanitation to: ( a ) transparently publish and disseminate key service indicators; ( b ) promote the conservation of water through early detection and prevention of leaks at the household level, and education on how to avoid wasting water; ( c ) promote hand-washing to prevent the spread of bacteria and disease; ( d ) promote techniques for the safe transportation and storage of potable water to prevent its contamination; and ( e ) promote actions to prevent the spread of HIV / AIDS. ( ii ) Collection of monitoring data through recurrent surveys. The project will fund consulting services to conduct an annual household and business survey in the project area to monitor energy and water consumption practices, sanitation practices and satisfaction with water supply services. 3 ( f ) Support to the Project Implementation Unit ( PIU ) within REGIDESO ( US $ 0. 4 million ). This sub-component will finance the consultant services and operational costs of the Project Implementation Unit including: ( i ) The cost of establishing the PIU office, including office equipment and a small number ( 2-3 ) of vehicles for use during implementation;", + "ner_text": [ + [ + 1060, + 1096, + "named" + ], + [ + 119, + 126, + "annual household and business survey <> data geography" + ], + [ + 1128, + 1232, + "annual household and business survey <> data description" + ], + [ + 1257, + 1284, + "annual household and business survey <> author" + ] + ], + "validated": true, + "empirical_context": "( ii ) Collection of monitoring data through recurrent surveys. The project will fund consulting services to conduct an annual household and business survey in the project area to monitor energy and water consumption practices, sanitation practices and satisfaction with water supply services. 3 ( f ) Support to the Project Implementation Unit ( PIU ) within REGIDESO ( US $ 0.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves the collection of monitoring data through recurrent surveys.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of data gathered through surveys.", + "contextual_reason_agent": "This is indeed a dataset as it involves the collection of monitoring data through recurrent surveys.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 18, + "text": "Poor instructional conditions and weak management are also characterized by a lack of textbooks and guides and an imbalanced distribution of resources. The SDI survey showed that 91. 3 percent of the pupils surveyed did not have textbooks and only 23. 4 percent of public schools had minimum equipment. 17. A complex curriculum reform in national languages has been under way for more than a decade. Planned in 2004 and launched in 2009, the MEP officially started its piloting in 2012. The curriculum reform was subsequently rolled out in Grade 1 with five national languages in 500 pilot schools in 2015 10 Occasional professional development opportunities are provided through classroom training, distance learning, training through pedagogical advisory units at the local level ( Pedagogical Advisory Unit [ Cellule d \u2019 Animation P\u00e9dagogique, CAPED ] and mini-CAPED [ CAPED at school level ] ), and inspection visits. 11 In Niger, only 24 percent of teaching staff are civil servants. The majority of teachers ( 75. 4 percent ), are \u2018 contractual teachers \u2019 ( enseignants contractuels ) with fixed-term contracts and reduced salaries, and often no prior training. A minority of teachers ( 0. 6 percent ) are volunteers.", + "ner_text": [ + [ + 156, + 166, + "named" + ], + [ + 200, + 206, + "SDI survey <> reference population" + ], + [ + 481, + 485, + "SDI survey <> publication year" + ], + [ + 928, + 933, + "SDI survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Poor instructional conditions and weak management are also characterized by a lack of textbooks and guides and an imbalanced distribution of resources. The SDI survey showed that 91. 3 percent of the pupils surveyed did not have textbooks and only 23.", + "type": "survey", + "explanation": "The SDI survey is explicitly mentioned as providing empirical data regarding the lack of textbooks, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on textbook availability among pupils.", + "contextual_reason_agent": "The SDI survey is explicitly mentioned as providing empirical data regarding the lack of textbooks, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 22, + "text": "Low trust in the fairness of student assessment for the general secondary education examination. b. High fiscal cost of the exam. c. Limited access to the exam for refugees living in camps. Result Area 3 on Transparency and Accountability through Digitalization DLI8 on enhancing e - information 30. 00 - 30. 00 a. Weak enforcement of the 2007 Access to Information Law on proactive disclosure of information and response to requests for information. b. Opportunity to enhance government reporting to the public online about progress achieved towards economic and public sector modernization. DLI9 on interactive statistical information 30. 00 - 30. 00 Important gaps in open data coverage and openness to be mitigated by establishing a national data repository with an interactive interface and protocols to allow access to the data for policy analysis and research purposes. DLI10 on institutionalizing effective health data use 8 18. 00 26. 00 a. Weak health data management. b. Need to institutionalize the data quality assurance mechanism in place, establishing data quality standards and conducting routine assessments. c. Opportunity to better utilize quality data for more effective and timely decision-making. Front-end Fees 0. 8025 TBD d. Total 350 54. 34 404. 34", + "ner_text": [ + [ + 737, + 761, + "named" + ] + ], + "validated": true, + "empirical_context": "00 - 30. 00 Important gaps in open data coverage and openness to be mitigated by establishing a national data repository with an interactive interface and protocols to allow access to the data for policy analysis and research purposes. DLI10 on institutionalizing effective health data use 8 18.", + "type": "repository", + "explanation": "This is a dataset as it is described as a national data repository meant for data access for policy analysis and research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a repository that is intended to store and provide access to data.", + "contextual_reason_agent": "This is a dataset as it is described as a national data repository meant for data access for policy analysis and research.", + "contextual_signal": "mentioned as a data source for policy analysis and research", + "tags": [] + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 79, + "text": "71 is generally weak. Contract management capacity and communication / transaction with vendors also needs improvement as implementation of the contracts needs to be done on time. ( d ) The JSMO has its own procurement office staffed with two persons who are civil service employees. Both report to the financial department, and they work according to its own by - law. Considering the small quantity of procurement involved, the Program will rely on this arrangement for procurement undertaken by JSMO up to the threshold of GSD ( as stated above ). Accounting and Financial Reporting 24. The assessment concludes that the accounting and financial reporting systems are found acceptable for purposes of the Program. The Government adopts the cash basis of accounting while IPUs ( JIC and JSMO ) follow accrual basis in accordance with International Financial Reporting Standards. The Government adopts a chart of accounts that is compatible with the Government Finance Statistics Manual 2001. MOF has an ambitious plan to convert to the International Public Sector Accounting Standards cash basis by 2020. Both the final accounts and the in-year reporting are timely but are still prepared through the Oracle legacy system until the GFMIS implementation is completed and relevant legislative amendments are made. The monthly General Government Finance Bulletin includes budgetary government finance statistics aggregated according to the economic and functional classifications ( budget versus actual ).", + "ner_text": [ + [ + 1234, + 1239, + "named" + ] + ], + "validated": false, + "empirical_context": "MOF has an ambitious plan to convert to the International Public Sector Accounting Standards cash basis by 2020. Both the final accounts and the in-year reporting are timely but are still prepared through the Oracle legacy system until the GFMIS implementation is completed and relevant legislative amendments are made. The monthly General Government Finance Bulletin includes budgetary government finance statistics aggregated according to the economic and functional classifications ( budget versus actual ).", + "type": "system", + "explanation": "GFMIS is mentioned as a system for financial management, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is related to financial reporting and data management.", + "contextual_reason_agent": "GFMIS is mentioned as a system for financial management, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "The subcomponent will also support strengthened supervision, management, and on-the-job coaching for IPs and service providers through an integrated supervisory approach in which IPs develop CHD capacity, inclusive of health service delivery planning, supervision, and data entry into District Health Information Software 2 ( DHIS2 ). The subcomponent will include planning and execution of outreach ( village visits, mass campaigns, and so on ) and transportation ( vehicle, boat, and foot ) modalities paying close attention to seasonal population movement patterns and access. Climate sensitive health service delivery and planning will be integral to the approach under this subcomponent. 30. Subcomponent 1. 1 will channel resources through UNICEF to sub-contract national and international NGOs for health service delivery and coordination. 29 In collaboration and through the leadership of the MoH, SMoHs, and CHDs, UNICEF will be responsible for ( a ) oversight and coordination of health services and DHIS2 data collection and entry systems; ( b ) supervision and quality assurance of IPs and health facilities in line with national plans and guidelines; ( c ) coordinating and conducting in-service training; ( d ) through an integrated approach, developing the capacity of SMoHs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( e ) integrated pharmaceutical procurement, quantification, and forecasting.", + "ner_text": [ + [ + 1010, + 1015, + "named" + ], + [ + 746, + 752, + "DHIS2 <> publisher" + ], + [ + 923, + 929, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "1 will channel resources through UNICEF to sub-contract national and international NGOs for health service delivery and coordination. 29 In collaboration and through the leadership of the MoH, SMoHs, and CHDs, UNICEF will be responsible for ( a ) oversight and coordination of health services and DHIS2 data collection and entry systems; ( b ) supervision and quality assurance of IPs and health facilities in line with national plans and guidelines; ( c ) coordinating and conducting in-service training; ( d ) through an integrated approach, developing the capacity of SMoHs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( e ) integrated pharmaceutical procurement, quantification, and forecasting.", + "type": "system", + "explanation": "DHIS2 is indeed a data collection system used for health information, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and entry systems.", + "contextual_reason_agent": "DHIS2 is indeed a data collection system used for health information, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data collection and entry system", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 99, + "text": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees \u2022 Survey of schools to identify potential beneficiaries from refugee-affected local councils \u2022 Mobilization and sensitization of communities in reaching refugees for enrolling and retaining children in school \u2022 Development of Response Strategy and Plan 2019-2023 Priority Area 3: Improving service delivery through performance-based Financing ( PBF ) at school level \u2022 Strengthening capacity of school management councils \u2022 Mobilizing and strengthening community participation in school management", + "ner_text": [ + [ + 542, + 546, + "named" + ] + ], + "validated": false, + "empirical_context": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data management and information systems.", + "contextual_reason_agent": "However, EMIS is mentioned as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 63, + "text": "will be measured, at a minimum, on a SNSOP MIS and payment schedules Payment data stored in the MIS will be compared with approved payment schedules. Selected Implementing Partner", + "ner_text": [ + [ + 37, + 46, + "named" + ] + ], + "validated": false, + "empirical_context": "will be measured, at a minimum, on a SNSOP MIS and payment schedules Payment data stored in the MIS will be compared with approved payment schedules. Selected Implementing Partner", + "type": "system", + "explanation": "However, the context indicates that 'SNSOP MIS' is referred to as a system, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' suggests a Management Information System that could store data.", + "contextual_reason_agent": "However, the context indicates that 'SNSOP MIS' is referred to as a system, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 12, + "text": "The limited adoption of Sanad is due to low perceived relevance for many individuals and service providers, as well as to eligibility restricted so far to citizens, thus leaving various demographic groups ( such as non-Jordanians, amongst whom refugees, and military personnel ) ineligible for a digital ID. MODEE plans to increase the uptake of Sanad by ( 1 ) expanding eligibility and outreach to enroll additional users ( for example, through the operationalization of new GSCs and targeted visits to refugee communities ); ( 2 ) introducing new features to enhance trust, interoperability, security, and people centricity, such as adding a mechanism to collect user consent for data sharing; and ( 3 ) implementing a DPI ecosystem approach that is aligned with international good practice, to achieve effective, end-to-end, digitalized service delivery workflows. Taking a trusted, people-centric DPI ecosystem approach offers the opportunity to embrace international standards and open internet technologies by using trusted data 4 See the State of the Country Reports on the ESCJ website at https: / / www. esc. jo / Reportsen. aspx. 5 DPI refers to digital ID, payment, and data exchange capabilities that are fundamental to enabling service delivery at scale and supporting innovation in the digital economy. DPI provides reusable and foundational digital platforms that allow public - and private - sector service providers to build and innovate their products and services. 6 See Jordan \u2019 s detailed rating in Technical Assessment. 7 Kemp, Simon. 2023. \u201c Digital 2023: Jordan. \u201d Datareportal. https: / / datareportal. com / reports / digital-2023-jordan.", + "ner_text": [ + [ + 24, + 29, + "named" + ] + ], + "validated": false, + "empirical_context": "The limited adoption of Sanad is due to low perceived relevance for many individuals and service providers, as well as to eligibility restricted so far to citizens, thus leaving various demographic groups ( such as non-Jordanians, amongst whom refugees, and military personnel ) ineligible for a digital ID. MODEE plans to increase the uptake of Sanad by ( 1 ) expanding eligibility and outreach to enroll additional users ( for example, through the operationalization of new GSCs and targeted visits to refugee communities ); ( 2 ) introducing new features to enhance trust, interoperability, security, and people centricity, such as adding a mechanism to collect user consent for data sharing; and ( 3 ) implementing a DPI ecosystem approach that is aligned with international good practice, to achieve effective, end-to-end, digitalized service delivery workflows.", + "type": "program", + "explanation": "'Sanad' is not a dataset but rather a program aimed at providing digital ID services.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is mentioned in the context of digital ID and data sharing.", + "contextual_reason_agent": "'Sanad' is not a dataset but rather a program aimed at providing digital ID services.", + "contextual_signal": "'mentioned only as a project, not as a data source'", + "tags": [] + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "ner_text": [ + [ + 11, + 15, + "named" + ], + [ + 17, + 21, + "KDHS <> reference year" + ], + [ + 92, + 96, + "KDHS <> publication year" + ], + [ + 241, + 246, + "KDHS <> data geography" + ], + [ + 412, + 416, + "KDHS <> reference year" + ] + ], + "validated": true, + "empirical_context": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level.", + "type": "survey", + "explanation": "KDHS is indeed a dataset as it is explicitly mentioned as a survey providing data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed KDHS is a dataset because it is referenced alongside specific data points and outcomes.", + "contextual_reason_agent": "KDHS is indeed a dataset as it is explicitly mentioned as a survey providing data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 21, + "text": "22 A key component of the project is the development of SIPs corresponding to set targets. The SIPs aim at guiding service providers towards providing higher service level and improve financial sustainability. The SIPs outline the steps that M / VCs and JSCs need to take to meet the set OBA Targets. The actions in the SIPs are based on specific issues that different M / VCs face to manage their SWM system, allowing M / VCs to address their unique challenges differently while working towards common goals for the entire project area. Mechanism for independent output verification The OBA grant will be subject to independent verification to assess the scores for each OBA Target and its associated indicators. The IVA will review progress annually semi-annually and evaluate achievements against the agreed target for the indicators identified. Each review will result in a score against which the payment is prorated assuming the minimum passing score is achieved for each indicator. At the JSC level, the IVA will review the MIS records to check that scores have been calculated correctly and subsequently select a sample of that data entered in the MIS to verify whether it has been recorded accurately. Acceptable verification will trigger the transfer of the corresponding OBA grant to JSC-H & B. The scorecard will be used for both independent verification and overall project \u2019 s M & E purposes.", + "ner_text": [ + [ + 1031, + 1034, + "named" + ] + ], + "validated": false, + "empirical_context": "Each review will result in a score against which the payment is prorated assuming the minimum passing score is achieved for each indicator. At the JSC level, the IVA will review the MIS records to check that scores have been calculated correctly and subsequently select a sample of that data entered in the MIS to verify whether it has been recorded accurately. Acceptable verification will trigger the transfer of the corresponding OBA grant to JSC-H & B.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system but not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves records and data entry.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system but not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "165_311820EG", + "page": 31, + "text": "M & E Implementation 7. M & E will be the responsibility o f the KG Department with the support o f the C A T and all other relevant partners and stakeholders at the central, sub-national, and community levels. Integration o f ECE into the existing EMIS is an important project activity to be carried out, which i s expected to help better monitor and evaluate project progress and impact. The ECE Department will provide semestrial project progress reports, and will make these available prior to supervision missions in addition to submitting copies to the Bank, CIDA and WFP. Project Development Objective and Performance Indicators 8. The Project Development Objective is to support the Arab Republic of Egypt in providing quality early childhood education that improves the school readiness of 4 and 5 year old children, particularly the disadvantaged. 9. The key performance indicator for this project is: a dupercent increase in enrollment over baseline in KGI-2 of adequate quality in targeted areas ( with particular emphasis on those who are disadvantaged by gender, poverty and disability ). 10.", + "ner_text": [ + [ + 249, + 253, + "named" + ] + ], + "validated": false, + "empirical_context": "M & E will be the responsibility o f the KG Department with the support o f the C A T and all other relevant partners and stakeholders at the central, sub-national, and community levels. Integration o f ECE into the existing EMIS is an important project activity to be carried out, which i s expected to help better monitor and evaluate project progress and impact. The ECE Department will provide semestrial project progress reports, and will make these available prior to supervision missions in addition to submitting copies to the Bank, CIDA and WFP.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to monitoring and evaluation activities.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "159_38147core", + "page": 71, + "text": "Assessment Survey will be annexed to the CSP. Data from the UNHCR supervised survey in turn includes demographic characteristics o f beneficiary families, income and livelihood activities, housing condition, land ownership and other vulnerability conditions etc updated in April 2006. This complements the CSP providing extensive baseline data for future development and monitoring purposes. Continuous Social Impact Assessment: The CSIA will record community perceptions and grievances on the identification o f IDP households for participation in the housing program, the resentment between IDP and non IDP communities and social issues that impact upon project implementation. This would offer a useful tool for the PPU to introduce mid-course corrections to address grass roots concerns. Please see Annex 3 for more information. Social Safeguard Issues: Overall, the PHP i s expected to have limited negative social impact. None o f the PHP components include activities that trigger a drastic change with adverse social consequences. The housing assistance will provide support for the construction or upgrading o f already existing temporary or partly-completed houses o f those IDP families with uncontestable land title. This i s a socially feasible program targeted at poor IDP families who were forcibly displaced and have since lived in temporary shelters in their preferred location o f choice for 16 years.", + "ner_text": [ + [ + 0, + 17, + "named" + ], + [ + 101, + 153, + "Assessment Survey <> data description" + ], + [ + 273, + 283, + "Assessment Survey <> reference year" + ], + [ + 330, + 343, + "Assessment Survey <> data type" + ], + [ + 1435, + 1453, + "Assessment Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Assessment Survey will be annexed to the CSP. Data from the UNHCR supervised survey in turn includes demographic characteristics o f beneficiary families, income and livelihood activities, housing condition, land ownership and other vulnerability conditions etc updated in April 2006.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to collect and analyze data on various demographic and vulnerability conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as an 'Assessment Survey' which implies a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is used to collect and analyze data on various demographic and vulnerability conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 20, + "text": "This may be exacerbated in situations of high insecurity as families may see marrying their young daughters to older men as a way to protect them and to improve access to natural and financial resources. In addition, proxy indicators in Niger for social norms appear to legitimize, condone, and promote GBV, with wife beating being seen as justified by 59. 6 percent of women ( DHS 2012 ), against a regional average of 45. 7 percent. In Niger, there are no laws on domestic violence or aggravated penalties for crimes against spouses or family members. However, the GoN has been taking measures in recent years, with support from development partners, to reduce gender inequality, such as child protection committees, family planning assistance to married adolescent girls, and improvement of educational attainment for girls to remain enrolled in school in the event of pregnancy or marriage. This was supported by the World Bank \u2019 s Development Policy Financing series 2019 \u2013 20 ( P173113 ). 5. The spillover of the crisis in Mali and the Boko Haram regional crisis is causing a significant displacement of people toward and within Niger. According to the United Nations High Commissioner for Refugees ( UNHCR ), Niger is home to more than 568, 410 displaced people ( August 2021 ).", + "ner_text": [ + [ + 217, + 233, + "named" + ] + ], + "validated": false, + "empirical_context": "This may be exacerbated in situations of high insecurity as families may see marrying their young daughters to older men as a way to protect them and to improve access to natural and financial resources. In addition, proxy indicators in Niger for social norms appear to legitimize, condone, and promote GBV, with wife beating being seen as justified by 59. 6 percent of women ( DHS 2012 ), against a regional average of 45.", + "type": "concept", + "explanation": "'Proxy indicators' are mentioned as measures rather than a dataset or data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'proxy indicators' refers to a structured collection of data used to measure social norms.", + "contextual_reason_agent": "'Proxy indicators' are mentioned as measures rather than a dataset or data source in the context.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 17, + "text": "The global spread of COVID-19 has led to unprecedented disruptions in schooling, requiring understanding of how students \u2019 learning has changed. Moldova \u2019 s participation in PISA 2025 and the analysis of PISA 2022 results are important for managing learning recovery from the pandemic. The MoER consolidated and strengthened the student-level EMIS to support evidence-based decision-making. 22 EMIS now covers preschool, primary, general secondary, and vocational education data, which are regularly collected by the system. Reports on the performance of each school are produced and disclosed annually. However, significant differences exist between data reported in the EMIS open data portal and data published by the National Bureau of Statistics ( NBS ) of Moldova. A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "ner_text": [ + [ + 956, + 960, + "named" + ] + ], + "validated": false, + "empirical_context": "A single integrated EMIS capable of linking information on students with population register data and financing data of the Ministry of Finance ( MoF ) is currently lacking. The current EMIS and established data reporting practices provide a solid foundation for the development of an integrated EMIS 22 Under support of the completed World Bank-financed Moldova Education Reform Project.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because EMIS (Education Management Information System) is related to data management.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 58, + "text": "The M & E system would track financial outlays, key program results ( including those core indicators that would be common across programs within the SP system ), impacts and beneficiary feedback as inputs to guide program management in the implementation of the programs. The grievance redress mechanism would track grievances linked to targeting, receipt of transfers and implementation of the complementary activities. 35. The project will also finance the development and management of a grievance redress mechanisms to respond to complaints and ensure a high level of accountability across program operations. These mechanisms include: in-person complaints to program commune focal point, SMS-based system to a third-party grievance manager ( conditional on finding a trusted and competent agent and for possibilities for social control of a Government program ), boxes at the colline-level in the care of a civil society organization, possibly a hotline at central-level. All complaints, whether received in person, through SMS, phone or boxes will be logged in the MIS and steps for their resolution will be tracked and monthly reports sent to the operation coordinator, the coordinator for complementary activities as well as the project coordinator. 36.", + "ner_text": [ + [ + 1072, + 1075, + "named" + ] + ], + "validated": false, + "empirical_context": "These mechanisms include: in-person complaints to program commune focal point, SMS-based system to a third-party grievance manager ( conditional on finding a trusted and competent agent and for possibilities for social control of a Government program ), boxes at the colline-level in the care of a civil society organization, possibly a hotline at central-level. All complaints, whether received in person, through SMS, phone or boxes will be logged in the MIS and steps for their resolution will be tracked and monthly reports sent to the operation coordinator, the coordinator for complementary activities as well as the project coordinator. 36.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves logging and tracking complaints, which suggests data handling.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": ") establish a quality of care system through development of guidelines, tools, and standards, training of trainers on quality of care, piloting quality of care teams and supporting national scale up, and support for National and State level quality improvement supervision. 38. Subcomponent 2. 4: Health Management Information Systems ( WHO; US $ 2. 50 million: US $ 0. 93 million equivalent IDA [ including US $ 0. 63 million WHR ] and US $ 1. 57 million Trust Funds [ US $ 0. 17 million SDTF and US $ 1. 40 million MDTF ] ). This subcomponent will focus on developing systems and procedures for the national HMIS, with an emphasis on supporting the collection of routine data through DHIS2, to standardize data collection, entry and cleaning, as well as instituting data quality improvement practices. This will enhance targeting and data tracking for refugees and provide regularly updated information to understand the evolving needs on the ground that will aid further in the decision-making process.", + "ner_text": [ + [ + 686, + 691, + "named" + ] + ], + "validated": false, + "empirical_context": "40 million MDTF ] ). This subcomponent will focus on developing systems and procedures for the national HMIS, with an emphasis on supporting the collection of routine data through DHIS2, to standardize data collection, entry and cleaning, as well as instituting data quality improvement practices. This will enhance targeting and data tracking for refugees and provide regularly updated information to understand the evolving needs on the ground that will aid further in the decision-making process.", + "type": "system", + "explanation": "However, DHIS2 is described as a system for data collection rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is associated with data collection and management.", + "contextual_reason_agent": "However, DHIS2 is described as a system for data collection rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 101, + "text": "Allocation by Component and DLI ( US $ million ) Component and DLI IDA RSW Grant Total Component 1: Improving Access, Quality, and Education System Management ( DLI ) 72 23 95 DLI 1: Improved distribution of teachers recruited by the state in public primary schools 12 8 20 DLI 2: Increased capacities of teachers in the effective and efficient use of the new curriculum in pre-primary and primary schools 10 0 10 DLI 3: Increased availability of essential textbooks in public primary schools 15 5 20 DLI 4: Increased access to pre-school in rural areas through community pre-school, according to standards 10 0 10 DLI 5: Establishment of a standardized student learning assessment system for primary and secondary education 15 0 15 DLI 6: Integrated EMIS functional and operational 10 0 10 DLI 7: Improved learning environment and quality of education for children in host community schools with refugees 0 10 10 Component 2: Improving School Effectiveness through Performance-based Financing 12 3 15 Component 3: Institutional Strengthening and Project Management 16 4 20 Total IDA Contribution 100 30 130", + "ner_text": [ + [ + 751, + 755, + "named" + ] + ], + "validated": false, + "empirical_context": "Allocation by Component and DLI ( US $ million ) Component and DLI IDA RSW Grant Total Component 1: Improving Access, Quality, and Education System Management ( DLI ) 72 23 95 DLI 1: Improved distribution of teachers recruited by the state in public primary schools 12 8 20 DLI 2: Increased capacities of teachers in the effective and efficient use of the new curriculum in pre-primary and primary schools 10 0 10 DLI 3: Increased availability of essential textbooks in public primary schools 15 5 20 DLI 4: Increased access to pre-school in rural areas through community pre-school, according to standards 10 0 10 DLI 5: Establishment of a standardized student learning assessment system for primary and secondary education 15 0 15 DLI 6: Integrated EMIS functional and operational 10 0 10 DLI 7: Improved learning environment and quality of education for children in host community schools with refugees 0 10 10 Component 2: Improving School Effectiveness through Performance-based Financing 12 3 15 Component 3: Institutional Strengthening and Project Management 16 4 20 Total IDA Contribution 100 30 130", + "type": "system", + "explanation": "'EMIS' is mentioned as an integrated system but not explicitly as a data source or dataset in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is an acronym that could imply a structured collection of data.", + "contextual_reason_agent": "'EMIS' is mentioned as an integrated system but not explicitly as a data source or dataset in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "016_BOSIB029693bc600f090cb0745b2b12ee69", + "page": 38, + "text": "The DA ceiling will be variable and the DA threshold for direct payments will be US $ 500, 000 equivalent as captured in the Disbursement and Financial Information Letter ( DFIL ) and subject to amendment in accordance with the World Bank Disbursement Guidelines. The disbursement methods will be detailed in the DFIL and will include direct payment, reimbursement, advances and special commitment. The DA will be managed by the NT. Similarly, SDHUD will open a Project Account ( PA ) denominated in Kenya Shillings in the CBK from which payments for eligible activities will be made. The initial disbursement will be made upon receiving a withdrawal application. This Component will adopt the Statement of Expenditures ( SOE ) method of disbursement. The disbursement arrangements and thresholds will be detailed in the DFIL; ( iii ) Accounting - A qualified project accountant has been assigned under KUSP, who will also support KUSP2. Payments will be made using the government Integrated Financial Management System ( IFMIS ) system; ( iv ) Financial reporting - The NPCT will provide quarterly unaudited Interim Financial Reports ( IFRs ) and annual audit reports to the World Bank. Under KUSP, the NPCT has been providing quarterly IFRs and annual financial statements within the stipulated timelines; ( v ) Internal controls - The existing FM procedures manual will be updated to support KUSP2.", + "ner_text": [ + [ + 981, + 1019, + "named" + ] + ], + "validated": false, + "empirical_context": "The disbursement arrangements and thresholds will be detailed in the DFIL; ( iii ) Accounting - A qualified project accountant has been assigned under KUSP, who will also support KUSP2. Payments will be made using the government Integrated Financial Management System ( IFMIS ) system; ( iv ) Financial reporting - The NPCT will provide quarterly unaudited Interim Financial Reports ( IFRs ) and annual audit reports to the World Bank. Under KUSP, the NPCT has been providing quarterly IFRs and annual financial statements within the stipulated timelines; ( v ) Internal controls - The existing FM procedures manual will be updated to support KUSP2.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to a financial management system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a financial management system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 65, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 61 of 86 RESULT_FRAME_TBL_IO Indicator Name PBC Baseline End Target Upgraded MIS to scale up interventions at national level ( Yes / No ) No Yes Beneficiaries receiving social protection transfers through digital payments ( Percentage ) 0. 00 90. 00 Integration of refugee and host communities into national social protection systems Approval of a Social Protection Strategy integrating refugees and host communities as target groups ( Yes / No ) No Yes Project management and implementation GRM in place and being used to monitor feedback trends, including with mobile phones provided by the project, with claims addressed and closed and monthly reports being prepared ( Yes / No ) No Yes Process evaluation carried out ( Number ) 0. 00 4. 00 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Households in targeted areas included in the National Social Registry Number of households that are registered in the National Social Registry in project implementation areas Annual National Social Registry The Project Implementation Unit will obtain the data from the agency handling the National Social Registry Project Implementation Unit Households in targeted areas included Number of refugee Annual National Socio-economic data Agency handling the", + "ner_text": [ + [ + 1119, + 1143, + "named" + ], + [ + 1001, + 1011, + "National Social Registry <> reference population" + ], + [ + 1212, + 1239, + "National Social Registry <> author" + ] + ], + "validated": true, + "empirical_context": "00 4. 00 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Households in targeted areas included in the National Social Registry Number of households that are registered in the National Social Registry in project implementation areas Annual National Social Registry The Project Implementation Unit will obtain the data from the agency handling the National Social Registry Project Implementation Unit Households in targeted areas included Number of refugee Annual National Socio-economic data Agency handling the", + "type": "registry", + "explanation": "In the context, it is explicitly mentioned as a datasource for obtaining data, confirming its role as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data collection for households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a datasource for obtaining data, confirming its role as a structured collection of data.", + "contextual_signal": "mentioned as a datasource for obtaining data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 20, + "text": "As contribution to the project, UNICEF would develop the nutrition and early childhood development package. They may also develop the lesson plans over the 36 months and provide additional assistance for the implementation, including quality control or adding modules on other topics. 34. The promotion activities will be implemented by local non-governmental organizations already involved in these issues. The Commune Family Development Centers ( CFDC ) from the Ministry of Human Rights, Social Affairs and Gender will participate in the supervision of the delivery of these activities, along with the program commune focal point. The Ministries of Health and Education will provide technical guidance as needed and ensure that the promotional activities encourage the use of health and education services. Component 2: Support to the key delivery mechanisms of a basic social safety net system ( US $ 7. 5 million equivalent ) 35. The second component will support the development and implementation of four key instruments: ( i ) a targeting mechanism and potential beneficiary database; ( ii ) core modules of a basic management information system; ( iii ) monitoring and evaluation modules; and ( iv ) capacity building for the implementation of the PNPS. The instruments will be anchored around the cash transfer program described in Component 1, but with a view to eventually serve a broader set of targeted programs that would contribute to the realization of the PNPS objectives. Sub-component 2. 1: Implementation of a precursor database for the registry ( US $ 2. 0 million equivalent ) 36. Among these instruments, the beneficiary database and its associated targeting mechanism are a key pillar for the coordination of programs. They are also critical for ensuring transparency in the selection of beneficiaries in the Burundian context of recurrent fragility and entrenched structural poverty. Initially, the database will be developed to identify the", + "ner_text": [ + [ + 1118, + 1153, + "named" + ] + ], + "validated": false, + "empirical_context": "5 million equivalent ) 35. The second component will support the development and implementation of four key instruments: ( i ) a targeting mechanism and potential beneficiary database; ( ii ) core modules of a basic management information system; ( iii ) monitoring and evaluation modules; and ( iv ) capacity building for the implementation of the PNPS. The instruments will be anchored around the cash transfer program described in Component 1, but with a view to eventually serve a broader set of targeted programs that would contribute to the realization of the PNPS objectives.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'management information system' which often relates to data handling.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 18, + "text": "The updated methodology will reflect the main improvements achieved during the first phase ( locality based, participative process, grievance mechanism, and so on ). 25. Households from Hodh Chargui, including those in the Mbera Refugee Camp that are considered poor, will be included in the Social Registry. This inclusion will be a positive step towards better coordination between humanitarian actors and the Government and between emergency and development support. The methodology to identify poor refugee households for inclusion in the Social Registry will be based on the profiling work carried out in 2018 by the UNHCR and WFP. It identified six different household categories: ( a ) deprived; ( b ) precarious; ( c ) unstable; ( d ) fragile ( e ) emerging; and ( f ) catalyst ( see figure 3 ). Each category gradually receives ( or will receive ) differentiated assistance based on its needs. This profiling work was carried out with the participation of camp and Government authorities. Based on preliminary data analysis, categories 1, 2 and 3 ( \u201c deprived \u201d, \u201c precarious \u201d, and \u201c unstable \u201d ) which include approximately 7, 500 households will enter the Social Registry. 26. This sub-component will finance: ( a ) the costs related to the community targeting ( Social Registry enrolment officers and related logistics and material ); ( b ) the socio-economic data collection costs for up to 350, 000 households ( 70, 000 households per year ); and ( c ) the costs related to quality insurance ( grievance mechanism, toll-free number, spot-check surveys, process evaluation, and so on ). Figure 3: Refugee categorization in Mauritania Source: UNHCR", + "ner_text": [ + [ + 292, + 307, + "named" + ], + [ + 223, + 241, + "Social Registry <> data geography" + ], + [ + 498, + 521, + "Social Registry <> reference population" + ], + [ + 610, + 614, + "Social Registry <> publication year" + ], + [ + 622, + 627, + "Social Registry <> publisher" + ], + [ + 1637, + 1647, + "Social Registry <> data geography" + ], + [ + 1656, + 1661, + "Social Registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "25. Households from Hodh Chargui, including those in the Mbera Refugee Camp that are considered poor, will be included in the Social Registry. This inclusion will be a positive step towards better coordination between humanitarian actors and the Government and between emergency and development support.", + "type": "registry", + "explanation": "In this context, it is indeed a dataset as it is described as including households and facilitating coordination among actors.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' implies a structured collection of data about households.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is described as including households and facilitating coordination among actors.", + "contextual_signal": "mentioned as a data source for including households", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 96, + "text": "88 Include refugees and economic migrants in the Department of Statistics Employment and Unemployment Surveys ( target date: quarterly survey starting January 2017 ) Y Carry out an awareness-raising campaign regarding household enterprises to inform the public about the availability of this option and the reduced restrictions and incentives to foster the creation of such businesses. Y Instruct the JIC to recruit 26 high caliber staff with relaxed salary constraints as recently allowed by the Cabinet ( target date: June 2017 ) Y Publish and publicize an \u2018 investor \u2019 s guide \u2019 for domestic and foreign investors with specific information for Syrian investors and other restricted nationalities ( target date: December 2017 ). Y Set up a Client Relationship Management ( CRM ) information technology ( IT ) system and database for investor targeting and aftercare at the JIC ( target date: December 2018 ) Y Strengthen the MOL \u2019 s Inspectorate Unit \u2019 s capacity to identify, track, and more effectively resolve noncompliance on labor and environmental standards through the development of IT tools ( database and program ) to enable staff to input and monitor data ( including development of a baseline data on labor and environmental compliance to standards ); establish linkages with other entities within the MOL ( such as hotline, child labor, operational safety and health [ OSH ] ), Ministry of Environment, and Ministry of Social Development; and increase", + "ner_text": [ + [ + 49, + 109, + "named" + ], + [ + 11, + 19, + "Department of Statistics Employment and Unemployment Surveys <> reference population" + ], + [ + 24, + 41, + "Department of Statistics Employment and Unemployment Surveys <> reference population" + ], + [ + 151, + 163, + "Department of Statistics Employment and Unemployment Surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "88 Include refugees and economic migrants in the Department of Statistics Employment and Unemployment Surveys ( target date: quarterly survey starting January 2017 ) Y Carry out an awareness-raising campaign regarding household enterprises to inform the public about the availability of this option and the reduced restrictions and incentives to foster the creation of such businesses. Y Instruct the JIC to recruit 26 high caliber staff with relaxed salary constraints as recently allowed by the Cabinet ( target date: June 2017 ) Y Publish and publicize an \u2018 investor \u2019 s guide \u2019 for domestic and foreign investors with specific information for Syrian investors and other restricted nationalities ( target date: December 2017 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data used for analyzing employment and unemployment statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey conducted by the Department of Statistics.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data used for analyzing employment and unemployment statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 47, + "text": "Annual Progress reports Impact evaluation, progress reports drawing on data from selected schools MoER PMT Annual education statistics reports produced and publicly disseminated based on data generated by the integrated EMIS and used for education sector management including refugee response Reports providing education statistics are produced and publicly disseminated on an annual basis thanks to the integrated EMIS ( to identify determinants of student and school performance for decision making on teacher policies / training, tutoring program, and other investments in the Annual Integrated EMIS database; annual education reports progress and monitoring reports; annual education reports MoER, PMT", + "ner_text": [ + [ + 580, + 611, + "named" + ], + [ + 107, + 142, + "Annual Integrated EMIS database <> data type" + ] + ], + "validated": true, + "empirical_context": "Annual Progress reports Impact evaluation, progress reports drawing on data from selected schools MoER PMT Annual education statistics reports produced and publicly disseminated based on data generated by the integrated EMIS and used for education sector management including refugee response Reports providing education statistics are produced and publicly disseminated on an annual basis thanks to the integrated EMIS ( to identify determinants of student and school performance for decision making on teacher policies / training, tutoring program, and other investments in the Annual Integrated EMIS database; annual education reports progress and monitoring reports; annual education reports MoER, PMT", + "type": "database", + "explanation": "In the context, it is explicitly mentioned as part of the education statistics reports and is used for education sector management, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'database', which often implies a structured collection of data.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as part of the education statistics reports and is used for education sector management, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source for education statistics reports", + "tags": [] + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 106, + "text": "The project will support REDs and municipalities to consider aspects of risk mitigation, crisis preparedness, and crisis response in their planning and connect it to the national management and response system. 9. Building capacities in monitoring key risks and trends in education service delivery. Because data collection in insecure and remoted environments is challenging, the project will adopt innovative solutions and integrate flexible M & E methods, such as iterative beneficiaries monitoring, third-party monitoring, and the use of ICT tools to collect real-time and localized data, including in hard-to-reach environments. The geo-mapping of schools / activities will help identify opportunities for coordination and collaboration with other World Bank projects, especially PARCA ( P164563 ) which supports the construction and rehabilitation of damaged school infrastructure in the refugee-affected areas of Diffa, Tahoua, and Tillaberi, and other donor \u2019 s interventions. The project \u2019 s RF and M & E arrangements will ensure that data are disaggregated by geographical area, gender, and social groups, when relevant and feasible, which in turn will allow for the monitoring of the impact of project interventions on FCV issues. 38 This includes refugees and host communities, including those IDPs that are also host communities.", + "ner_text": [ + [ + 542, + 551, + "named" + ] + ], + "validated": false, + "empirical_context": "Building capacities in monitoring key risks and trends in education service delivery. Because data collection in insecure and remoted environments is challenging, the project will adopt innovative solutions and integrate flexible M & E methods, such as iterative beneficiaries monitoring, third-party monitoring, and the use of ICT tools to collect real-time and localized data, including in hard-to-reach environments. The geo-mapping of schools / activities will help identify opportunities for coordination and collaboration with other World Bank projects, especially PARCA ( P164563 ) which supports the construction and rehabilitation of damaged school infrastructure in the refugee-affected areas of Diffa, Tahoua, and Tillaberi, and other donor \u2019 s interventions.", + "type": "tool", + "explanation": "'ICT tools' are mentioned as methods for data collection, not as a structured collection of data themselves.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ICT tools' could refer to a dataset because it is associated with data collection methods.", + "contextual_reason_agent": "'ICT tools' are mentioned as methods for data collection, not as a structured collection of data themselves.", + "contextual_signal": "mentioned only as a method for data collection, not as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 119, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 114 of 117. ANNEX 6. PROGRAM ACTION PLAN Action Description Source DLI # Responsibility Timing Completion Measurement Undertake data analysis in the NEMIS platform and publish annual educational statistical booklet. Technical MoE Recurrent Yearly Annual educational statistical booklet published for core education data, including refugee children and learner with special needs. Conduct policy dialogue for inclusion / mainstrea ming of refugees in the NEMIS. Technical MoE Other Continuous Data for refugee children included in the NEMIS Training: implementation of the consolidated three year capacity building plan for staff development at MoE and the Program \u2019 s implementing entities ( IEs ). Technical MoE Recurrent Yearly Targets in the plan implemented Technical expertise to be engaged in the areas of CBC and formative assessments Technical MoE Recurrent Yearly two international Partner institutions procured to provide technical support to KICD and KNEC in the implementation and monitoring of the roll out of CBC and assessments. Utilization of the online based item portal for competency based assessment Technical KNEC Recurrent Continuous Teachers post assessment items in the portal and also use the assessment items in the portal. Timely release of funds Exchequer to the Implementing Entities Fiduciary Systems NT, MoE and TSC Recurrent Yearly Timely release of funds to the Implementing Entities ( IEs ) PPRA to undertake compliance assessment.", + "ner_text": [ + [ + 531, + 536, + "named" + ] + ], + "validated": false, + "empirical_context": "Technical MoE Recurrent Yearly Annual educational statistical booklet published for core education data, including refugee children and learner with special needs. Conduct policy dialogue for inclusion / mainstrea ming of refugees in the NEMIS. Technical MoE Other Continuous Data for refugee children included in the NEMIS Training: implementation of the consolidated three year capacity building plan for staff development at MoE and the Program \u2019 s implementing entities ( IEs ).", + "type": "system", + "explanation": "NEMIS is mentioned as a system for managing educational data, but it is not explicitly described as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is associated with educational data and statistics.", + "contextual_reason_agent": "NEMIS is mentioned as a system for managing educational data, but it is not explicitly described as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 8, + "text": "There exists concurrently considerable excess capacity and wide-scale overcrowding of schools \u2013 much of it linked to rented facilities, but not exclusively. \u2022 ECE ( Early Childhood Education ): Significant achievements in construction, teacher training, curriculum development, teacher professional development, parent involvement and standards setting have helped make this one of the more dynamic elements of Jordan \u2019 s education system. The private sector is responsible for 90 percent of provision ( down from 95 percent in 2003 ), and improvements in regulation and guidance regarding standards have helped this subsector mature as it expands. Yet significant challenges remain: continued expansion of access to Kindergarten Year Two ( KG2 ) for the roughly half of children without access to KG2 classes competes with growing demand for greater public investment in Kindergarten Year 1 ( KG1 ) level provision; poor urban communities and children in rural areas compete for access to public provision; quantitative expansion competes with the need for consolidation of quality. \u2022 Technical, and Vocational Education and Training ( TVET ): Enrollment in secondary vocational education as a share of total secondary enrolment declined from 18 percent in 2000 to 12 percent in 2005, suggesting a critical demand side problem, at a time of growing need for skills for the economic transformation articulated in the National Agenda. This suggests the need for significant realignment of MoE vocational programs to be undertaken based on input from the employer community and deeper analysis of labor market information derived from Al Manar and similar initiatives, in a way that aligns them with the reforms initiated by the Ministries of Labour ( MoL ) and Higher Education and Scientific Research ( MoHESR ).", + "ner_text": [ + [ + 1633, + 1641, + "named" + ] + ], + "validated": false, + "empirical_context": "\u2022 Technical, and Vocational Education and Training ( TVET ): Enrollment in secondary vocational education as a share of total secondary enrolment declined from 18 percent in 2000 to 12 percent in 2005, suggesting a critical demand side problem, at a time of growing need for skills for the economic transformation articulated in the National Agenda. This suggests the need for significant realignment of MoE vocational programs to be undertaken based on input from the employer community and deeper analysis of labor market information derived from Al Manar and similar initiatives, in a way that aligns them with the reforms initiated by the Ministries of Labour ( MoL ) and Higher Education and Scientific Research ( MoHESR ).", + "type": "initiative", + "explanation": "'Al Manar' is mentioned as an initiative rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Al Manar' is a dataset because it is mentioned in the context of labor market information.", + "contextual_reason_agent": "'Al Manar' is mentioned as an initiative rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 126, + "text": "Establish satisfactory procurement data management system IGAD During project implementation 5. Weaknesses in internal control and weak capacity of procurement oversight 1. Conduct post procurement review of procurement activities of the DRDIP at IGAD 2. Strengthen the internal control World Bank / IGAD During project implementation B. Frequency of Procurement Supervision 114. In addition to the prior review supervision to be carried out from Bank offices, the capacity assessments of the Implementing Agencies has recommended semi-annual supervision missions to conduct field visits, of which at least one mission will involve post review of procurement actions. Table 4. 8 provides the Prior Review Threshold. Table 4. 8. Procurement of Goods and Works Expenditure Category Contract Value ( Threshold ) US $ Procurement Method Contracts Subject to Prior Review", + "ner_text": [ + [ + 23, + 57, + "named" + ] + ], + "validated": false, + "empirical_context": "Establish satisfactory procurement data management system IGAD During project implementation 5. Weaknesses in internal control and weak capacity of procurement oversight 1.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to a system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data management system'.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 24, + "text": "The GoU will conduct a technical study to evaluate the most suitable mix of technologies and to explore options on how to engage the private sector under this activity to increase the broadband coverage in Uganda. c ) Pre-purchase of International Bandwidth. This activity involves lowering the average cost of international bandwidth through a bulk pre-purchase of international bandwidth, through aggregation of the needs of public institutions which allows to capitalize on the economies of scale. This activity has been initiated through the RCIP-5 Project. An additional 20 Gbit / s per year for the duration of the project will be financed to cater to the growing bandwidth needs of government institutions in terms of additional number of MDAs connected, and growing usage per MDA. d ) Mobile Broadband Deployment in Rural Areas. This activity will finance the deployment of broadband in rural areas and provisioning of mobile broadband in selected areas. This activity will involve installation of 50 masts ( tower type structures to support antennas and other telecom transmission equipment ) to improve voice and data services in underserved areas. The masts will serve 900 MDAs. The proposed location of the masts will follow considerations of current gaps in service coverage, service demand, and projected viability of the infrastructure, to be able to target the investments while ensuring long term 46 Activities under this component build on the experience of RCIP-5 that extended the backbone underserved regions through buildout of 842 km ( on the top of a network which was previously already extended to 1, 500 + km ).", + "ner_text": [ + [ + 23, + 38, + "named" + ] + ], + "validated": false, + "empirical_context": "The GoU will conduct a technical study to evaluate the most suitable mix of technologies and to explore options on how to engage the private sector under this activity to increase the broadband coverage in Uganda. c ) Pre-purchase of International Bandwidth.", + "type": "study", + "explanation": "However, it is not a dataset as it refers to a study rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'technical study' implies a structured analysis of data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a study rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 16, + "text": "It will build on the outcomes of the ongoing Mauritania Social Safety Net System Project ( P150430 ) which began implementation in September 2015 and has put in place: ( a ) the Social Registry, which provides an effective and transparent mechanism to target poor and vulnerable households; ( b ) the National Social Transfer Program, Tekavoul, which currently supports 30, 000 households in extreme poverty through cash transfers and social promotion activities; and ( c ) the shock-responsive SSN Program, Elmaouna, which reaches 3, 000 food-insecure households annually during the lean season. The SSN system main key instruments are summarized in figure 2.", + "ner_text": [ + [ + 178, + 193, + "named" + ], + [ + 45, + 55, + "Social Registry <> data geography" + ], + [ + 131, + 145, + "Social Registry <> publication year" + ], + [ + 259, + 289, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "It will build on the outcomes of the ongoing Mauritania Social Safety Net System Project ( P150430 ) which began implementation in September 2015 and has put in place: ( a ) the Social Registry, which provides an effective and transparent mechanism to target poor and vulnerable households; ( b ) the National Social Transfer Program, Tekavoul, which currently supports 30, 000 households in extreme poverty through cash transfers and social promotion activities; and ( c ) the shock-responsive SSN Program, Elmaouna, which reaches 3, 000 food-insecure households annually during the lean season. The SSN system main key instruments are summarized in figure 2.", + "type": "registry", + "explanation": "The Social Registry is explicitly mentioned as providing a mechanism to target poor and vulnerable households, indicating its use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Social Registry' which implies a structured collection of data.", + "contextual_reason_agent": "The Social Registry is explicitly mentioned as providing a mechanism to target poor and vulnerable households, indicating its use as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 72, + "text": "for drafting bidding documents; ( 10 ) Coordination and integration of the Program will be done by a central agency, such as MOPIC, for Results Areas involving multiple agencies. Planning and Budgeting 4. The assessment confirms that Jordan \u2019 s budget systems are adequate for the Program. Implementing agencies use the GFMIS for budget preparation and execution. The GFMIS is fully utilized for budget execution, but not for budget preparation, given that manual interventions with various IT tools are being used. The GFMIS offers limited functionalities include: ( a ) the presentation of an initial policy-focused budget submission; ( b ) the entering of multiple quarters and multi - year commitments; and ( c ) capturing and populating the outstanding commitments and outstanding invoices for arrears monitoring. As a result, arrears have continued to be a problem. Jordan \u2019 s budgetary central government budget classification meets Government Financial Statistics / Classification of the Functions of Government standards. 31 These classifications are included in the current chart of accounts, allowing for all transactions to be reported in accordance with the appropriate standards. The budget is published on the GBD \u2019 s website ( www. gbd. gov. jo ). The final accounts and the monthly General Government Finance Bulletin, which include budgetary government finance statistics aggregated according to the economic and functional classifications ), are also published on the Ministry of Finance \u2019 s website. 31 Jordan: Public Expenditure and Financial Accountability ( PEFA ) Assessment ( 2022 ).", + "ner_text": [ + [ + 368, + 373, + "named" + ] + ], + "validated": false, + "empirical_context": "Implementing agencies use the GFMIS for budget preparation and execution. The GFMIS is fully utilized for budget execution, but not for budget preparation, given that manual interventions with various IT tools are being used. The GFMIS offers limited functionalities include: ( a ) the presentation of an initial policy-focused budget submission; ( b ) the entering of multiple quarters and multi - year commitments; and ( c ) capturing and populating the outstanding commitments and outstanding invoices for arrears monitoring.", + "type": "system", + "explanation": "However, GFMIS is described as a system used for budget management, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is involved in budget execution and preparation.", + "contextual_reason_agent": "However, GFMIS is described as a system used for budget management, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 65, + "text": "There has been significant progress in Jordan \u2019 s public FM where it has been able to: ( i ) improve its budget process through implementing a results based budget for the year 2008; ( ii ) advance the full implementation of a single treasury account with the Central Bank; and ( iii ) modernize the spending procedures and accounting through adopting a new chart of account and developing the GFMIS for implementation by the MoF, government departments and other spending units. So far, the GFMIS is still a work in progress and most of the ministries ( including MoE and GBD ) still do not have an accounting and reporting system in place that can be used for reporting on the project accounts. In addition, Jordan \u2019 s Corruption Perceived Index ( CPI ) as per Transparency International has improved from 4. 7 in 2007 to 5. 1 in 2008, ranking Jordan 5 / 20 regionally and 41 / 180 internationally. 9. The Project will be implemented through opening budget lines under MoE ( for counterpart funds ) and MoPIC ( for the Bank loan ) and funds will be allocated accordingly. MoE has already opened a budget line under its 2009 budget. 10. Project FM Risk. MoE and GBD \u2019 s FM arrangements were assessed based on the World Bank \u2019 s FM Guidelines, to determine if the FM arrangements for the Project are acceptable to the World Bank. Detailed FM questionnaires were completed for MoE and GBD and are included in the Project \u2019 s files. The risks identified and the mitigating measures addressing theses risks are detailed in the table below: 4 The Institutional Financial Management Assessment ( IFMCA ) for the Education and Social Sectors \u2013 June 2006.", + "ner_text": [ + [ + 492, + 497, + "named" + ] + ], + "validated": false, + "empirical_context": "There has been significant progress in Jordan \u2019 s public FM where it has been able to: ( i ) improve its budget process through implementing a results based budget for the year 2008; ( ii ) advance the full implementation of a single treasury account with the Central Bank; and ( iii ) modernize the spending procedures and accounting through adopting a new chart of account and developing the GFMIS for implementation by the MoF, government departments and other spending units. So far, the GFMIS is still a work in progress and most of the ministries ( including MoE and GBD ) still do not have an accounting and reporting system in place that can be used for reporting on the project accounts. In addition, Jordan \u2019 s Corruption Perceived Index ( CPI ) as per Transparency International has improved from 4.", + "type": "system", + "explanation": "GFMIS is mentioned as a system for managing financial information, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is related to accounting and reporting.", + "contextual_reason_agent": "GFMIS is mentioned as a system for managing financial information, not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 38, + "text": "Gender inequities and norms influence access to critical health services, as well as risk of exposure to disease, particularly in emergency situations and pandemics. Factors that constrain access to and use of health services by women in Iraq include limited mobility and financial capacity, competing demands of paid and unpaid work, and limited access to information. 10 The reported incidence of COVID-19 is higher among men than women \u2013 59 percent of registered COVID-19 cases in Iraq to date were among men. Moreover, women have also been impacted by the discontinuity of essential RMNCAH-N services, including for maternal and sexual and reproductive health, and GBV. 11 The GBV Information Management System ( GBVIMS ) has recorded a marked rise in the number of reported incidents of violence in 2020. 12 10 UN Women ( 2018 ), Gender Profile - Iraq, A situation analysis on gender equality and women empowerment in Iraq. 11 UN Women ( 2020 ). Report on the Impact of COVID-19 on Women. 12 Gender Based Violence Information Management System Annual Narrative Report. January \u2013 December 2020. https: / / iraq. unfpa. org / sites / default / files / resource-pdf / gbvims_narrative_report_of_2020. pdf", + "ner_text": [ + [ + 681, + 714, + "named" + ], + [ + 238, + 242, + "GBV Information Management System <> data geography" + ], + [ + 484, + 488, + "GBV Information Management System <> data geography" + ], + [ + 523, + 528, + "GBV Information Management System <> reference population" + ], + [ + 770, + 800, + "GBV Information Management System <> data description" + ], + [ + 804, + 808, + "GBV Information Management System <> publication year" + ], + [ + 816, + 824, + "GBV Information Management System <> author" + ], + [ + 852, + 856, + "GBV Information Management System <> data geography" + ], + [ + 923, + 927, + "GBV Information Management System <> data geography" + ], + [ + 932, + 940, + "GBV Information Management System <> author" + ], + [ + 943, + 947, + "GBV Information Management System <> publication year" + ], + [ + 1093, + 1097, + "GBV Information Management System <> publication year" + ] + ], + "validated": true, + "empirical_context": "Moreover, women have also been impacted by the discontinuity of essential RMNCAH-N services, including for maternal and sexual and reproductive health, and GBV. 11 The GBV Information Management System ( GBVIMS ) has recorded a marked rise in the number of reported incidents of violence in 2020. 12 10 UN Women ( 2018 ), Gender Profile - Iraq, A situation analysis on gender equality and women empowerment in Iraq.", + "type": "system", + "explanation": "It is indeed a dataset as it is mentioned to have recorded incidents, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Management System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is mentioned to have recorded incidents, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 1, + "text": "2 CURRENCY EQUIVALENTS ( Exchange Rate Effective January 31, 2015 ) Currency Unit = Lebanese Pound ( LBP ) LBP 1, 510. 51 = US $ 1. 00 FISCAL YEAR January 1 \u2013 December 31 ABBREVIATIONS AND ACRONYMS CMU CPS Central Management Unit Country Partnership Strategy CQS Consultants \u2019 Qualifications Selection DA Designated Account DALYs Disability-Adjusted Life Years EHCP Essential Healthcare Package ESIA Economic and Social Impact Assessment FBS Fixed Budget Selection FM Financial Management FO Financial Officer FOT Fiduciary Operations Team GOL Government of Lebanon HIS Health Information System HNP Health, Nutrition and Population ICB International Competitive Bidding IFR Interim Financial Reports IPSAS International Public Sector Accounting Standards LCS Least Cost Selection LSCTF Lebanon Syrian Crisis Trust Fund M & E Monitoring and Evaluation MENA Middle East and North Africa Region MOF Ministry of Finance MoPH Ministry of Public Health MOSA Ministry of Social Affairs NCB National Competitive Bidding NCD Non-communicable Disease NGO Non-governmental Organization NHA National Health Accounts NPTP National Poverty Targeting Program OOP Out-of-Pocket expenditure OPD PCM Outpatient Departments Presidential Council of Ministers PDO Project Development Objective PFS Project Financial Statements PHCC Primary Health Care Center PMT Proxy-Means Testing PMU Program Management Unit POM Project Operations Manual QCBS Quality-and-Cost-Based-Selection RRP Regional Response Plans SOE Statements of Expenditure SSS Single Source Selection TOR Terms of Reference UN United Nations UNHCR United Nations High Commissioner for Refugees UNICEF United Nations Children ' s Fund VAT Value Added Tax WA Withdrawal Application Regional Vice President", + "ner_text": [ + [ + 566, + 595, + "named" + ] + ], + "validated": false, + "empirical_context": "51 = US $ 1. 00 FISCAL YEAR January 1 \u2013 December 31 ABBREVIATIONS AND ACRONYMS CMU CPS Central Management Unit Country Partnership Strategy CQS Consultants \u2019 Qualifications Selection DA Designated Account DALYs Disability-Adjusted Life Years EHCP Essential Healthcare Package ESIA Economic and Social Impact Assessment FBS Fixed Budget Selection FM Financial Management FO Financial Officer FOT Fiduciary Operations Team GOL Government of Lebanon HIS Health Information System HNP Health, Nutrition and Population ICB International Competitive Bidding IFR Interim Financial Reports IPSAS International Public Sector Accounting Standards LCS Least Cost Selection LSCTF Lebanon Syrian Crisis Trust Fund M & E Monitoring and Evaluation MENA Middle East and North Africa Region MOF Ministry of Finance MoPH Ministry of Public Health MOSA Ministry of Social Affairs NCB National Competitive Bidding NCD Non-communicable Disease NGO Non-governmental Organization NHA National Health Accounts NPTP National Poverty Targeting Program OOP Out-of-Pocket expenditure OPD PCM Outpatient Departments Presidential Council of Ministers PDO Project Development Objective PFS Project Financial Statements PHCC Primary Health Care Center PMT Proxy-Means Testing PMU Program Management Unit POM Project Operations Manual QCBS Quality-and-Cost-Based-Selection RRP Regional Response Plans SOE Statements of Expenditure SSS Single Source Selection TOR Terms of Reference UN United Nations UNHCR United Nations High Commissioner for Refugees UNICEF United Nations Children ' s Fund VAT Value Added Tax WA Withdrawal Application Regional Vice President", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Health Information System' suggests a collection of health-related data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 18, + "text": "Lack of adequate shared infrastructure for the supply of ( clean ) energy, treatment of effluent, recycling of waste, and so on contribute to this situation. Under a business-as-usual scenario, the expansion of industry needed for structural transformation to proceed could substantially increase pressures in terms of energy and natural resource use as well as of pollution and waste generation. Previous projects in the country financed by the World Bank and other development partners have shown the scope for efficiency gains, notably through profitable investments in technologies and processes for resource efficiency and cleaner production. 16. In addition, the regulatory and enabling environment is not supportive of new growth sectors. Discussions with Ugandan private sector actors and government officials indicate significant challenges with regulatory enforcement, contract enforcement, and the public private interface. A countrywide survey commissioned in 2017 by the Uganda National Bureau of Standards ( UNBS ) found that 54 percent of the products in the Ugandan market are either fake or counterfeit. The number of backlogged civil suits in the commercial court system ( which account for most of all substantive cases ) has increased. Private investors have reported that the process to obtain an investment license is cumbersome. 17. There is lack of investment in new production locations and markets, that is caused by inadequate access to basic private sector services.", + "ner_text": [ + [ + 937, + 955, + "named" + ], + [ + 972, + 976, + "countrywide survey <> publication year" + ], + [ + 984, + 1019, + "countrywide survey <> author" + ] + ], + "validated": true, + "empirical_context": "Discussions with Ugandan private sector actors and government officials indicate significant challenges with regulatory enforcement, contract enforcement, and the public private interface. A countrywide survey commissioned in 2017 by the Uganda National Bureau of Standards ( UNBS ) found that 54 percent of the products in the Ugandan market are either fake or counterfeit. The number of backlogged civil suits in the commercial court system ( which account for most of all substantive cases ) has increased.", + "type": "survey", + "explanation": "This is indeed a dataset as it is a structured collection of data from a survey commissioned by a recognized authority.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on product authenticity in Uganda.", + "contextual_reason_agent": "This is indeed a dataset as it is a structured collection of data from a survey commissioned by a recognized authority.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 27, + "text": "The assessment will inform the identification of the institutional capacity-strengthening priorities. The scope of the component will focus on three levels: individual, organizational, and enabling environment. First, the assessment will identify to what extent the skills of the existing staff, procedures, policy frameworks, technology, and physical infrastructure at the national and sub-national levels are conducive to quality education service delivery. The assessment will focus on the impediments to the operational roles in areas such as planning, coordination and communication, policy formulation and implementation, procurement management, and public finance management, including budget implementation, reporting requirements, internal and external audits, public information on school budgets, and oversight mechanisms. Second, the assessment will lay out the foundation for preparing capacity development plans, highlighting the priority areas to strengthen the capacity in the education ministries in medium - and long-term capacity development areas. At the national level, the subcomponent will support the development of teacher policies and standards and mechanisms to coordinate the fragmented pre-service and in-service teacher training systems. This is to ensure that all the training provided by the partners is aligned with these standards and teachers trained by different partners can follow a path to become formal teachers. In addition, the subcomponent will support providing a digital teacher management system.", + "ner_text": [ + [ + 1508, + 1541, + "named" + ] + ], + "validated": false, + "empirical_context": "This is to ensure that all the training provided by the partners is aligned with these standards and teachers trained by different partners can follow a path to become formal teachers. In addition, the subcomponent will support providing a digital teacher management system.", + "type": "system", + "explanation": "However, the context indicates it is a system for managing teachers rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'management system' which can imply data handling.", + "contextual_reason_agent": "However, the context indicates it is a system for managing teachers rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "155_432110PAD0P09717632B01off0use0only1", + "page": 65, + "text": "At the same time, this approach allows for further expansion through private connections in a subsequent phase, once the necessary hydraulic capacity is present in the network. Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities. In addition, the project team and REGIDESO organized a participatory workshop, in which experiences to date with standpost management in Burundi, Kenya, Senegal and Rwanda were presented and discussed. The workshop, which included community representatives, members of government, REGIDESO staff, World Bank representatives, and international invited speakers with direct experience in standpost management, also served to produce recommendations on the type of standpost management that would be most appropriate in the context of Bujumbura. 14. As seen in the previous section, the household survey generated baseline information regarding water supply, sanitation, electricity provision, and general socio-economic and demographic data about the 26 neighborhoods under study. It also asked respondents to express their preferences about the type of service they would like. Not surprisingly, a majority of respondents ( 63. 9 % ) would prefer to pay to have a private connection to the network.", + "ner_text": [ + [ + 177, + 193, + "named" + ], + [ + 346, + 362, + "Household Survey <> data type" + ], + [ + 592, + 599, + "Household Survey <> data geography" + ], + [ + 987, + 996, + "Household Survey <> data geography" + ], + [ + 1039, + 1055, + "Household Survey <> data type" + ], + [ + 1248, + 1259, + "Household Survey <> reference population" + ], + [ + 1364, + 1375, + "Household Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "At the same time, this approach allows for further expansion through private connections in a subsequent phase, once the necessary hydraulic capacity is present in the network. Household Survey and Community Consultations 13. The economic and social feasibility of this alternative was explored through the socio - economic feasibility study and household survey, and through several rounds of discussion groups with representatives of local communities.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as part of the socio-economic feasibility study, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Household Survey' implies a structured collection of data from households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as part of the socio-economic feasibility study, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 34, + "text": "24 Project Development Objective Indicators Indicator Name: Description ( indicator definition etc. ) Direct project beneficiaries of safety net programs ( individuals ), of which women ( % ) Direct beneficiaries of safety net programs are the number of NPTP card holders. Beneficiaries of safety net programs ( number ), of which are e-card food vouchers The breakdown of beneficiaries of which are e-card food voucher beneficiaries. NPTP beneficiaries from extremely poor households as a share of total NPTP beneficiaries. Beneficiaries = NPTP card holders. Extreme poverty = $ 3. 84 / day per person in 2012 prices. Number of NPTP Applicants Households that have applied to the program. Time Lapse between application and eligibility notification Acceptance notification must be accompanied by a benefits card ( not cumulative ). Household awareness of NPTP Percentage of respondents to the opinion poll survey that have head of the NPTP. Proportion of assisted people informed about the e - card food program ( who is included, what people receive, and where they can complain ) Households that have been provided training in SDCs on the e-card food voucher system.", + "ner_text": [ + [ + 894, + 913, + "named" + ], + [ + 606, + 610, + "opinion poll survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Time Lapse between application and eligibility notification Acceptance notification must be accompanied by a benefits card ( not cumulative ). Household awareness of NPTP Percentage of respondents to the opinion poll survey that have head of the NPTP. Proportion of assisted people informed about the e - card food program ( who is included, what people receive, and where they can complain ) Households that have been provided training in SDCs on the e-card food voucher system.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves a structured collection of data from respondents regarding their awareness of the NPTP.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects responses from participants.", + "contextual_reason_agent": "This is indeed a dataset as it involves a structured collection of data from respondents regarding their awareness of the NPTP.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 18, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 13 of 61 Sub-component 1. 1: Updating and expanding the Social Registry ( US $ 6. 9 million ) 23. The objective of this sub-component is to update the Social Registry throughout Mauritania, including the end-to-end process ( quota methodology, community targeting mechanism, data collection, and verification stage ). An ongoing update of the Social Registry is critical because the programs which rely on it need up-to-date information, both on the demographic status of households ( migration, births, deaths, divorce ) and on the evolution of their socioeconomic status ( education, income generation, assets, access to services, and so on ). The update frequency is a trade-off between implementation costs and value of updated data for programs using the registry. 24. The objective agreed with the Government is to complete a full update every three years. The update will proceed by region, starting where the first households were registered ( Gorgol region ). Given this cycle, about 67, 000 households would be registered per year. Quotas will be recalculated based on the 2020 Poverty and Living Standards Measurement household Survey ( Enqu\u00eate sur la Pauvret\u00e9 et les Conditions de Vie, EPCV ) information.", + "ner_text": [ + [ + 135, + 150, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 257, + 267, + "Social Registry <> data geography" + ], + [ + 1002, + 1012, + "Social Registry <> reference population" + ], + [ + 1031, + 1044, + "Social Registry <> data geography" + ], + [ + 1162, + 1166, + "Social Registry <> publication year" + ] + ], + "validated": true, + "empirical_context": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 13 of 61 Sub-component 1. 1: Updating and expanding the Social Registry ( US $ 6. 9 million ) 23.", + "type": "registry", + "explanation": "In the context, it is explicitly mentioned as part of a project focused on updating and expanding a registry, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to social safety nets.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as part of a project focused on updating and expanding a registry, indicating it functions as a data source.", + "contextual_signal": "mentioned as part of a project focused on updating and expanding a registry", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 55, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 50 Health centers rehabilitated or newly built Twice a year Baseline data collected from UNHCR and WFP on number of health centers built or rehabilitated in target areas. The CFS is launching a baseline study which will help to confirm baseline numbers, to be reviewed at MTR. CFS Management information system - CNARR - Ministry of Health CFS local offices produce simple reports by region on number of health centers rehabilitated or newly built. Reports are then consolidated by CFS centrally and shared with the Project Steering Committee and with the World Bank. There will be two reports per year in June and December. Figures are reported for the period in question ( 6 months ) and also cumulatively. CFS Students attending new or rehabilitated schools Quarterly report Baseline data collected from UNHCR CFS local offices produce simple reports by region on CFS", + "ner_text": [ + [ + 145, + 158, + "named" + ], + [ + 4, + 14, + "Baseline data <> publisher" + ], + [ + 15, + 19, + "Baseline data <> data geography" + ], + [ + 174, + 179, + "Baseline data <> publisher" + ], + [ + 892, + 897, + "Baseline data <> publisher" + ], + [ + 971, + 989, + "Baseline data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 50 Health centers rehabilitated or newly built Twice a year Baseline data collected from UNHCR and WFP on number of health centers built or rehabilitated in target areas. The CFS is launching a baseline study which will help to confirm baseline numbers, to be reviewed at MTR.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected data that will be used to confirm baseline numbers in the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline data' suggests a structured collection of information used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected data that will be used to confirm baseline numbers in the project.", + "contextual_signal": "follows 'collected from' indicating it is used as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 34 of 47 teachers trained using scripted lessons; ( c ) boundary wall and functional toilets; ( d ) desk and chair for each student; ( e ) trained PTSMCs meeting regularly and endorsing monthly teacher attendance ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Targeted HFs having majority of essential medicines for RMNCHN services Percentage of targeted HFs having > 75 percent of essential RMNCHN medicines. Bi \u2010 annual Health institutional database Self \u2010 reported data Health Department, HMIS, PPHI Absenteeism among key staff to provide RMNCHN services Percentage of key staff present at HFs during duty time to provide RMNCHN services. Baseline is a preliminary estimate. Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "ner_text": [ + [ + 1385, + 1389, + "named" + ] + ], + "validated": false, + "empirical_context": "Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "type": "system", + "explanation": "'DHIS' is mentioned as a system but not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is associated with health information systems.", + "contextual_reason_agent": "'DHIS' is mentioned as a system but not as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "159_38147core", + "page": 23, + "text": "Homeowner Driven Construction: PHP \u2019 s strategy i s to provide cash grants to build houses. I t s operational scope i s to target those IDP families living in poor housing conditions as determined by the UNHCR survey. The cash grants would be used to replace all temporary and complete all partly - completed houses through the homeowner driven strategy. Those IDPs with permanent houses would not receive the cash grants except to build sanitation units if required. Basic community services such as drinking water, sanitation and environment management would cover all IDPs and select adjacent non IDP communities. Inclusiveness: The sequencing o f refugee camps for housing assistance was done on the basis o f a two stage screening process. All 141 IDP camps were ranked using social indicators that measured housing vulnerability. They were then screened in terms of environmental feasibility for the purpose o f phasing the investment. Based on this ranking, 20 camps were identified for Phase 1 o f the PHP. Building on Past Projects: The design o f the housing component i s based upon the successful IDA - financed NEHRP. It i s premised on the homeowner driven approach that entails the phased release o f cash grants upon meeting construction milestones as verified by the technical officers.", + "ner_text": [ + [ + 204, + 216, + "named" + ], + [ + 136, + 148, + "UNHCR survey <> reference population" + ], + [ + 781, + 798, + "UNHCR survey <> data type" + ] + ], + "validated": true, + "empirical_context": "Homeowner Driven Construction: PHP \u2019 s strategy i s to provide cash grants to build houses. I t s operational scope i s to target those IDP families living in poor housing conditions as determined by the UNHCR survey. The cash grants would be used to replace all temporary and complete all partly - completed houses through the homeowner driven strategy.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data used to determine housing conditions for IDP families.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'UNHCR survey' implies a structured collection of data regarding housing conditions.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data used to determine housing conditions for IDP families.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 20, + "text": "This subcomponent will finance the costs of the cash transfers, payment fees and operating costs for the social promotion for 45, 000 households. Social promotion activities include animators, communication materials and related logistics. Sub-component 2. 2: Re-certification and development of an exit strategy for Tekavoul ( US $ 2. 8 million ) 36. The project will support the design of a re-certification strategy. The first beneficiaries of Tekavoul were enrolled in the program in December 2016 and will therefore complete the five-year program cycle by the end of 2021. Households which are still classified as poor or extreme poor ( PMT score below that associated with the poverty line or still in the Social Registry ) will be re-enrolled for a new five-year cycle of support, and those that are found to be no longer eligible should exit the program. 37. Households which exit the Tekavoul program will receive an economic inclusion package. The economic inclusion package will consist of: ( a ) training on life skills and information to help households plan their exit, building on the last sessions of the social promotion and for a duration of six months after the last transfer; and ( b ) referral of specific household members to existing economic inclusion programs available in their moughataa.", + "ner_text": [ + [ + 712, + 727, + "named" + ], + [ + 488, + 501, + "Social Registry <> reference year" + ], + [ + 572, + 576, + "Social Registry <> publication year" + ], + [ + 578, + 588, + "Social Registry <> reference population" + ], + [ + 867, + 877, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The first beneficiaries of Tekavoul were enrolled in the program in December 2016 and will therefore complete the five-year program cycle by the end of 2021. Households which are still classified as poor or extreme poor ( PMT score below that associated with the poverty line or still in the Social Registry ) will be re-enrolled for a new five-year cycle of support, and those that are found to be no longer eligible should exit the program. 37.", + "type": "registry", + "explanation": "In the context, it is used to determine eligibility for program enrollment, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to households' poverty status.", + "contextual_reason_agent": "In the context, it is used to determine eligibility for program enrollment, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for determining eligibility", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 132, + "text": "Regular and DLI Evaluation Results area Regular Evaluation DLI evaluation Improved distribution of teachers recruited by the state in public primary schools Statistical Yearbook IVA Increased capacities of teachers on the effective and efficient use of the new curriculum in pre-primary and primary schools Statistical Yearbook IVA Increased availability of essential textbooks in public primary schools Statistical Yearbook Textbooks Technical Unit IVA Increased access to pre-school in rural areas through community pre-school centers ( CPCs ), in line with national standards Statistical Yearbook MINEDUB IVA Establishment of a standardized student learning assessment system for primary and secondary education Learning Assessment Technical Unit IVA Integrated EMIS functional and operational EMIS intersectoral Unit IVA Improved learning environment and quality of education for children in host community schools with refugees Statistical Yearbook MINEDUB DEMP ( Emergency Unit ) IVA", + "ner_text": [ + [ + 765, + 769, + "named" + ] + ], + "validated": false, + "empirical_context": "Regular and DLI Evaluation Results area Regular Evaluation DLI evaluation Improved distribution of teachers recruited by the state in public primary schools Statistical Yearbook IVA Increased capacities of teachers on the effective and efficient use of the new curriculum in pre-primary and primary schools Statistical Yearbook IVA Increased availability of essential textbooks in public primary schools Statistical Yearbook Textbooks Technical Unit IVA Increased access to pre-school in rural areas through community pre-school centers ( CPCs ), in line with national standards Statistical Yearbook MINEDUB IVA Establishment of a standardized student learning assessment system for primary and secondary education Learning Assessment Technical Unit IVA Integrated EMIS functional and operational EMIS intersectoral Unit IVA Improved learning environment and quality of education for children in host community schools with refugees Statistical Yearbook MINEDUB DEMP ( Emergency Unit ) IVA", + "type": "system", + "explanation": "'EMIS' is mentioned as a functional and operational system but not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is an acronym that could imply a structured collection of data.", + "contextual_reason_agent": "'EMIS' is mentioned as a functional and operational system but not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 155, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 151 of 159 Figure 5. 1. Education Levels and Wage, Nonwage and Agriculture, 2007 Sources: Third Cameroon Household Survey ( ECAM3 ), 2007 of the NIS; World Bank staff estimates. Note: The wage category includes those employed in the public sector receiving wages plus those working in the private, nonagricultural sector receiving wages. The nonwage category includes those working in the nonfarm sector ( small enterprises and informal sector ). The agriculture category includes those employed in the private agriculture sector receiving wages plus those working in the farm sector ( small and family farms ). Figure 5. 2. Education Levels and Wage, Nonwage, and Agriculture, 2014 Source: Fourth Cameroon Household Survey ( ECAM4 ) 2014 of the NIS; World Bank staff estimates. Note: The wage category includes those employed in the public sector receiving wages plus those working in the private, nonagricultural sector receiving wages. The nonwage category includes those working in the nonfarm sector ( small enterprises, informal sector ).", + "ner_text": [ + [ + 774, + 806, + "named" + ], + [ + 4, + 14, + "Fourth Cameroon Household Survey <> publisher" + ], + [ + 233, + 243, + "Fourth Cameroon Household Survey <> publisher" + ], + [ + 761, + 765, + "Fourth Cameroon Household Survey <> publication year" + ], + [ + 817, + 821, + "Fourth Cameroon Household Survey <> publication year" + ], + [ + 834, + 844, + "Fourth Cameroon Household Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "2. Education Levels and Wage, Nonwage, and Agriculture, 2014 Source: Fourth Cameroon Household Survey ( ECAM4 ) 2014 of the NIS; World Bank staff estimates. Note: The wage category includes those employed in the public sector receiving wages plus those working in the private, nonagricultural sector receiving wages.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as a source of information for the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a source of information for the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 15, + "text": "The performance of the education sector is affected by weak governance and limited use of data to inform decision making. The quality of education services at the local level is characterized by poor ownership, weak accountability of teachers and local education managers, frequent teacher absenteeism, and poor monitoring and measurement of student learning. To address these challenges, BESP 2020 \u2013 25 emphasizes the need to decentralize decision \u2010 making power to the cluster level. 29 It also entails the formation of a Local Education Council ( LEC ), allocation of a drawing and disbursement officer ( DDO ) code to the head teacher, training of the LEC in school \u2010 based and cluster \u2010 level budgeting and procurement, student learning assessments across all cluster schools, and the establishment of an Education Management Information System ( EMIS ) cell for improved data management. 30 The SED \u2019 s limited capacity in data analysis is also hampering its ability to make timely decisions and improve planning. 23 ASER Pakistan. 2019. Annual Status of Education Report \u2010 National 2018. 24 UNHCR. 2011. Population Profiling, Verification and Response Survey of Afghan Refugees in Pakistan. 25 SED, GoB. 2013. BESP 2013 \u2013 18. 26 SED, GoB. 2017. Balochistan Education Statistics 2016 \u2013 17. 27 Pakistan Bureau of Statistics. 2018. Labour Force Survey 2017 \u2010 18: Annual Report 28 SED, GoB. 2019. Draft BESP 2020 \u2013 25. 29 A group of public schools in a contiguous geographical area that forms the catchment area of a high school, where the high school acts as the cluster head and the middle and primary schools act as the satellites functioning under the cluster head. 30 SED, GoB. 2014. Policy, Devolution of Education Management and Services at Cluster Level.", + "ner_text": [ + [ + 810, + 849, + "named" + ] + ], + "validated": false, + "empirical_context": "To address these challenges, BESP 2020 \u2013 25 emphasizes the need to decentralize decision \u2010 making power to the cluster level. 29 It also entails the formation of a Local Education Council ( LEC ), allocation of a drawing and disbursement officer ( DDO ) code to the head teacher, training of the LEC in school \u2010 based and cluster \u2010 level budgeting and procurement, student learning assessments across all cluster schools, and the establishment of an Education Management Information System ( EMIS ) cell for improved data management. 30 The SED \u2019 s limited capacity in data analysis is also hampering its ability to make timely decisions and improve planning.", + "type": "system", + "explanation": "However, it is mentioned as a system for improved data management, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system for improved data management, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 59, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 53 of 101 Number of primary and lower secondary schools benefiting from a grant based on a signed performance agreement This indicator will track the implementation of the school grant programs in select districts in the regions of Diffa, Zinder, Tillaberi, Tahoua and Maradi Annual Project data, signed performance agreement and copy of bank transfer Reports PCU Number of communities trained on verifying the use and implementation of school grants Citizen-engagement indicator. Communities will be regularly consulted on project interventions and it is expected that CE feedback will be integrated in school plans and throughout implementation. Indicator will monitor the number of communities who have been enabled, through appropriate training, to verify the use and implementation of grants in accordance with the school plan. Annual Project data Reports PCU New protocol on teacher deployment is established The protocol will support equitable deployment of teachers in the country using school-level information.", + "ner_text": [ + [ + 368, + 387, + "named" + ], + [ + 4, + 14, + "Annual Project data <> publisher" + ], + [ + 324, + 329, + "Annual Project data <> data geography" + ], + [ + 331, + 337, + "Annual Project data <> data geography" + ], + [ + 339, + 348, + "Annual Project data <> data geography" + ], + [ + 350, + 356, + "Annual Project data <> data geography" + ], + [ + 361, + 367, + "Annual Project data <> data geography" + ], + [ + 456, + 542, + "Annual Project data <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 53 of 101 Number of primary and lower secondary schools benefiting from a grant based on a signed performance agreement This indicator will track the implementation of the school grant programs in select districts in the regions of Diffa, Zinder, Tillaberi, Tahoua and Maradi Annual Project data, signed performance agreement and copy of bank transfer Reports PCU Number of communities trained on verifying the use and implementation of school grants Citizen-engagement indicator. Communities will be regularly consulted on project interventions and it is expected that CE feedback will be integrated in school plans and throughout implementation.", + "type": "data", + "explanation": "This is indeed a dataset as it is used to track the implementation of school grant programs and is mentioned in the context of project indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'Annual Project data' which suggests a structured collection of information collected over a year.", + "contextual_reason_agent": "This is indeed a dataset as it is used to track the implementation of school grant programs and is mentioned in the context of project indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 58, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 53 of 117 RESULT_FRAME_TBL_IO Indicator Name DLI Baseline Intermediate Targets End Target 1 refugee / host communities ( Number ) Strengthen Capacity for Implementing Initiated Reforms Capacity building of target Teacher Training Colleges for implementation of competency-based teacher education curriculum. ( Yes / No ) No Yes Yes Establishment of standards and tools for quality assurance mechanisms for Pre-primary education, including teacher appraisal tools, assessment and classroom observation tools. ( Yes / No ) No Yes Yes New classrooms constructed in existing schools as per the needs-based school infrastructure investment plan ( Number ) 0. 00 6, 500. 00 8, 000. 00 New classrooms constructed in refugee host communities existing schools as per the needs - based school infrastructure investment plan ( Number ) 0. 00 50. 00 50. 00 New classrooms constructed in existing non - refugee / host communities primary schools as per the needs-based school infrastructure investment plan ( Number ) 0. 00 6, 000. 00 7, 950. 00 Evidence of utilization of NEMIS data for better management of primary education ( Yes / No ) No Yes Yes Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS ( Yes / No ) No Yes Yes Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of", + "ner_text": [ + [ + 1250, + 1255, + "named" + ], + [ + 1215, + 1231, + "NEMIS <> reference population" + ], + [ + 1302, + 1318, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 7, 950. 00 Evidence of utilization of NEMIS data for better management of primary education ( Yes / No ) No Yes Yes Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS ( Yes / No ) No Yes Yes Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of", + "type": "system", + "explanation": "NEMIS is indeed used as a data source for managing primary education, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization and management.", + "contextual_reason_agent": "NEMIS is indeed used as a data source for managing primary education, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for registration of refugee learners", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "112_IDAR2016-0221-PAD-09012016", + "page": 21, + "text": "The MoLG will be responsible for providing technical input to MDLF, namely all of the technical aspects of procurement, monitoring of progress towards the PDO, and details of project activities. MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the Financial Management Information System ( FMIS ) to automate data aggregation, storage, and presentation as part of a results - based Monitoring and Evaluation ( M & E ) system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the various MDLF-implemented projects and programs with the PA \u2019 s NDP strategic objectives. As such, no additional arrangements or staff are required for the M & E of this project. C. Sustainability 31. At present, there is a gap in metropolitan area urban planning policy, practice, and methodology in Palestine, and MoLG has indicated interest in developing capacity to administer, advise on, and implement such practice in Palestine. The Ministry conducted a study on metropolitan planning in 2007, which led to the establishment of the existing joint coordination unit for the RAB area. However, MoLG was unable to go beyond this initial study to mainstream metropolitan planning into government practice or policies. The proposed ICUD therefore would provide the initial metropolitan / urban area growth planning framework and local government driven implementation experience for MoLG to guide metropolitan planning policy development. The MoLG planning unit will technically oversee the proposed project activities and receive support to improve bottom-up metropolitan planning. The urban growth planning exercise will also inform necessary reforms in the PA \u2019 s own planning system, in addition to the inputs to developing LGUs urban area plans.", + "ner_text": [ + [ + 536, + 575, + "named" + ] + ], + "validated": false, + "empirical_context": "MDLF has been performing at a satisfactory level to collect, aggregate, and analyze results achieved by municipalities under the MDP-II and the MoLG under LGSIP as both programs \u2019 implementing agency. The MDLF has a robust monitoring and evaluation system that uses a web-based Program Management Information System ( PGMIS ) as well as the Financial Management Information System ( FMIS ) to automate data aggregation, storage, and presentation as part of a results - based Monitoring and Evaluation ( M & E ) system. The MDLF \u2019 s Results-Based Monitoring Manual ( RBMM ) has aligned the individual objectives of the various MDLF-implemented projects and programs with the PA \u2019 s NDP strategic objectives.", + "type": "system", + "explanation": "However, it is described as a system for managing financial information, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves data aggregation and management.", + "contextual_reason_agent": "However, it is described as a system for managing financial information, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 21, + "text": "Page | 12 the outcome of the digitalization of student assessment and of the professionalization of the civil service in terms of gender equity; and under RA3, the PDO-level indicator focuses on e-participation as an outcome of e-information. Result Area 1 on improved service delivery through digitalization: the PDO-level indicators are: 1 ) The number of individuals accessing digitalized public - and private-sector services using trusted, people - centric DPI, which is enabled by increasing the inclusive adoption of people-centric digital identity and improving trusted, people-centric data sharing. To promote social inclusion, the indicator consists of sub-indicators for type of user ( women, elders, refugees ), as well as type of access ( remote and GSC ). The objective is to increase access to trusted and people-centric digitalized services, whose uptake has been limited despite the activation of over 800, 000 digital IDs on Sanad ( as of January 2024 ). 2 ) The number of beneficiaries who actively use patient-centric digital services offered through a national EMR platform as an outcome of digital transformation in health service delivery. The indicator is disaggregated for Syrian refugee users.", + "ner_text": [ + [ + 942, + 947, + "named" + ] + ], + "validated": false, + "empirical_context": "To promote social inclusion, the indicator consists of sub-indicators for type of user ( women, elders, refugees ), as well as type of access ( remote and GSC ). The objective is to increase access to trusted and people-centric digitalized services, whose uptake has been limited despite the activation of over 800, 000 digital IDs on Sanad ( as of January 2024 ). 2 ) The number of beneficiaries who actively use patient-centric digital services offered through a national EMR platform as an outcome of digital transformation in health service delivery.", + "type": "program", + "explanation": "'Sanad' is mentioned as a program related to digital IDs, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is associated with digital IDs and services.", + "contextual_reason_agent": "'Sanad' is mentioned as a program related to digital IDs, not as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 99, + "text": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees \u2022 Survey of schools to identify potential beneficiaries from refugee-affected local councils \u2022 Mobilization and sensitization of communities in reaching refugees for enrolling and retaining children in school \u2022 Development of Response Strategy and Plan 2019-2023 Priority Area 3: Improving service delivery through performance-based Financing ( PBF ) at school level \u2022 Strengthening capacity of school management councils \u2022 Mobilizing and strengthening community participation in school management", + "ner_text": [ + [ + 1081, + 1085, + "named" + ] + ], + "validated": false, + "empirical_context": "training of CPC animators, and the school councils Results Area 5: Establishment of a standardized student learning assessment system for primary and secondary education \u2022 To undertake an assessment of current efforts in student learning assessments and develop a plan for undertaking regular assessments \u2022 Support for the establishment of the national unit for learning outcomes assessment and build its technical capacity \u2022 Development of assessment tests, administer them and produce learning assessment reports Results Area 6: Integrated EMIS functional and operational \u2022 Support for the development of national strategy for integrated EMIS involving four ministries \u2022 Support for increased technical capacity in support of EMIS systems development and information management \u2022 Support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, report generation, among others ). Support will also include EMIS monitoring at central and deconcentrated levels. Results Area 7: Improved learning environment and quality of education for children in host community schools with refugees \u2022 Survey of schools to identify potential beneficiaries from refugee-affected local councils \u2022 Mobilization and sensitization of communities in reaching refugees for enrolling and retaining children in school \u2022 Development of Response Strategy and Plan 2019-2023 Priority Area 3: Improving service delivery through performance-based Financing ( PBF ) at school level \u2022 Strengthening capacity of school management councils \u2022 Mobilizing and strengthening community participation in school management", + "type": "system", + "explanation": "However, EMIS is mentioned as a system for managing information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to information management and data handling.", + "contextual_reason_agent": "However, EMIS is mentioned as a system for managing information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 89, + "text": "To assess avoided costs in the without investment scenario, the following information was collected from existing literature: a. Health expenses data were sourced from healthcare centers and from a national health survey28. b. Incidence of major water-borne diseases were also sourced from TBS329 c. Opportunity cost of lost working day was calculated from data of a national employment survey30. ( CFAF 2, 500 per work day ). It was assumed that, for the treatment duration, each patient will be taken care of by one adult. Time spent by this adult translates into an opportunity cost as the adult will forgo revenues he / she could otherwise earn. The daily revenue of the average adult was estimated at CFAF 2, 500 per day ( legal minimum salary ). Therefore, given the population in the different sub - projects, avoided costs included ( i ) the direct costs incurred for different water-borne diseases and ( ii ) indirect costs related to the opportunity costs of adults \u2019 time spent on care. 31 Costs were estimated at CFAF 45, 000 per household per year. ( ii ) Flood avoidance related benefits. To assess avoided costs related to avoiding floods, the following information was collected: a. Frequency and costs of major flood events in the project influence area32.", + "ner_text": [ + [ + 198, + 222, + "named" + ], + [ + 129, + 149, + "national health survey28 <> data type" + ], + [ + 227, + 266, + "national health survey28 <> data description" + ] + ], + "validated": true, + "empirical_context": "To assess avoided costs in the without investment scenario, the following information was collected from existing literature: a. Health expenses data were sourced from healthcare centers and from a national health survey28. b.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of health expenses data in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a national health survey, which typically collects structured health-related data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of health expenses data in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 84, + "text": "Uganda remains committed to improving economic opportunities, social services and infrastructure to benefit refugees and host communities. Five CRRF program response plans have been finalized and are being supported by large national projects which include refugees and host community members with World Bank financing. These plans foresee medium and long-term development investments and support the transition of humanitarian assistance into Government services in RHDs. z There is strong progress on: the commitment to integrate refugee services into national service delivery systems. As outlined in the National Development Plan III ( NDP III ), refugee planning is integrated into national, sectoral and local government plans and data collection. The CRRF has developed sectoral plans for refugees and host communities and included both groups under the Uganda Intergovernmental Fiscal Transfer to support service provision through district development plans. Refugees and hosts have been included in the 2022 Uganda Demographic Health Survey. Although a refugee sample was not collected in the Uganda National Household Survey that was conducted in 2019 / 20 due to COVID-19 pressures, the Uganda Bureau of Statistics remains committed to include refugees in national data exercises. On the commitments to: ensure access for refugees and host population to quality, efficient and integrated basic social services; and enhance social infrastructure in refugee hosting areas, strong progress is being made on health and education service provision.", + "ner_text": [ + [ + 1012, + 1049, + "named" + ] + ], + "validated": true, + "empirical_context": "The CRRF has developed sectoral plans for refugees and host communities and included both groups under the Uganda Intergovernmental Fiscal Transfer to support service provision through district development plans. Refugees and hosts have been included in the 2022 Uganda Demographic Health Survey. Although a refugee sample was not collected in the Uganda National Household Survey that was conducted in 2019 / 20 due to COVID-19 pressures, the Uganda Bureau of Statistics remains committed to include refugees in national data exercises.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that collects demographic health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that includes specific populations.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that collects demographic health data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 24, + "text": "Essentially, the process involves the collection of significant change ( SC ) stories emanating from the field level, and the systematic selection of the most significant of these stories by panels of designated stakeholders or staff. The designated staff and stakeholders are initially involved by \u2018 searching \u2019 for project impact. Once changes have been captured, various people sit down together, read the stories aloud and have regular and often in-depth discussions about the value of these reported changes. 79. In addition to surveys and surveillance, routine program monitoring data will be collected for components 1a and 1b, focusing on all the target populations to whom services will be provided, in order to monitor the progress with the implementation and coverage of HIV services to which the project has contributed. As the ACGF is complementary and supplementary funding and in line with the Three Ones principle on one M & E system, routine program monitoring data about medical HIV services generated from health facilities in refugee sites, returnee sites, IDP sites, surrounding populations and in \u2018 hot spot \u2019 areas will not measure services provided to target populations specifically, but it will record increases in 20", + "ner_text": [ + [ + 559, + 590, + "named" + ], + [ + 655, + 673, + "routine program monitoring data <> reference population" + ], + [ + 1061, + 1075, + "routine program monitoring data <> reference population" + ], + [ + 1077, + 1086, + "routine program monitoring data <> reference population" + ], + [ + 1259, + 1277, + "routine program monitoring data <> usage context" + ] + ], + "validated": true, + "empirical_context": "79. In addition to surveys and surveillance, routine program monitoring data will be collected for components 1a and 1b, focusing on all the target populations to whom services will be provided, in order to monitor the progress with the implementation and coverage of HIV services to which the project has contributed. As the ACGF is complementary and supplementary funding and in line with the Three Ones principle on one M & E system, routine program monitoring data about medical HIV services generated from health facilities in refugee sites, returnee sites, IDP sites, surrounding populations and in \u2018 hot spot \u2019 areas will not measure services provided to target populations specifically, but it will record increases in 20", + "type": "data", + "explanation": "This is indeed a dataset as it refers to collected data used for monitoring the implementation and coverage of HIV services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collected for monitoring purposes.", + "contextual_reason_agent": "This is indeed a dataset as it refers to collected data used for monitoring the implementation and coverage of HIV services.", + "contextual_signal": "described as data collected for monitoring purposes", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "167_27761", + "page": 31, + "text": "In so doing, vaccination monitoring would be possible during implementation vis - & vis nutrition checkups. ( ii ) Education Grants. The following outcome indicators will be used: Net change in school attendance ( target: 2-5 % ) Net change in school dropout ( target: 24 % ) Net change in school enrollment ( target: 1-5 % ) Net change in transition rate, especially grades TBD ( target: > or = 0 ). 0 0 0 0 ( iii ) Awareness Grants 0 Attendance at awareness sessions ( target: 80 - 90 % ) ( iv ) MOSA Institutional Strengthening 0 0 Improved MIS in operation 0 New targeting instrument in operation Social Safety Net policy and strategy adopted by MOSA Output Indicators. The project MIS data will track the actual number of beneficiaries, as compared to the estimated number, together with the amount disbursed overall and between beneficiaries of different grant types. The estimates will be updated during a pilot phase based on the actual number of eligible beneficiaries registered in the MIS at MOSA.", + "ner_text": [ + [ + 996, + 999, + "named" + ] + ], + "validated": false, + "empirical_context": "The project MIS data will track the actual number of beneficiaries, as compared to the estimated number, together with the amount disbursed overall and between beneficiaries of different grant types. The estimates will be updated during a pilot phase based on the actual number of eligible beneficiaries registered in the MIS at MOSA.", + "type": "system", + "explanation": "'MIS' is mentioned as a project or system but not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with tracking data about beneficiaries.", + "contextual_reason_agent": "'MIS' is mentioned as a project or system but not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 15, + "text": "Data from a survey on the schooling of refugee children conducted by MENFOP \u2019 s planning department, the OOSC 2019 and the EDAM4 provide some information on the differential experiences of refugee and host students. The most commonly cited reason by refugees for not attending school is the need to help the family ( 36 percent females; 21 percent males ), while among host communities, the most common reason was the lack of interest in school ( 39 percent females; 36 percent males ). Around 20 percent of host communities indicated that girls having to stay home as a reason for not attending school, compared to 14 percent of refugee respondents. Although food insecurity is common among both refugee households and rural host communities, the lack of school canteens is cited more frequently by refugees as a reason for non - school attendance. Data on the prevalence of disability among refugee and host community children is unavailable; however, 1 percent of host community respondents and 1. 4 percent of refugee respondents cite disability as a reason for not attending school. 16. Focus group discussions with various stakeholder groups ( school administrators, teachers, parents, and students ) in Ali-Addeh, Holl-Holl, and Markazi confirm the quantitative indicators and provide additional information on refugee school needs, which include school canteens, school kits, improving the school environment, and accelerating implementation of the national program ( which would allow certification to access to higher levels of education ), and increased teacher support. A study underway by the World Bank 5 One school in Djibouti Ville was observed to have an average class size of 1: 44", + "ner_text": [ + [ + 123, + 128, + "named" + ] + ], + "validated": false, + "empirical_context": "Data from a survey on the schooling of refugee children conducted by MENFOP \u2019 s planning department, the OOSC 2019 and the EDAM4 provide some information on the differential experiences of refugee and host students. The most commonly cited reason by refugees for not attending school is the need to help the family ( 36 percent females; 21 percent males ), while among host communities, the most common reason was the lack of interest in school ( 39 percent females; 36 percent males ).", + "type": "program", + "explanation": "However, EDAM4 is not explicitly described as a data source or structured collection of data in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EDAM4 is a dataset because it is mentioned alongside a survey and other data sources.", + "contextual_reason_agent": "However, EDAM4 is not explicitly described as a data source or structured collection of data in the context.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 69, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 65 of 86 Beneficiaries of job-focused interventions - host communities, disaggregated by gender Beneficiaries of job-focused interventions that live in communities hosting refugees Annual Project Management Information System The enrolment module of the MIS contains information on the locality of residence of the beneficiaries PIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of provinces implementing cash transfers Number of provinces of the country where cash transfers activities financed by the project are implemented Annual Implementati on Progress Reports The Project Implementation Unit will inform of the provinces where the project is being implemented Project Implementation Unit Beneficiaries receiving payments on the schedule set-up in the operational manual Share of cash transfers beneficiaries that receive the transfers within the time period stipulated in the project operational manual Every 6 months Payment agent reports The payment agent reports include the date of payments and this will be compared with the date initially planned Proportion of beneficiary households in the beneficiary collines participating in behavior change promotion activities Share of households that attend sessions on human capital development Every six months Attendance sheets from NGOs carrying out the sessions NGOs implementing accompanying measures on human capital development will report on the attendance of Project Implementation Unit", + "ner_text": [ + [ + 308, + 311, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Cash for Jobs Project ( P175327 ) Page 65 of 86 Beneficiaries of job-focused interventions - host communities, disaggregated by gender Beneficiaries of job-focused interventions that live in communities hosting refugees Annual Project Management Information System The enrolment module of the MIS contains information on the locality of residence of the beneficiaries PIU ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Number of provinces implementing cash transfers Number of provinces of the country where cash transfers activities financed by the project are implemented Annual Implementati on Progress Reports The Project Implementation Unit will inform of the provinces where the project is being implemented Project Implementation Unit Beneficiaries receiving payments on the schedule set-up in the operational manual Share of cash transfers beneficiaries that receive the transfers within the time period stipulated in the project operational manual Every 6 months Payment agent reports The payment agent reports include the date of payments and this will be compared with the date initially planned Proportion of beneficiary households in the beneficiary collines participating in behavior change promotion activities Share of households that attend sessions on human capital development Every six months Attendance sheets from NGOs carrying out the sessions NGOs implementing accompanying measures on human capital development will report on the attendance of Project Implementation Unit", + "type": "system", + "explanation": "However, 'MIS' refers to a Management Information System, which is a system for managing information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is mentioned in the context of data collection and reporting.", + "contextual_reason_agent": "However, 'MIS' refers to a Management Information System, which is a system for managing information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "In addition to software and hardware management, QRC is also responsible for data verification and will coordinate with the DCU and individual units to provide all necessary disaggregated data needed to monitor Program indicators and DLIs. 53. While training on the use of the OpenEMIS was conducted during the 2016 \u2013 2017 academic year, there is a critical need to build MOE staff capacity in this regard. In order to exploit the full potential of the OpenEMIS for policy development and planning, staff capacity in the fundamentals of data analytics is necessary and will be supported by this Program. C. Disbursement Arrangements 54. For the PforR part, the DLIs will be used for disbursement. There are eight DLIs spread across five years. 55. The disbursement will be contingent upon the Government furnishing evidence satisfactory to the World Bank that it has achieved the respective disbursement \u2010 linked results ( DLRs ) and these are verified by the Independent Verification Agency ( IVA ) as specified in the verification protocol. Application for withdrawal from the World Bank \u2019 s financing account of amounts allocated to individual DLRs and calculated in accordance with the relevant formula will be sent to the World Bank any time after the World Bank has notified the GOJ in writing that it has accepted evidence of achievement of the DLRs and the amount eligible for payment.", + "ner_text": [ + [ + 174, + 192, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition to software and hardware management, QRC is also responsible for data verification and will coordinate with the DCU and individual units to provide all necessary disaggregated data needed to monitor Program indicators and DLIs. 53.", + "type": "data", + "explanation": "'Disaggregated data' refers to a type of data rather than a dataset itself, as it describes the nature of the data being collected.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'disaggregated data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Disaggregated data' refers to a type of data rather than a dataset itself, as it describes the nature of the data being collected.", + "contextual_signal": "'mentioned as necessary to monitor Program indicators and DLIs'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "185_multi-page", + "page": 40, + "text": "41 2 ) Improved care and support Percent of graduates of medical Project data services for those both infected and nursing school in the past and affected by HIV / AIDS year trained in natural history of HIV and in diagnosis and care of common opportunistic infections Percentage of health facilities Survey data that are currently stocked with drugs for commnon opportunistic infections and to provide palliative care, and report no stock-outs in the past year Percent of districts with at least Service delivery data one center staffed by trained counselors providing HIV testing and counseling at either free or affordable rates Increase in the percentage of clients served by VCT services that meet minimum requirements for provision of quality counseling and testing services Percent increase in number of communities with improved prevention services, care, and support Increase in percent of orphaned children under 15 who are currently attending school Increase in percent of population receiving quality HIV / AIDS / STI / TB case management", + "ner_text": [ + [ + 301, + 312, + "named" + ], + [ + 899, + 925, + "Survey data <> reference population" + ] + ], + "validated": true, + "empirical_context": "41 2 ) Improved care and support Percent of graduates of medical Project data services for those both infected and nursing school in the past and affected by HIV / AIDS year trained in natural history of HIV and in diagnosis and care of common opportunistic infections Percentage of health facilities Survey data that are currently stocked with drugs for commnon opportunistic infections and to provide palliative care, and report no stock-outs in the past year Percent of districts with at least Service delivery data one center staffed by trained counselors providing HIV testing and counseling at either free or affordable rates Increase in the percentage of clients served by VCT services that meet minimum requirements for provision of quality counseling and testing services Percent increase in number of communities with improved prevention services, care, and support Increase in percent of orphaned children under 15 who are currently attending school Increase in percent of population receiving quality HIV / AIDS / STI / TB case management", + "type": "survey", + "explanation": "In this context, 'survey data' is explicitly mentioned as a source of information used to report on health facilities and services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey data' typically refers to collected information from surveys.", + "contextual_reason_agent": "In this context, 'survey data' is explicitly mentioned as a source of information used to report on health facilities and services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 49, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 37 effectiveness of the government program, the IGFT system, in promoting MDGs was analyzed in the 2014 Poverty and Social Impact Assessment ( PSIA ) of Decentralized Basic Service Delivery in Ethiopia. 74 The assessment concluded that the IGFT system was effective in helping Ethiopia \u2019 s efforts to reach the MDGs. The 2014 PSIA also found the system to be pro-poor with the bulk of benefits of woreda-level spending accruing to the bottom 40 percent poorest in the country and that it helped reduce gender gaps and gaps between regions. Specific findings included that the poorest two quintiles received 58 percent of the benefits, that there was no negative bias against women in education and health expenditure, and that historically the most disadvantaged regions in the country benefitted the most from GPG allocations. 92. At the same time, the Ethiopian labor market clearly rewards investment in human capital and yields particularly positive returns for women. Data show important returns to both individuals and households from better human capital and that such investment contributes to reducing inequality both in access to services delivery and labor market outcomes. Evidence from the 2015 / 2016 Ethiopia Socioeconomic Survey reveals that higher levels of education are associated with better earnings, an increased probability of wage employment, and increased likelihood of finding employment in more productive sectors.", + "ner_text": [ + [ + 1263, + 1304, + "named" + ] + ], + "validated": true, + "empirical_context": "Data show important returns to both individuals and households from better human capital and that such investment contributes to reducing inequality both in access to services delivery and labor market outcomes. Evidence from the 2015 / 2016 Ethiopia Socioeconomic Survey reveals that higher levels of education are associated with better earnings, an increased probability of wage employment, and increased likelihood of finding employment in more productive sectors.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of evidence for the analysis of socioeconomic factors in Ethiopia.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides evidence and data on socioeconomic factors.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of evidence for the analysis of socioeconomic factors in Ethiopia.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 11, + "text": "The World Bank Costa Rica Results in Education ( CORE ) ( P181174 ) Page 2 according to the National Household Survey data for age cohorts from the National Institute for Statistics and the Census, while a little over 40 percent of individuals aged 55-59 in 2023 had completed at least lower secondary education ( 9 years of education ), this number more than doubles to 85 percent of 20-24 year olds. The 2020 Human Capital Index shows that a child born in Costa Rica can expect to be 63 percent as productive with the current education and health services as he or she could be if he or she enjoyed complete education and full health, which is slightly higher than the LAC average of 60 percent. 9 Costa Rica aspires to go beyond being among the best in the LAC region, to be on the global vanguard of the digital age, with education outcomes on par with the OECD, a highly skilled workforce, and a transformative digitalization and management agenda. Foundational Learning 5. National assessments show weakness in foundational learning, and learning outcomes are strongly correlated to socioeconomic conditions, which points to the need for foundational learning with deeper attention to the education outcomes of students from poorer families.", + "ner_text": [ + [ + 92, + 117, + "named" + ], + [ + 15, + 25, + "National Household Survey <> data geography" + ], + [ + 232, + 254, + "National Household Survey <> reference population" + ], + [ + 258, + 262, + "National Household Survey <> publication year" + ], + [ + 385, + 400, + "National Household Survey <> reference population" + ], + [ + 458, + 468, + "National Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Costa Rica Results in Education ( CORE ) ( P181174 ) Page 2 according to the National Household Survey data for age cohorts from the National Institute for Statistics and the Census, while a little over 40 percent of individuals aged 55-59 in 2023 had completed at least lower secondary education ( 9 years of education ), this number more than doubles to 85 percent of 20-24 year olds. The 2020 Human Capital Index shows that a child born in Costa Rica can expect to be 63 percent as productive with the current education and health services as he or she could be if he or she enjoyed complete education and full health, which is slightly higher than the LAC average of 60 percent.", + "type": "survey", + "explanation": "The National Household Survey is explicitly mentioned as providing data used for empirical analysis regarding education levels.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a source of data for age cohorts.", + "contextual_reason_agent": "The National Household Survey is explicitly mentioned as providing data used for empirical analysis regarding education levels.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 84, + "text": "The Program will support the GOL to make a fundamental shift in their approach to growth and job creation. Longer-term thinking around investment and jobs, and recognition of the complementary roles of the public and private sector, marks a substantial change in approach in Lebanon. The Program will support the GOL NJP in three ways: ( i ) implementation of long-delayed reforms to support trade and investment in key job-creating sectors; ( ii ) provide a platform for the GOL to test out new initiatives and new ways of working, with a strong emphasis on private sector leadership and the government \u2019 s role in correcting market distortions and government failures; ( iii ) support for enhanced coordination across ministries and agencies, incentivized by specific resources, programs, and results targets. 38 Based on the 2011-12 Household Budget Survey 39 Data on registered refugees from UNHCR as of November 2017", + "ner_text": [ + [ + 828, + 859, + "named" + ] + ], + "validated": true, + "empirical_context": "The Program will support the GOL NJP in three ways: ( i ) implementation of long-delayed reforms to support trade and investment in key job-creating sectors; ( ii ) provide a platform for the GOL to test out new initiatives and new ways of working, with a strong emphasis on private sector leadership and the government \u2019 s role in correcting market distortions and government failures; ( iii ) support for enhanced coordination across ministries and agencies, incentivized by specific resources, programs, and results targets. 38 Based on the 2011-12 Household Budget Survey 39 Data on registered refugees from UNHCR as of November 2017", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects household budget data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 17, + "text": "The proposed project will allow for the inclusion of poor and vulnerable households in the refugee and host communities of the Hodh Chargui region within the SSN system, including the Social Registry, Tekavoul, and Elmaouna programs. Component 1: Updating and enhancing the Social Registry ( US $ 7. 3 million with US $ 4. 0 million from national IDA18, US $ 0. 9 million from IDA18 RSW, US $ 0. 9 million from ASP MDTF and US $ 1. 5 million from government ) 22. This component will be implemented by the Taazour General Delegation. It supports a full update of the Social Registry and inclusion of eligible refugee households ( Sub-component 1. 1 ) and its usage for operational and strategic purposes ( Sub-component 1. 2 ). Payment Platform POOR HOUSEHOLDS Social Transfer Program \u2022 Extreme Poor households \u2022 5-year cycle \u2022 CCT \u2022 Social promotion Social Registry \u2022 Identification \u2022 Data collection \u2022 Support to targeted social programs Shock - responsive Safety Net Program \u2022 Vulnerablehouseholds \u2022 4 months \u2022 Unconditional CT \u2022 Using SP tools Early Warning Systems DATA DATA Beneficiary list Beneficiary List", + "ner_text": [ + [ + 567, + 582, + "named" + ], + [ + 127, + 146, + "Social Registry <> data geography" + ], + [ + 506, + 532, + "Social Registry <> author" + ], + [ + 600, + 627, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "This component will be implemented by the Taazour General Delegation. It supports a full update of the Social Registry and inclusion of eligible refugee households ( Sub-component 1. 1 ) and its usage for operational and strategic purposes ( Sub-component 1.", + "type": "registry", + "explanation": "The context indicates that the Social Registry is used for operational and strategic purposes, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to households.", + "contextual_reason_agent": "The context indicates that the Social Registry is used for operational and strategic purposes, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source for operational and strategic purposes", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 54, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 48 of 101 ( refugee, host, general ), regions and districts. Female Out-of-school children and adolescents benefiting from direct interventions to support learning ( disaggregated by general population, host communities and refugees ) Beneficiaries include children and adolescents age 7-16 who are out-of - school and / or attending Makarantas schools. Annual Project data Reports from learning interventions. Efforts will be made to report disaggregated data per age, category of population ( general population, host communities, refugees ), regions and districts. PCU Female Primary and lower secondary schools benefiting from a performance grant and meeting the minimum requirements in terms of teaching and learning conditions Tracks whether project funded school grants are used to improve teaching / learning conditions at school level. Minimum requirements defined in Y1 and will include sub - indicators on the availability of essential inputs, differentiated for primary and lower secondary schools. Annual Project data Drawing from SDI survey methodology, data will be collected through visual inspections of classrooms and school premises in each primary and lower secondary schools surveyed.", + "ner_text": [ + [ + 446, + 465, + "named" + ], + [ + 4, + 14, + "Annual Project data <> publisher" + ], + [ + 153, + 198, + "Annual Project data <> reference population" + ], + [ + 349, + 382, + "Annual Project data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Female Out-of-school children and adolescents benefiting from direct interventions to support learning ( disaggregated by general population, host communities and refugees ) Beneficiaries include children and adolescents age 7-16 who are out-of - school and / or attending Makarantas schools. Annual Project data Reports from learning interventions. Efforts will be made to report disaggregated data per age, category of population ( general population, host communities, refugees ), regions and districts.", + "type": "data report", + "explanation": "This is indeed a dataset as it is described as containing disaggregated data and is used for reporting on interventions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'Annual Project data' which suggests a structured collection of information.", + "contextual_reason_agent": "This is indeed a dataset as it is described as containing disaggregated data and is used for reporting on interventions.", + "contextual_signal": "mentioned as a source of data for reporting", + "tags": [] + }, + { + "filename": "167_27761", + "page": 20, + "text": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards. In addition, the auditor would carry out field spot check audits to ascertain compliance with contractual requirements. Compliance with conditional cash transfers would be monitored by an independent external consultant ( paragraph C. 3 ). 4. Social Opportunities, constraints, impacts, and risks arising. out of the socio-cultural and political context. The impact of closure and incursions in the West Bank and Gaza has been extensively documented. On the rise are poverty, unemployment, school drop out rates; on the decline or deteriorating are household incomes, living conditions, school attendance rates, at-large nutritional status and in particular of children 0-5 years of age. 13 The so called \u201c newly poor \u201d, those who are just above the poverty line before the outbreak of the second Intifada, in the third quarter of 2000, have been assisted by various interventions. To date, it has been much more difficult to find ways to assist the very poor, or first decile, whose coping mechanisms are essentially exhausted. The proposed project i s designed to support children ( 0-18 years old ) in the first decile. Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "ner_text": [ + [ + 4, + 8, + "named" + ] + ], + "validated": false, + "empirical_context": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards.", + "type": "system", + "explanation": "'FMIS' is not a dataset as it is described as a system rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'FMIS' is a dataset because it is related to financial management and information systems.", + "contextual_reason_agent": "'FMIS' is not a dataset as it is described as a system rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 144, + "text": "Data collected will cover the insertion rates of TVET graduates, the sectors of employment, the sustainability and the type of employment, and the relation between training received and occupied positions. This information will be used for monitoring the implementation of the NSDS and a costed plan also supported under this subcomponent. Furthermore, recommendations from the tracer studies will be useful for decision-making at several levels, including ( a ) training delivery ( for training institutions ); ( b ) program design ( for technical actors ); ( c ) central management ( operationalization and orientation of the skills strategy ); and ( d ) the sector \u2019 s steering bodies ( the political level ). ONEFOP will supervise the surveys in partnership with an inter-ministerial technical team ( including the NIS ). External expertise will be recruited to support the national team in defining and implementing the following elements: ( a ) overall design of the survey; ( b ) targeted indicators and setting up of a database; ( c ) study methods ( sampling, questionnaires, organization of the survey ); ( d ) actual survey implementation; ( e ) analysis; and ( f ) dissemination of the results. The support will include reinforcement of human and technical capacities necessary to achieve the desired results. 70.", + "ner_text": [ + [ + 378, + 392, + "named" + ] + ], + "validated": false, + "empirical_context": "This information will be used for monitoring the implementation of the NSDS and a costed plan also supported under this subcomponent. Furthermore, recommendations from the tracer studies will be useful for decision-making at several levels, including ( a ) training delivery ( for training institutions ); ( b ) program design ( for technical actors ); ( c ) central management ( operationalization and orientation of the skills strategy ); and ( d ) the sector \u2019 s steering bodies ( the political level ). ONEFOP will supervise the surveys in partnership with an inter-ministerial technical team ( including the NIS ).", + "type": "study", + "explanation": "'Tracer studies' are mentioned as a source of recommendations for decision-making, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'tracer studies' is a dataset because it involves data collection for monitoring purposes.", + "contextual_reason_agent": "'Tracer studies' are mentioned as a source of recommendations for decision-making, not as a structured collection of data.", + "contextual_signal": "mentioned only as a study, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 58, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 55 of 68 Figure 1. 2. PMU Structure High-Level Steering Committee High-Level Steering Committee Mandate and Functions 7. The HSC will pursue the following ToRs. ( a ) The HSC is mandated to provide strategic direction and guidance on the sectoral challenges and future steps on service delivery and other health system pillars. ( b ) The HSC will oversee and monitor the implementation of the project-approved plans and different processes including health facilities hand-over plan, plan to expand coverage of health facilities, BHI expansion plan, MoH capacity-building plan, government and partners \u2019 commitments, and any reprogramming amendments. ( c ) Review and approve applications to join project funding and / or activities by other potential interested parties. ( d ) The committee will review project data to monitor progress towards achieving desired results, identify needed actions, and follow-up on actions during meetings. ( e ) The committee will ensure the existence and enforcement of effective coordination and communication between different constituencies and other stakeholders relevant to its mandate. ( f ) The committee will mobilize and sustain political commitment to take the necessary actions towards achieving the development goals and the effective processes.", + "ner_text": [ + [ + 894, + 906, + "named" + ], + [ + 4, + 14, + "project data <> publisher" + ] + ], + "validated": true, + "empirical_context": "( c ) Review and approve applications to join project funding and / or activities by other potential interested parties. ( d ) The committee will review project data to monitor progress towards achieving desired results, identify needed actions, and follow-up on actions during meetings. ( e ) The committee will ensure the existence and enforcement of effective coordination and communication between different constituencies and other stakeholders relevant to its mandate.", + "type": "data", + "explanation": "In this context, 'project data' is used as a source of information to assess project progress, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'project data' refers to a dataset because it is mentioned in the context of monitoring progress and actions.", + "contextual_reason_agent": "In this context, 'project data' is used as a source of information to assess project progress, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a source to monitor progress", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 41, + "text": "for transition from Cycle 1 to 2 94 for transition from Cycle 2 to 3 91 for transition from Cycle 3 to Secondary Non-Lebanese 96 for transition from Cycle 1 to 2 82 for transition from Cycle 2 to 3 82 for transition from Cycle 3 to Secondary Same rates maintained 2015 and 2019 or 2020 for all; Annually for non - Lebanese MICS Round 6, VASyR, UNHCR community - based reporting; MEHE information management Number of students in public schools successful in Grade 3 and Number Grade 3: 0 Grade 6: 0 ( Baseline at 0 because Grade 3: TBD after pilot Grade 6: TBD after pilot Annually CERD", + "ner_text": [ + [ + 323, + 327, + "named" + ], + [ + 264, + 268, + "MICS <> reference year" + ], + [ + 273, + 277, + "MICS <> publication year" + ], + [ + 281, + 285, + "MICS <> publication year" + ], + [ + 308, + 322, + "MICS <> reference population" + ] + ], + "validated": true, + "empirical_context": "for transition from Cycle 1 to 2 94 for transition from Cycle 2 to 3 91 for transition from Cycle 3 to Secondary Non-Lebanese 96 for transition from Cycle 1 to 2 82 for transition from Cycle 2 to 3 82 for transition from Cycle 3 to Secondary Same rates maintained 2015 and 2019 or 2020 for all; Annually for non - Lebanese MICS Round 6, VASyR, UNHCR community - based reporting; MEHE information management Number of students in public schools successful in Grade 3 and Number Grade 3: 0 Grade 6: 0 ( Baseline at 0 because Grade 3: TBD after pilot Grade 6: TBD after pilot Annually CERD", + "type": "survey", + "explanation": "MICS is mentioned in the context of data collection for non-Lebanese populations, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because MICS is commonly known as a Multiple Indicator Cluster Survey that collects data on various indicators.", + "contextual_reason_agent": "MICS is mentioned in the context of data collection for non-Lebanese populations, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 57, + "text": "This subcomponent will include ( a ) defining indicators to monitor the efficiency, effectiveness, and value for money obtained through public procurement spending and a mechanism for collecting the required data from MINMAP and one or two pilot ministries; ( b ) establishing a performance baseline and creating an annual review process to determine changes in performance; ( c ) introducing performance management, including, but not limited to, professional awards, performance contracts, and incentives for staff involved in procurement processes, and staff in one or more pilot agencies; and ( d ) introducing, where appropriate, mechanisms to enhance oversight and monitoring of procurement, including the participation of communities and nongovernmental bodies, to enhance performance. Component 4. Enhancing the use of statistics for policy making 38. The end goal of any statistical system is to produce high-quality data to inform policies and make them publicly available. Achieving this goal requires an investment not only in the production of micro-data ( censuses and surveys ) and routine statistics ( prices, national account, external trade, and so on ) but also in data processing, analyzing, archiving, and dissemination.", + "ner_text": [ + [ + 1057, + 1067, + "named" + ] + ], + "validated": true, + "empirical_context": "The end goal of any statistical system is to produce high-quality data to inform policies and make them publicly available. Achieving this goal requires an investment not only in the production of micro-data ( censuses and surveys ) and routine statistics ( prices, national account, external trade, and so on ) but also in data processing, analyzing, archiving, and dissemination.", + "type": "micro-data", + "explanation": "In this context, 'micro-data' is indeed used as a structured collection of data derived from censuses and surveys.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'micro-data' refers to a dataset because it is mentioned in the context of producing high-quality data for statistical purposes.", + "contextual_reason_agent": "In this context, 'micro-data' is indeed used as a structured collection of data derived from censuses and surveys.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 19, + "text": "While IDPs live in host communities, this indicator excludes them from the target. The population data for Maradi and Zinder Regions in Niger indicate that the ratio of women to men is 50. 5 percent to 49. 5 percent. Therefore, considering a 2030 total target of inferred beneficiaries of 1, 412, 000 people, it is estimated that roughly 715, 000 are women. The population aged 0 to 24 in Niger is estimated at roughly 75 percent of the total population, so a target of approximately 1, 060, 000 youth beneficiaries is proposed. 58 This indicator assesses the number of people that experience improved access to climate-resilient road infrastructure, defined as being climate risk informed in its design and operations. The same ratios used for PDO indicator 3 are used to estimate the female and youth populations in host communities in the Maradi region. ( i. e., 50. 5 percent women and 75 percent youth ). 59 Adoption in this indicator refers to the active engagement and participation of the targeted stakeholders in value chain development initiatives to improve institutional capacities and plans to implement them.", + "ner_text": [ + [ + 87, + 102, + "named" + ], + [ + 107, + 113, + "population data <> data geography" + ], + [ + 118, + 132, + "population data <> data geography" + ], + [ + 136, + 141, + "population data <> data geography" + ], + [ + 160, + 181, + "population data <> data description" + ], + [ + 242, + 246, + "population data <> publication year" + ], + [ + 362, + 385, + "population data <> reference population" + ] + ], + "validated": true, + "empirical_context": "While IDPs live in host communities, this indicator excludes them from the target. The population data for Maradi and Zinder Regions in Niger indicate that the ratio of women to men is 50. 5 percent to 49.", + "type": "data", + "explanation": "In this context, 'population data' is used to provide empirical evidence about the gender ratio, confirming it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'population data' is a dataset because it refers to specific demographic information.", + "contextual_reason_agent": "In this context, 'population data' is used to provide empirical evidence about the gender ratio, confirming it functions as a data source.", + "contextual_signal": "mentioned as a source of demographic information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 61, + "text": "This data is inputted into the SNSOP MIS to generate payment schedules Implementing partner Number of beneficiaries receiving cash for performing labor intensive public works who are refugees or host communities Number of total beneficiaries that directly receive cash transfer for This indicator will be Registration and payment data in the Beneficiary data is collected during registration and Implementing Partner", + "ner_text": [ + [ + 342, + 358, + "named" + ], + [ + 92, + 174, + "Beneficiary data <> data description" + ], + [ + 183, + 211, + "Beneficiary data <> reference population" + ], + [ + 212, + 277, + "Beneficiary data <> data description" + ], + [ + 305, + 334, + "Beneficiary data <> data type" + ] + ], + "validated": true, + "empirical_context": "This data is inputted into the SNSOP MIS to generate payment schedules Implementing partner Number of beneficiaries receiving cash for performing labor intensive public works who are refugees or host communities Number of total beneficiaries that directly receive cash transfer for This indicator will be Registration and payment data in the Beneficiary data is collected during registration and Implementing Partner", + "type": "data", + "explanation": "In the context, 'Beneficiary data' is explicitly mentioned as being collected during registration, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of information about beneficiaries.", + "contextual_reason_agent": "In the context, 'Beneficiary data' is explicitly mentioned as being collected during registration, indicating it serves as a data source.", + "contextual_signal": "described as data collected during registration", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 63, + "text": "To help accelerate the activities in the first phase, the Project Implementation Unit will initially be responsible for the technical and financial implementation of project activities, including procurement, financial management, M & E, mitigation of potential negative social and environmental impacts, and communication about project implementation and results. At mid-term review, the project team will evaluate the possibility of a transition strategy to embed the implementation unit within the SEP / CNPS, based on the results of the capacity building activities supported in Component 2. The unit will contract the development of the MIS, the survey firm ( s ) for household registration and impact evaluation, the financial institutions in charge of delivering the payments to the beneficiary households, and the NGOs in charge of delivering the complementary activities. In Kirundo, where Concern has been operating a pilot cash transfer for 2, 000 households in 2 communes, local implementation could be delegated to Concern.", + "ner_text": [ + [ + 642, + 645, + "named" + ] + ], + "validated": false, + "empirical_context": "At mid-term review, the project team will evaluate the possibility of a transition strategy to embed the implementation unit within the SEP / CNPS, based on the results of the capacity building activities supported in Component 2. The unit will contract the development of the MIS, the survey firm ( s ) for household registration and impact evaluation, the financial institutions in charge of delivering the payments to the beneficiary households, and the NGOs in charge of delivering the complementary activities. In Kirundo, where Concern has been operating a pilot cash transfer for 2, 000 households in 2 communes, local implementation could be delegated to Concern.", + "type": "system", + "explanation": "'MIS' refers to a management information system, which is a system but not mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured collection of information.", + "contextual_reason_agent": "'MIS' refers to a management information system, which is a system but not mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 27, + "text": "Page | 18 \u2022 World Bank diagnostics and international benchmarks also warrant the Program \u2019 s sectoral focus on education and health. According to the Global Digital Health Monitor 202313, the digital landscape in the health sector is considered more mature in Jordan compared to neighboring countries in the MENA region, especially in developing digital services and applications ( see the technical assessment for further detail ). However, due to limited governance and multiple services and systems designed and operated in silos, health information systems have been fragmented with inconsistent data standards and quality. The education sector has also been advancing on the digital front. Jordan was one of the first countries in the region to respond to the COVID-19 pandemic and school closures by developing an online learning platform called Darsak covering the curriculum \u2019 s core subjects of Arabic, English, math, and science for grades 1 through 12. In addition, a newly launched platform for teacher training offers courses on distance learning tools, blended learning, and educational technology.", + "ner_text": [ + [ + 150, + 186, + "named" + ], + [ + 260, + 266, + "Global Digital Health Monitor 202313 <> data geography" + ], + [ + 308, + 319, + "Global Digital Health Monitor 202313 <> data geography" + ], + [ + 695, + 701, + "Global Digital Health Monitor 202313 <> data geography" + ] + ], + "validated": true, + "empirical_context": "Page | 18 \u2022 World Bank diagnostics and international benchmarks also warrant the Program \u2019 s sectoral focus on education and health. According to the Global Digital Health Monitor 202313, the digital landscape in the health sector is considered more mature in Jordan compared to neighboring countries in the MENA region, especially in developing digital services and applications ( see the technical assessment for further detail ). However, due to limited governance and multiple services and systems designed and operated in silos, health information systems have been fragmented with inconsistent data standards and quality.", + "type": "dataset", + "explanation": "It is indeed a dataset as it is referenced in the context of providing benchmarks and diagnostics for health information systems.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Monitor' in its name, suggesting a systematic collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is referenced in the context of providing benchmarks and diagnostics for health information systems.", + "contextual_signal": "mentioned as a source of information for health sector analysis", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 51, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 40 Indicator Name People within 200m ( 0. 2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural Indicator Name of which women Definition / Description Frequency Bi-annually. Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural.", + "ner_text": [ + [ + 501, + 506, + "named" + ] + ], + "validated": true, + "empirical_context": "2km ) of an improved water source benefitted in refugees hosting districts Definition / Description Percent of refugees and citizens in host communities that have an improved water source within 200m of walking distance. Frequency Bi-annually Data Source Primary sources include: MWE national M & E system, Refugee Database, Refugees GIS Mapping for WASH, Water Supply Atlas, and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water & environments sector performance report, NWSC annual report.", + "type": "system", + "explanation": "UPMiS is indeed a dataset as it is mentioned as part of the primary sources for data collection in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UPMiS is a dataset because it is listed among primary data sources.", + "contextual_reason_agent": "UPMiS is indeed a dataset as it is mentioned as part of the primary sources for data collection in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 15, + "text": "This component will fund: ( i ) two local consultants that will provide technical assistance and implementation support to NIET ( one of the consultants will be based in Gaza ); and ( ii ) international technical assistance to be provided by a well-known institution with extensive experience in professional development and in-service teacher training. This international institution will assist NIET and the selected HE institutions with the preparation of tools to assess the level of competencies and skills of class teachers and, based on this assessment, to design a modular program for upgrading their skills. It will also provide implementation support for the evaluation of phase one and the preparation of a plan for scaling up this initiative using the evaluation data; ( iii ) training of trainers to be provided by the international institution; ( iv ) funds to finance the delivery of these training programs by higher education institutions through a consultant service contract with these participating local higher education institutions; ( v ) development and printing of training materials; ( vi ) a communication / dissemination campaign to inform unqualified teachers about the program; and ( vii ) financing of incremental operating costs to support NIET-TMT during project implementation. 30.", + "ner_text": [ + [ + 764, + 779, + "named" + ], + [ + 170, + 174, + "evaluation data <> data geography" + ], + [ + 515, + 529, + "evaluation data <> reference population" + ], + [ + 1331, + 1349, + "evaluation data <> usage context" + ] + ], + "validated": true, + "empirical_context": "This international institution will assist NIET and the selected HE institutions with the preparation of tools to assess the level of competencies and skills of class teachers and, based on this assessment, to design a modular program for upgrading their skills. It will also provide implementation support for the evaluation of phase one and the preparation of a plan for scaling up this initiative using the evaluation data; ( iii ) training of trainers to be provided by the international institution; ( iv ) funds to finance the delivery of these training programs by higher education institutions through a consultant service contract with these participating local higher education institutions; ( v ) development and printing of training materials; ( vi ) a communication / dissemination campaign to inform unqualified teachers about the program; and ( vii ) financing of incremental operating costs to support NIET-TMT during project implementation. 30.", + "type": "data", + "explanation": "In this context, 'evaluation data' is indeed used as a source of information for designing a modular program and scaling up the initiative.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'evaluation data' is a dataset because it refers to data collected for assessing competencies and skills.", + "contextual_reason_agent": "In this context, 'evaluation data' is indeed used as a source of information for designing a modular program and scaling up the initiative.", + "contextual_signal": "'uses data from' the evaluation to inform program design.", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 15, + "text": "According to data obtained from stakeholder engagement activities and limited surveys made in the Region during Project preparation, at least 25 percent of respondents had suffered sexual harassment in public transport in the Region, and less than half of the respondents considered public transport safe for women. However, between 2020 and 2021, the Region recorded on average only 114 cases, 21 which highlights the prevalence of underreporting. Forty-eight percent of Brazilian women perceive their city as unsafe, while only 11 percent feel that their surroundings are safe; the Southern states are considered safer than the north. In addition, a lack of clarity regarding responsibilities and governance structures for the design and implementation of protocols for responding to sexual harassment in public transport limits the possibility of appropriately responding to survivors. Sexual harassment constitutes a barrier to gender equality, and directly impacts women \u2019 s access to economic opportunities.", + "ner_text": [ + [ + 32, + 65, + "named" + ] + ], + "validated": false, + "empirical_context": "According to data obtained from stakeholder engagement activities and limited surveys made in the Region during Project preparation, at least 25 percent of respondents had suffered sexual harassment in public transport in the Region, and less than half of the respondents considered public transport safe for women. However, between 2020 and 2021, the Region recorded on average only 114 cases, 21 which highlights the prevalence of underreporting.", + "type": "activity", + "explanation": "However, it is not a structured collection of data but rather a process of engagement.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves gathering information from stakeholders.", + "contextual_reason_agent": "However, it is not a structured collection of data but rather a process of engagement.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 15, + "text": "Similarly, the baseline survey for the Catholic Relief Services ( CRS ) Amashiga project showed that women of childbearing age had an average dietary diversity score of 3. 7 food groups ( out of 9 recommended by the World Health Organization ). While acute malnutrition averages five percent in children under five, food insecurity in terms of inadequate access to sufficient kilocalories ( kcal ) is prevalent, especially as a result of seasonal variations in food availability and negative impacts of production risks. A 2016 report identified the main drivers of stunting in Burundi as directly or indirectly related to diet. Access to clean water and sanitation facilities, as well as biomass fuel use were also identified as important factors ( see Figure 2 ). Furthermore, large households, with a fertility rate of 5. 5 children per woman, contribute to food insecurity and malnutrition even before children are born. 18 Figure 2: Stunting cases among two-year old attributable to individual risk factors in Burundi Source: Burundi case study for Danaei, et al. ( 2016 ). Risk factors for childhood stunting in 137 developing countries: a comparative risk assessment analysis at global, regional, and country levels. Access to basic services and socio-economic infrastructure 12.", + "ner_text": [ + [ + 15, + 30, + "named" + ], + [ + 39, + 63, + "baseline survey <> publisher" + ], + [ + 101, + 126, + "baseline survey <> reference population" + ], + [ + 142, + 185, + "baseline survey <> data description" + ], + [ + 523, + 527, + "baseline survey <> publication year" + ], + [ + 578, + 585, + "baseline survey <> data geography" + ], + [ + 1015, + 1022, + "baseline survey <> data geography" + ], + [ + 1031, + 1038, + "baseline survey <> data geography" + ], + [ + 1054, + 1068, + "baseline survey <> author" + ], + [ + 1071, + 1075, + "baseline survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Similarly, the baseline survey for the Catholic Relief Services ( CRS ) Amashiga project showed that women of childbearing age had an average dietary diversity score of 3. 7 food groups ( out of 9 recommended by the World Health Organization ).", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a survey that provides empirical data on dietary diversity scores.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a survey that provides empirical data on dietary diversity scores.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 60, + "text": "A core part of CCAP will be to strengthen citizens \u2019 monitoring and their ability to report problems at the same time as they are implementing the rural and urban grants. The project will develop simple citizens \u2019 scorecards to be completed by CDCs and Social Organizers to report upon the minimum service standards. CCAP will also track more closely through the scorecards, regular reporting and evaluations on the participation of women, poor and vulnerable groups, such as returnees and IDPs, during the project cycle. Furthermore, taking advantage of technology and high mobile access coverage in the country, CCAP will explore mobile applications for reporting and grievance redress. Second, the project will innovate and use the satellite imagery of the existing ARTF third party monitoring activity to validate infrastructure gaps and service delivery outputs. For example, the presence of schools and irrigation canals in a sample number of areas will be validated through satellite imagery against community monitoring reports. Lastly, this component will support ways to strengthen a coordinated approach across line ministries \u2019 monitoring and evaluation mechanisms including at the community, district and provincial levels, within government, and with third party monitors. As part of the Government \u2019 s strong commitment to making the Citizens \u2019 Charter operate effectively, the Office of the President and MoF will receive semi-annual progress reports on the achievement of the service standards so they can closely monitor progress, assist with removing bottlenecks in service delivery, and allocate budgetary resources as", + "ner_text": [ + [ + 735, + 752, + "named" + ] + ], + "validated": false, + "empirical_context": "Furthermore, taking advantage of technology and high mobile access coverage in the country, CCAP will explore mobile applications for reporting and grievance redress. Second, the project will innovate and use the satellite imagery of the existing ARTF third party monitoring activity to validate infrastructure gaps and service delivery outputs. For example, the presence of schools and irrigation canals in a sample number of areas will be validated through satellite imagery against community monitoring reports.", + "type": "non-dataset", + "explanation": "'Satellite imagery' is not a dataset itself but rather a method or tool used for data validation in the project.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'satellite imagery' is a dataset because it involves data collection through technology.", + "contextual_reason_agent": "'Satellite imagery' is not a dataset itself but rather a method or tool used for data validation in the project.", + "contextual_signal": "mentioned only as a method for validation, not as a data source", + "tags": [] + }, + { + "filename": "136_PAD7230P1476890AD0October0100final", + "page": 32, + "text": "21 Annex 1: Results Framework and Monitoring HASHEMITE KINGDOM OF JORDAN Emergency Services and Social Resilience Project ( P147689 ) Results Framework Project Development Objectives The project development objective is to help Jordanian municipalities and host communities address the immediate service delivery impacts of Syrian refugee inflows and strengthen municipal capacity to support local economic development. Project Development Objective Indicators Cumulative Target Values Frequency Data Source / Methodology Responsibility for Data Collection Indicator Name Core Unit of Measure Baseline 2013 2014 2015 2016 End Target Direct project beneficiaries, of which women2 \uf0fd Number, %, 0 395, 000 ( 45 % ) 590, 000 ( 45 % ) 790, 000 ( 45 % ) 790, 000 ( 45 % ) Annual Surveys and service delivery records of municipalities Municipalities, MOMA Conflict affected people to whom benefits have been delivered within the first year of project effectiveness, of which: ( i ) women; ( ii ) host population; ( iii ) refugees3 \uf0fd Number 0 545, 000 ( 245, 250 ) ( 395, 000 ) ( 150, 000 ) NA NA 545, 000 ( 245, 250 ) ( 395, 000 ) ( 150, 000 ) Annual Surveys and service delivery records of municipalities Municipalities, MOMA Participating municipalities ensuring pre-crisis levels of % 04 0 50 50 50 Annual Surveys and service delivery records of Municipalities, MOMA 2 Direct", + "ner_text": [ + [ + 766, + 780, + "named" + ], + [ + 66, + 72, + "Annual Surveys <> data geography" + ], + [ + 785, + 809, + "Annual Surveys <> data type" + ], + [ + 844, + 848, + "Annual Surveys <> publisher" + ], + [ + 975, + 980, + "Annual Surveys <> reference population" + ], + [ + 1215, + 1219, + "Annual Surveys <> publisher" + ], + [ + 1358, + 1362, + "Annual Surveys <> publisher" + ], + [ + 1387, + 1405, + "Annual Surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "21 Annex 1: Results Framework and Monitoring HASHEMITE KINGDOM OF JORDAN Emergency Services and Social Resilience Project ( P147689 ) Results Framework Project Development Objectives The project development objective is to help Jordanian municipalities and host communities address the immediate service delivery impacts of Syrian refugee inflows and strengthen municipal capacity to support local economic development. Project Development Objective Indicators Cumulative Target Values Frequency Data Source / Methodology Responsibility for Data Collection Indicator Name Core Unit of Measure Baseline 2013 2014 2015 2016 End Target Direct project beneficiaries, of which women2 \uf0fd Number, %, 0 395, 000 ( 45 % ) 590, 000 ( 45 % ) 790, 000 ( 45 % ) 790, 000 ( 45 % ) Annual Surveys and service delivery records of municipalities Municipalities, MOMA Conflict affected people to whom benefits have been delivered within the first year of project effectiveness, of which: ( i ) women; ( ii ) host population; ( iii ) refugees3 \uf0fd Number 0 545, 000 ( 245, 250 ) ( 395, 000 ) ( 150, 000 ) NA NA 545, 000 ( 245, 250 ) ( 395, 000 ) ( 150, 000 ) Annual Surveys and service delivery records of municipalities Municipalities, MOMA Participating municipalities ensuring pre-crisis levels of % 04 0 50 50 50 Annual Surveys and service delivery records of Municipalities, MOMA 2 Direct", + "type": "survey", + "explanation": "In this context, 'Annual Surveys' is explicitly mentioned as a source of data for project indicators, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Annual Surveys' is a dataset because it refers to a systematic collection of data collected annually.", + "contextual_reason_agent": "In this context, 'Annual Surveys' is explicitly mentioned as a source of data for project indicators, confirming its role as a dataset.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 59, + "text": "The evaluations are set up to provide learning between the first and second wave of communes implementation and to provide key impact information to garner support for cash transfers as a relevant social protection intervention in Burundi. The process evaluation will focus on the core operational processes: targeting and payment processes as well as the delivery of the complementary activities. In the first phase, the process evaluation will assess the relative efficiency of concentrating collines by zones within communes. This will inform the expansion in the second wave of communes and inform the design of the operating processes for the program. The process evaluation will be complemented by regular beneficiary surveys to help map out operational bottlenecks, complaints and issues, and complement the grievance redress mechanism. The impact evaluation will focus on key poverty, welfare, and human development indicators ( including women and children health and nutrition outcomes ) at the household and community - levels for the cash transfers and the behavior change communication. It may also explore variations in the delivery of the modules and specific women empowerment issues related to managing cash and fostering behavior change on parenting practices and social norms. 39. The process evaluations and beneficiary surveys will provide evidence for the mid - term review stock taking of the first phase of the program, which will be used to update processes for the expansion in the other two provinces. Subsequent evaluations will feed into the program monitoring system to assess the quality of implementation. 40. The main impact evaluation will take advantage of the random selection of collines at the commune-level for a randomized cluster design. Baseline data collection in participating and control collines will take place before transfers start for beneficiary households. Mid-line data collection will take place at 24 months ( at the same time of year ). End line data collection will take place six months after the CT program is supposed to end ( at 42 months ) to assess the medium -", + "ner_text": [ + [ + 712, + 731, + "named" + ], + [ + 231, + 238, + "beneficiary surveys <> data geography" + ], + [ + 947, + 995, + "beneficiary surveys <> data description" + ], + [ + 1885, + 1907, + "beneficiary surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "This will inform the expansion in the second wave of communes and inform the design of the operating processes for the program. The process evaluation will be complemented by regular beneficiary surveys to help map out operational bottlenecks, complaints and issues, and complement the grievance redress mechanism. The impact evaluation will focus on key poverty, welfare, and human development indicators ( including women and children health and nutrition outcomes ) at the household and community - levels for the cash transfers and the behavior change communication.", + "type": "survey", + "explanation": "In this context, 'beneficiary surveys' are explicitly mentioned as a means to gather data on operational bottlenecks and issues, confirming their role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' implies a structured collection of data gathered from participants.", + "contextual_reason_agent": "In this context, 'beneficiary surveys' are explicitly mentioned as a means to gather data on operational bottlenecks and issues, confirming their role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 15, + "text": "The World Bank Tajikistan Water Supply and Sanitation Investment Project ( P177325 ) Page 12 of 89 Figure 3. Average Rate of Infectious and Parasitic Diseases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Figure 4. Number of Registered Typhoid Fever Cases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Source: MoHSP Data, 2020 Source: MoHSP Data, 2020 13. The JMP reports that 85. 7 percent of the population has access to improved sanitation services, with nearly 94. 9 percent of rural population relying on pit latrines, and only 0. 5 percent of rural households reporting sewage treatment. Government data focused on access to centralized sewage systems, which indicates a great disparity between the urban and rural areas, estimating access to sewage network in urban areas at 80. 0 percent, 18. 2 percent in peri-urban areas, and 0. 2 percent in rural areas. 26 Lack of operational and capital funds, ageing of the facilities, and limited capabilities in wastewater management are key sector bottlenecks. Increased discharges of polluted or untreated wastewater facilitate the spread of pathogens in water bodies, open drains, and directly within urban areas, posing a severe public health risk.", + "ner_text": [ + [ + 336, + 346, + "named" + ], + [ + 166, + 180, + "MoHSP Data <> data geography" + ], + [ + 213, + 217, + "MoHSP Data <> publication year" + ], + [ + 228, + 268, + "MoHSP Data <> data description" + ], + [ + 276, + 290, + "MoHSP Data <> data geography" + ], + [ + 373, + 377, + "MoHSP Data <> publication year" + ], + [ + 508, + 524, + "MoHSP Data <> reference population" + ], + [ + 575, + 591, + "MoHSP Data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Average Rate of Infectious and Parasitic Diseases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Figure 4. Number of Registered Typhoid Fever Cases in the Khatlon Region ( per 100, 000 people ), 2016 \u2013 2020 Source: MoHSP Data, 2020 Source: MoHSP Data, 2020 13. The JMP reports that 85.", + "type": "data", + "explanation": "It is indeed a dataset as it is explicitly mentioned as a source of data for the reported figures.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of data in the context.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly mentioned as a source of data for the reported figures.", + "contextual_signal": "mentioned as a source of data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 53, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda ( P176747 ) Page 49 of 77 age, and disability status of firm owner. Gender gap between women participating in the project in ownership of and use of bank accounts The difference between women \u2019 s and men \u2019 s ownership and use of a bank account. Annual Survey of the women benefiting from the project and the men in their communities. The survey will be administered to a panel of women and men annually using the same questions as in the Global Findex survey. MGLSD to administer the surveys, and compile and report the data. Women participating in planning forums supported to identify priority infrastructure ( Number ) Participation in the project in forums to identify priority infrastructure. Annual Forum attendance sheets. The sponsors of the forums will ask participants to sign in ( in-person events ). Virtual participation will be recorded. MGLSD will collect the data upon completion of each forum and compile the data and present it in quarterly progress reports. Regional facilities constructed or rehabilitated under the project, including childcare facilities and GBV referral services, operating as planned ( Number ) The number of regional multi-purpose facilities constructed or rehabilitated by the project and their operational status. Annual Supervising engineers \u2019 reports, project progress reports.", + "ner_text": [ + [ + 791, + 821, + "named" + ], + [ + 86, + 92, + "Annual Forum attendance sheets <> data geography" + ], + [ + 619, + 624, + "Annual Forum attendance sheets <> reference population" + ] + ], + "validated": true, + "empirical_context": "Women participating in planning forums supported to identify priority infrastructure ( Number ) Participation in the project in forums to identify priority infrastructure. Annual Forum attendance sheets. The sponsors of the forums will ask participants to sign in ( in-person events ).", + "type": "dataset", + "explanation": "The attendance sheets are explicitly mentioned as a record of participation, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because attendance sheets typically contain structured data about participants.", + "contextual_reason_agent": "The attendance sheets are explicitly mentioned as a record of participation, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 70, + "text": "62 58. Monitoring the labor market will be very important to allow the Government to adjust policies in order to achieve the Compact Goals. In particular, it will be important to monitor ( at least quarterly ) the number of work permits applications received ( and number issued ) by governorate / camp, occupation, sector, gender, nationality, UNHCR status and new / renewal / change of employer. It will similarly be important to monitor the number of MOI ID card applications received ( and number issued ) by governorate / camp, occupation, sector, gender, and UNHCR status. Documentation of any changes in work permit regulations is also important. In order to monitor the impact on the overall labor market, it will be essential to expand the Department of Statistics quarterly Employment and Unemployment Survey to include coverage of refugees and economic migrants from various countries. This data is urgently needed and implementation of the revised survey will start by the beginning of 2017. Investment Climate Predictability of Private Sector Regulations 59. Jordan \u2019 s legal regime, administrative process, and enforcement of regulations related to business activity are widely regarded as complex, unstable, and fragmented. A number of reviews have confirmed the consensus among both Jordanians and foreign observers that the overall regime suffers from a lack of coherence, transparency, and predictability for investors.", + "ner_text": [ + [ + 784, + 818, + "named" + ], + [ + 749, + 773, + "Employment and Unemployment Survey <> publisher" + ], + [ + 842, + 850, + "Employment and Unemployment Survey <> reference population" + ], + [ + 855, + 872, + "Employment and Unemployment Survey <> reference population" + ], + [ + 998, + 1002, + "Employment and Unemployment Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Documentation of any changes in work permit regulations is also important. In order to monitor the impact on the overall labor market, it will be essential to expand the Department of Statistics quarterly Employment and Unemployment Survey to include coverage of refugees and economic migrants from various countries. This data is urgently needed and implementation of the revised survey will start by the beginning of 2017.", + "type": "survey", + "explanation": "This is a dataset as it is described as a survey that will be expanded to include additional data on refugees and economic migrants.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on employment and unemployment.", + "contextual_reason_agent": "This is a dataset as it is described as a survey that will be expanded to include additional data on refugees and economic migrants.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 28, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 24 of 66 65. Given its focus on building local capacity in the medium and long term, this Subcomponent is aligned with GCRF Pillar 4: Strengthening Policies, Institutions and Investments for Rebuilding Better. 66. Subcomponent 4. 2: Strengthening data for education system management ( US $ 2 million ). Strengthening the education system \u2019 s management also entails supporting the education governing ministries in establishing a data system to deliver reliable data in line with international standards. This activity will build on the existing EMIS. TA will be provided to diagnose the existing system regarding data collection processes, standards and templates, personnel, and IT infrastructure. Potential areas to be addressed may include ( i ) improved templates and standards; ( ii ) improved data collection processes, such as phone-based data collection to enable the various education stakeholders, including the community and parents, to collect and submit real-time data into the EMIS; and ( iii ) an improved web-based EMIS system that has the capacity to integrate real-time data. Real-time data would include schools \u2019 condition, teacher registration, teacher and student attendance, and the availability of teaching and learning materials. This could be effectively utilized once synced across the web-based platform.", + "ner_text": [ + [ + 1086, + 1090, + "named" + ] + ], + "validated": false, + "empirical_context": "TA will be provided to diagnose the existing system regarding data collection processes, standards and templates, personnel, and IT infrastructure. Potential areas to be addressed may include ( i ) improved templates and standards; ( ii ) improved data collection processes, such as phone-based data collection to enable the various education stakeholders, including the community and parents, to collect and submit real-time data into the EMIS; and ( iii ) an improved web-based EMIS system that has the capacity to integrate real-time data. Real-time data would include schools \u2019 condition, teacher registration, teacher and student attendance, and the availability of teaching and learning materials.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 32, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 27 of 85 Learning assessments do not inform classroom practice or policymaking Improving measurement of learning and use of results Technology for better capture of learning results Improved learning assessments are revised and administered Teacher practices do not effectively support learning for all School-based and continuous professional development for teachers Technology for providing continuous professional development to teachers Improved teaching practices Outdated teaching and learning resources Improving teacher and learning resources in math, science, and technology Digital learning resources School leaders unprepared for leadership roles Capacity building for better school management and the establishment of professional communities of practice Technology to share best practices and portals setup to access resources Improved school leadership Weak organization and management of education sector data Modernizing EMIS Technology for data collection and use Sector data, including student identification and human resource data, is better managed and utilized F. Rationale for Bank Involvement and Role of Partners 69. The rationale for public sector provisioning / financing is strongly justified in Djibouti, particularly in disadvantaged areas. The public sector remains the sole source of education provision in most areas of the country, except in Djibouti Ville where there is some private sector presence. The economic and social returns to public investment in basic education are well established.", + "ner_text": [ + [ + 1015, + 1019, + "named" + ], + [ + 4, + 14, + "EMIS <> publisher" + ], + [ + 981, + 1002, + "EMIS <> data type" + ], + [ + 1059, + 1070, + "EMIS <> data type" + ], + [ + 1082, + 1128, + "EMIS <> data description" + ], + [ + 1302, + 1310, + "EMIS <> data geography" + ], + [ + 1454, + 1468, + "EMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 27 of 85 Learning assessments do not inform classroom practice or policymaking Improving measurement of learning and use of results Technology for better capture of learning results Improved learning assessments are revised and administered Teacher practices do not effectively support learning for all School-based and continuous professional development for teachers Technology for providing continuous professional development to teachers Improved teaching practices Outdated teaching and learning resources Improving teacher and learning resources in math, science, and technology Digital learning resources School leaders unprepared for leadership roles Capacity building for better school management and the establishment of professional communities of practice Technology to share best practices and portals setup to access resources Improved school leadership Weak organization and management of education sector data Modernizing EMIS Technology for data collection and use Sector data, including student identification and human resource data, is better managed and utilized F. Rationale for Bank Involvement and Role of Partners 69.", + "type": "system", + "explanation": "EMIS is indeed a data source as it relates to the management and utilization of education sector data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of modernizing data collection and management.", + "contextual_reason_agent": "EMIS is indeed a data source as it relates to the management and utilization of education sector data.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 35, + "text": "For instance, the Project will actively empower communities by setting up local supervision committees to provide oversight during construction and after completion. In the refugee settlements and host communities, stakeholder consultations will be conducted with local leaders and community members in collaboration with the OPM and the UNHCR to build consensus on issues and approaches for the Project. Beneficiary satisfaction surveys, including a survey specifically for districts hosting refugees, will be conducted at the Project \u2019 s start, midterm review, and at completion. Relevant corrective actions will be monitored over the life of the Project. The Project will also establish a grievance redress system dedicated to addressing residents \u2019 complaints related to the Project \u2019 s works. F. Climate Change Co-Benefits 81. An assessment of climate co-benefits for the Project was conducted for Components 1 to 3. A quantitative analysis was conducted to estimate the Project \u2019 s impact on GHG emissions and to value the externality using the shadow price of carbon ( Annex 4 ). For each component, GHG emissions were estimated in tCO2eq using the World Bank Water Global Practice \u2019 s GHG Accounting Excel Tool. The net emissions of the Project were estimated at \u2212 8, 124 tCO2eq. The Project ' s main contribution to reducing emissions will be through replacing water supply tanker truck use with piped systems in refugee host communities.", + "ner_text": [ + [ + 405, + 437, + "named" + ], + [ + 475, + 501, + "Beneficiary satisfaction surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "In the refugee settlements and host communities, stakeholder consultations will be conducted with local leaders and community members in collaboration with the OPM and the UNHCR to build consensus on issues and approaches for the Project. Beneficiary satisfaction surveys, including a survey specifically for districts hosting refugees, will be conducted at the Project \u2019 s start, midterm review, and at completion. Relevant corrective actions will be monitored over the life of the Project.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves collecting structured data through surveys at multiple points in time.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured collection of responses from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it involves collecting structured data through surveys at multiple points in time.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 44, + "text": "Therefore, the values for internal rate of return ( IRR ) and net present value ( NPV ) generated are lower bounds as they are based only on quantifiable labor market returns, and do not include the valuation of positive externalities. 82. Cost-benefit analysis: The cost-benefit analysis undertaken uses conservative assumptions based on similar projects undertaken in comparable contexts. The analysis includes Components 1 and 2 of the project, which account for 88 percent of the project \u2019 s budget, and each component is evaluated separately. For all components, working life is assumed to be 35 years, with a 7 percent discount rate, constant wages and a lifetime effect of the program on earnings and / or employment. The unemployment rate is 8 percent for all beneficiaries. Opportunity costs and post-program earnings are based on ECVMB 2013 survey data for project target sub-populations. There are no direct private costs to participating in the program, which lasts five years, with the last cohort of beneficiaries entering the labor market in 2029. 83. NPV and IRR: The NPV of Component 1 is estimated to be US $ 95 million, with an IRR of 17. 4 percent, with lower bounds of US $ 40 million and 11. 7 percent.", + "ner_text": [ + [ + 840, + 862, + "named" + ] + ], + "validated": true, + "empirical_context": "The unemployment rate is 8 percent for all beneficiaries. Opportunity costs and post-program earnings are based on ECVMB 2013 survey data for project target sub-populations. There are no direct private costs to participating in the program, which lasts five years, with the last cohort of beneficiaries entering the labor market in 2029.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as 'survey data' used for empirical analysis of project target sub-populations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey data', which typically refers to collected data from a structured survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as 'survey data' used for empirical analysis of project target sub-populations.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 77, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 72 of 117 availability of an approved costed annual work plan; evidence of appropriate store ledgers; undertake annual procurement audits; and institutional level public disclosures total funds received, enrolment and staffing, and income and expenditures ). Establishment of standards and tools for quality assurance mechanisms for Pre - primary education, including teacher appraisal tools, assessment and classroom observation tools. This indicator entails: a ) conducting a sample - based national survey to measure Pre-school quality and Pre-primary children \u2019 s School readiness ( cognitive, motor, language, and socio-emotional skills ). b ) establishment of standards and tools for quality assurance mechanisms for Pre - primary education, including assessment and classroom education observation tools. Once MoE, CoG MoE coordinates with the CoG for the assessments and development of the standards and tools.", + "ner_text": [ + [ + 555, + 585, + "named" + ], + [ + 620, + 640, + "sample - based national survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Establishment of standards and tools for quality assurance mechanisms for Pre - primary education, including teacher appraisal tools, assessment and classroom observation tools. This indicator entails: a ) conducting a sample - based national survey to measure Pre-school quality and Pre-primary children \u2019 s School readiness ( cognitive, motor, language, and socio-emotional skills ). b ) establishment of standards and tools for quality assurance mechanisms for Pre - primary education, including assessment and classroom education observation tools.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves conducting a survey to gather structured data on specific educational indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a national survey that collects data on Pre-school quality and children's school readiness.", + "contextual_reason_agent": "This is indeed a dataset as it involves conducting a survey to gather structured data on specific educational indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "173_multi0page", + "page": 28, + "text": "Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: Limiting and reversing the - At least 55 % of high risk KAP studies Continued political support by trend of the epidemic by population ( youth, army, sex ministers and religious groups preventing new infections workers, truck drivers, miners ) used a condom in their last sexual encounter. - at least 50 % of people who Baseline is the 1999 DHS are aware of AIDS have used a which showed 27 % for men condom with a partner other and 18 % for women. than their regular partner in the last 12 months - Keep the HIV prevalence Data from sentinel sites, and armong 15 to 24 year old sero-prevalence surveys in urban pregnant women below 2002 and in 2006 5 % by 2006. Output from each Output Indicators: Project reports: ( from Outputs to Objective ) Component: Support to Commrunity based initiatives A social support system is - Increase by at least 1, 500 the Surveys Contracted agency ( ies ) able to developed in which CCC number of orphans who attend provide the relevant service. activities are strengthened in school regularly as of 2004. the communities for all groups affected by HIV / AIDS ( orphans, PLWHA, high risk groups ) - Decrease by at least 20 % the Baseline is the 1999 DHS; % of women and men who KAP Surveys don ' t know any mean to CNLS progress reports - 25 -", + "ner_text": [ + [ + 445, + 448, + "named" + ], + [ + 440, + 444, + "DHS <> reference year" + ], + [ + 613, + 632, + "DHS <> data type" + ], + [ + 710, + 730, + "DHS <> reference population" + ], + [ + 749, + 753, + "DHS <> publication year" + ], + [ + 1140, + 1144, + "DHS <> publication year" + ], + [ + 1285, + 1289, + "DHS <> reference year" + ] + ], + "validated": true, + "empirical_context": "Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: Limiting and reversing the - At least 55 % of high risk KAP studies Continued political support by trend of the epidemic by population ( youth, army, sex ministers and religious groups preventing new infections workers, truck drivers, miners ) used a condom in their last sexual encounter. - at least 50 % of people who Baseline is the 1999 DHS are aware of AIDS have used a which showed 27 % for men condom with a partner other and 18 % for women. than their regular partner in the last 12 months - Keep the HIV prevalence Data from sentinel sites, and armong 15 to 24 year old sero-prevalence surveys in urban pregnant women below 2002 and in 2006 5 % by 2006.", + "type": "survey", + "explanation": "In this context, 'DHS' refers to the Demographic and Health Surveys, which are structured collections of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHS' is a dataset because it is referenced in the context of baseline data for HIV awareness and condom use statistics.", + "contextual_reason_agent": "In this context, 'DHS' refers to the Demographic and Health Surveys, which are structured collections of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 52, + "text": "45 Table A3. 2 DLIs / DLRs Verification Protocol Table DLI # Definition / Description of achievement Scalability of Disbursements ( Yes / No ) Protocol to evaluate achievement of the DLRs and data / results verification Data source / agency Verificatio n Entity Procedure DLI # 1 Population of students enrolled in public formal schooling ( KG through Grade 12 ). The increase in enrollment numbers is calculated with reference to the baseline value at YEAR 0. Data should be reported disaggregated by type of school, education cycle, nationality ( Lebanese, non-Lebanese ), and gender. Yes Enrollment lists from MEHE Third Party Enrollment data should be provided in March and verified within a month period. There should be a review of enrollment numbers through site visits and spot checks in a sample of randomly selected public schools. DLI # 2. 1 Calculated by dividing the number of public school students in the target year who enroll in Grade 5 for the first time ( not repeated students ) by the number of public school students who enrolled in Grade 4 at the beginning of the preceding scholastic year. Data to be disaggregated by nationality ( Lebanese, non-Lebanese ) and gender. Yes Enrollment lists from MEHE Third Party Enrollment data should be provided in March and verified within a month period.", + "ner_text": [ + [ + 613, + 645, + "named" + ], + [ + 591, + 607, + "MEHE Third Party Enrollment data <> data type" + ], + [ + 826, + 840, + "MEHE Third Party Enrollment data <> data geography" + ], + [ + 890, + 912, + "MEHE Third Party Enrollment data <> reference population" + ], + [ + 1016, + 1038, + "MEHE Third Party Enrollment data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Data should be reported disaggregated by type of school, education cycle, nationality ( Lebanese, non-Lebanese ), and gender. Yes Enrollment lists from MEHE Third Party Enrollment data should be provided in March and verified within a month period. There should be a review of enrollment numbers through site visits and spot checks in a sample of randomly selected public schools.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data that should be provided and verified for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific enrollment data that is to be reported and verified.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data that should be provided and verified for analysis.", + "contextual_signal": "mentioned as data that should be provided and verified", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 48, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 33 Data source BDF records on beneficiaries and loan amounts. Methodology for Data Collection Accessed from loan applications to PFIs. Responsibility for Data Collection BDF & MINEMA consolidates. Environmental Management & Climate Resilience Landscapes under enhanced conservation and / or sustainable management ( terrestrial and inland water areas ) ( Hectare ( Ha ) ) Description Quantitative indicator counting number of hectares ( total area ) of terrestrial and inland water areas enhanced by the interventions under component 3. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Hectarage mapped to include area of direct activity implementation ( site of works ) and area benefiting from the works ( such as water catchment area ). Responsibility for Data Collection MINEMA. Climate resilience subprojects completed ( Number ) Description Quantitative indicator conting the number of climate resilience subprojects completed under component 3. Covers water harvesting tanks, drainage systems and septage tank access ways. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. Responsibility for Data Collection MINEMA. Project Management, M & E, Capacity-Building, Research and Learning Grievances registered related to the delivery of project benefits that are addressed Description Quantitative indicator counting number of grievances registered and addressed.", + "ner_text": [ + [ + 684, + 695, + "named" + ], + [ + 4, + 14, + "Project MIS <> publisher" + ], + [ + 74, + 80, + "Project MIS <> data geography" + ], + [ + 499, + 521, + "Project MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "Environmental Management & Climate Resilience Landscapes under enhanced conservation and / or sustainable management ( terrestrial and inland water areas ) ( Hectare ( Ha ) ) Description Quantitative indicator counting number of hectares ( total area ) of terrestrial and inland water areas enhanced by the interventions under component 3. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation.", + "type": "system", + "explanation": "In the context, 'Project MIS' is explicitly identified as a data source for monitoring project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Project MIS' is a dataset because it is mentioned as a data source in the context.", + "contextual_reason_agent": "In the context, 'Project MIS' is explicitly identified as a data source for monitoring project implementation.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 48, + "text": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 36 of 76 to the national development priorities and institutional mandates to ensure that the different activities are fully supported. Institutional strengthening of NITA-U has already taken place during implementation of RCIP-5 and the use of alternative delivery models involving partnerships with the private sector and NGOs to complement government efforts will also be applied under this project. Despite these mitigation measures, the residual stakeholder risk remains substantial for the time being. 100. Refugee protection is an \u2018 other \u2019 risk that is rated as Moderate. The WB, in consultation with UNHCR, has confirmed that Uganda \u2019 s protection framework is adequate for accessing funding under the IDA19 WHR. Uganda is adopting comprehensive humanitarian and development programs aimed at mitigating protection risks faced by refugees, including the managed arrival of refugees despite COVID-19 border closures. However, there is a moderate risk that Uganda \u2019 s asylum space and refugee policies could become more restrictive in response to the strain on services and the natural environment, continuing refugee population growth, and COVID-19-related and political pressure.", + "ner_text": [ + [ + 53, + 59, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Uganda Digital Acceleration Project \u2013 GovNet ( P171305 ) Page 36 of 76 to the national development priorities and institutional mandates to ensure that the different activities are fully supported. Institutional strengthening of NITA-U has already taken place during implementation of RCIP-5 and the use of alternative delivery models involving partnerships with the private sector and NGOs to complement government efforts will also be applied under this project.", + "type": "project", + "explanation": "'GovNet' is mentioned only as a project, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GovNet' is a dataset because it is associated with a project that involves data-related activities.", + "contextual_reason_agent": "'GovNet' is mentioned only as a project, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 64, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 59 of 130 electricity access under the project, disaggregated by gender. Average household size of 4. 7 people, UBOS National Household Survey 2016-2017. reports. Average household size based on UBOS National Household Survey. People provided with access to electricity under the project with off - grid of which female The indicator will track the number of females benefiting from off-grid electricity access under the project. Share of female is 51 %, UBOS Women and Men in Uganda, Facts and Figures 2016. Quarterly Project implementati on progress reports. UBOS Women and Men in Uganda, Facts and Figures, 2016 Approved loan and grants applications UECCC, MEMD People provided with access to electricity with grid, mini-grid, off-grid in refugee - hosting districts The indicator will track the number of host communities and refugees beneficiaries of access to grid, mini-grid, and off-grid technologies under the project in District hosting refugees. Average household size of 4. 7 people, UBOS National Household Survey 2016 - Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports.", + "ner_text": [ + [ + 1195, + 1225, + "named" + ], + [ + 150, + 187, + "UBOS National Household Survey <> data description" + ], + [ + 220, + 229, + "UBOS National Household Survey <> publication year" + ], + [ + 554, + 560, + "UBOS National Household Survey <> data geography" + ], + [ + 580, + 584, + "UBOS National Household Survey <> publication year" + ], + [ + 660, + 666, + "UBOS National Household Survey <> data geography" + ], + [ + 1360, + 1378, + "UBOS National Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "7 people, UBOS National Household Survey 2016 - Quarterly Project implementati on progress reports. Average household size based on UBOS National Household Survey. Service Providers customer database, MEMD database, IVA reports, approved loan applications and installation reports.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a National Household Survey, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data on household size.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a National Household Survey, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 88, + "text": "Table 2: Estimated annual value of each NPTP benefit Expected number Annual of affected Beneit vlue ( marginal + infra - Comments Education benefit US $ 150 per 28, 263 students The value is estimated by the MEHE; the ( fee waivers, student aged 6-18 years number of students of eligible age is textbooks ) calculated from the NPTP database Health benefit US $ 374 per 13, 492 individuals The value is calculated based on MOPH ( hospitalization fee individual data of age-specific morbidity rates; the waiver copayments ) number of affected individuals is at twice the current national morbidity rate ( MOPH estimate ) to account for expected increase in utilization of hospital care Electricity benefit US $ 159. 60 18, 801 households The value is calculated as 10, 000 L. L. of ( cost of connection + per monthly connection charges and 10, 000 lump-sum discount household L. L. of monthly lump-sum discount off the electricity bill ) annualized and converted to US $ Health benefits ( Cost US $ 120 per 16, 315 individuals The value includes registration fee for of chronic drugs ) individual monthly checkup of chronic diseases as well as medicines provided by MOSA SDCs * The expected coverage rate of extremely poor individuals by the end of the project.", + "ner_text": [ + [ + 429, + 464, + "named" + ] + ], + "validated": false, + "empirical_context": "Table 2: Estimated annual value of each NPTP benefit Expected number Annual of affected Beneit vlue ( marginal + infra - Comments Education benefit US $ 150 per 28, 263 students The value is estimated by the MEHE; the ( fee waivers, student aged 6-18 years number of students of eligible age is textbooks ) calculated from the NPTP database Health benefit US $ 374 per 13, 492 individuals The value is calculated based on MOPH ( hospitalization fee individual data of age-specific morbidity rates; the waiver copayments ) number of affected individuals is at twice the current national morbidity rate ( MOPH estimate ) to account for expected increase in utilization of hospital care Electricity benefit US $ 159. 60 18, 801 households The value is calculated as 10, 000 L.", + "type": "data", + "explanation": "However, it is not functioning as a data source in this context, as it refers to individual data rather than a structured dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' which often implies a structured collection.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it refers to individual data rather than a structured dataset.", + "contextual_signal": "mentioned only as individual data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 8, + "text": "There exists concurrently considerable excess capacity and wide-scale overcrowding of schools \u2013 much of it linked to rented facilities, but not exclusively. \u2022 ECE ( Early Childhood Education ): Significant achievements in construction, teacher training, curriculum development, teacher professional development, parent involvement and standards setting have helped make this one of the more dynamic elements of Jordan \u2019 s education system. The private sector is responsible for 90 percent of provision ( down from 95 percent in 2003 ), and improvements in regulation and guidance regarding standards have helped this subsector mature as it expands. Yet significant challenges remain: continued expansion of access to Kindergarten Year Two ( KG2 ) for the roughly half of children without access to KG2 classes competes with growing demand for greater public investment in Kindergarten Year 1 ( KG1 ) level provision; poor urban communities and children in rural areas compete for access to public provision; quantitative expansion competes with the need for consolidation of quality. \u2022 Technical, and Vocational Education and Training ( TVET ): Enrollment in secondary vocational education as a share of total secondary enrolment declined from 18 percent in 2000 to 12 percent in 2005, suggesting a critical demand side problem, at a time of growing need for skills for the economic transformation articulated in the National Agenda. This suggests the need for significant realignment of MoE vocational programs to be undertaken based on input from the employer community and deeper analysis of labor market information derived from Al Manar and similar initiatives, in a way that aligns them with the reforms initiated by the Ministries of Labour ( MoL ) and Higher Education and Scientific Research ( MoHESR ).", + "ner_text": [ + [ + 1595, + 1619, + "named" + ] + ], + "validated": false, + "empirical_context": "\u2022 Technical, and Vocational Education and Training ( TVET ): Enrollment in secondary vocational education as a share of total secondary enrolment declined from 18 percent in 2000 to 12 percent in 2005, suggesting a critical demand side problem, at a time of growing need for skills for the economic transformation articulated in the National Agenda. This suggests the need for significant realignment of MoE vocational programs to be undertaken based on input from the employer community and deeper analysis of labor market information derived from Al Manar and similar initiatives, in a way that aligns them with the reforms initiated by the Ministries of Labour ( MoL ) and Higher Education and Scientific Research ( MoHESR ).", + "type": "information", + "explanation": "However, it is not presented as a structured collection of data or a data source in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'labor market information' sounds like it could be structured data related to employment statistics.", + "contextual_reason_agent": "However, it is not presented as a structured collection of data or a data source in the context provided.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "116_PAD1510-PAD-P152821-IDA-R2016-0078-1-Box394886B-OUO-9", + "page": 63, + "text": "Since the Financial Management assessment was completed, the NCCS implemented the following mitigation measures: ( a ) completed the development of the budget tracking tool, ( b ) developed and adopted a plan to embed the financial management of the NCCS into the project module of the IFMIS, ( c ) initiated and is in the process of completing the recruitment of an internal auditor, and ( d ) engaged the office of the controller of internal audits and the Western Province Provincial internal audit department to undertake internal audits of the Secretariat, the PIU, and all the sub-projects. 57. The overall conclusion of the assessment is that despite the control environment issues affecting the country, as assessed by various diagnostic studies in Zambia, the implementing agency ( NCCS ) satisfies the minimum financial management requirements as stated in the Bank \u2019 s OP / BP 10. 00. The risk rating for the Project \u2019 s financial management arrangements has therefore been assessed as Moderate. Financial Management Arrangements for the Project 58. Staffing. The NCCS has a Finance Department headed by a Financial Management Specialist ( FMS ) who is assisted by three ( 03 ) Project Accountants. However, this staffing arrangement is not adequate; therefore, it is recommended that the Project employs its own dedicated Project Accountant at the NCCS secretariat to be assisted by two Assistant Accounts, one at each Provincial Project Implementation Unit. 59. Budgeting arrangements. Budget preparation and monitoring will follow national procedures. NCCS produces the budget using COSTAB, and includes sufficient details to allow for regular and effective implementation of planned activities and use of funds. The Project will prepare Annual Work Plans and Budgets ( AWPBs ), which will be the basis for budget preparation. The approval process will follow government procedures and is expanded in NCCS \u2019 s Project Financial Procedures Manual.", + "ner_text": [ + [ + 286, + 291, + "named" + ] + ], + "validated": false, + "empirical_context": "Since the Financial Management assessment was completed, the NCCS implemented the following mitigation measures: ( a ) completed the development of the budget tracking tool, ( b ) developed and adopted a plan to embed the financial management of the NCCS into the project module of the IFMIS, ( c ) initiated and is in the process of completing the recruitment of an internal auditor, and ( d ) engaged the office of the controller of internal audits and the Western Province Provincial internal audit department to undertake internal audits of the Secretariat, the PIU, and all the sub-projects. 57.", + "type": "system", + "explanation": "However, IFMIS is mentioned as a project module and not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMIS is a dataset because it is related to financial management and data tracking.", + "contextual_reason_agent": "However, IFMIS is mentioned as a project module and not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 16, + "text": "In addition, according to a study by the nongovernmental organization ( NGO ) Association pour la Promotion du D\u00e9veloppement local ( APDEL ), 61 percent of sexual violence in schools is committed by adults, 31 percent by teachers, 12 percent by administrative staff, and 13 percent by repeaters. The overrepresentation of administrative officials in the perpetrators of sexual violence is an additional element that explains why sexual violence is widely tolerated in schools, whether perpetrated by adults or students. 24 World Bank. 2019. \u201c Discussion approfondie pour sur le SWEDD 2 pour les pays en pr\u00e9paration du projet. \u201d 25 UNICEF. 2014. \u201c Cameroon Multiple Indicator Cluster Survey. \u201d 76 22 15 5 57 10 4 1 98 66 59 39 99 70 62 41 0 50 100 Grade 1 Grade 6 Grade 7 Grade 9 Percentage Boy-rural-poor Girl-rural-poor Boy-urban-rich Girl-urban-rich", + "ner_text": [ + [ + 647, + 689, + "named" + ], + [ + 142, + 182, + "Cameroon Multiple Indicator Cluster Survey <> data description" + ], + [ + 631, + 637, + "Cameroon Multiple Indicator Cluster Survey <> publisher" + ], + [ + 639, + 643, + "Cameroon Multiple Indicator Cluster Survey <> publication year" + ], + [ + 647, + 655, + "Cameroon Multiple Indicator Cluster Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "2014. \u201c Cameroon Multiple Indicator Cluster Survey. \u201d 76 22 15 5 57 10 4 1 98 66 59 39 99 70 62 41 0 50 100 Grade 1 Grade 6 Grade 7 Grade 9 Percentage Boy-rural-poor Girl-rural-poor Boy-urban-rich Girl-urban-rich", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly named as a survey that collects multiple indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often refers to structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly named as a survey that collects multiple indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 14, + "text": "This lack of investment in education over the last decade has affected the country \u2019 s stock of human capital, and youth now entering the labor market have few employable skills. It has also degraded the quality of education institutions in the country, placing the human capital of future generations in jeopardy. 11. Rebuilding the country \u2019 s education system requires addressing the significant shortfalls in skilled teachers. According to the 2021 Education Census Report, 26 percent of schools across the country are non-operational due to lack of teachers. At the same time, the high pupil-qualified teacher ratio of 86: 1 in primary schools significantly compromises the ability to provide quality education. There is also a large gap between male and female teachers. Of the 60, 711 teachers in the country, only about 18 percent are female. 13 The difficulty in finding qualified teachers has led the education system to recruit volunteer teachers. For instance, 46 percent of primary education teachers are volunteers without training. The lack of qualified teachers has impacted student learning, with more than 94 percent of school children unable to read and understand a simple text by the age of 10. 14 To offer quality education 11 World Bank. ( 2018 ). The Human Capital Project. Washington, DC: World Bank. 12 UNICEF. 2017. \" South Sudan: The impact of the crisis on children-Briefing Note. \" 13 Data based on Government of South Sudan National Education Census Report 2021. 14 World Bank. 2022. Strategies for Addressing Stunting and Learning Poverty in South Sudan.", + "ner_text": [ + [ + 448, + 476, + "named" + ] + ], + "validated": true, + "empirical_context": "Rebuilding the country \u2019 s education system requires addressing the significant shortfalls in skilled teachers. According to the 2021 Education Census Report, 26 percent of schools across the country are non-operational due to lack of teachers. At the same time, the high pupil-qualified teacher ratio of 86: 1 in primary schools significantly compromises the ability to provide quality education.", + "type": "report", + "explanation": "This is a dataset as it provides structured data on the operational status of schools and teacher ratios used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a report that provides statistical information about education.", + "contextual_reason_agent": "This is a dataset as it provides structured data on the operational status of schools and teacher ratios used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "165_311820EG", + "page": 19, + "text": "In the target govemorates, the NGO and public sectors have the largest share o f the delivery at 33 percent and 39 percent o f the market share, respectively. The project will, therefore, support expansion in the public and not-for-profit ( NGOs ) sectors. Financial 48. The annual cost o f this investment ( including recurrent expenditures ) will approximate 0. 48 percent o f the 1999-2000 Government budget and annual recurrent costs after project close in 2010 are estimated to be approximately 0. 2 percent o f the Government budget o f US $ 2. 69 billion for all levels o f education. Thus the investment is affordable and sustainable. 49. The fiscal impact o f the agreed fee reduction in ECEEP schools is minimal and affordable as it would represent 0. 06 percent o f 1999-2000 Government expenditure o n all levels o f education. Technical 50. The proposed project is based on the ECE Strategic Options paper ( 2002 ) prepared by the Bank in close partnership with the MOE. This paper recommends effective, technically sound and viable approaches for the Government to consider. The project is based o n a series o f technical studies and advice from international experts. It builds on the innovative experiences already tried successfully in Egypt and in the region. Finally, it builds o n the experiences and recommendations from the ECE service providers ( through a Private Sector Survey ) and beneficiaries, such as teachers and parents ( through a Social Assessment ). Among the technical reports conducted for the study, the following are particularly pertinent: Private ECE Sector survey o f service providers ( 2003 ); Social Assessment o f the key 14", + "ner_text": [ + [ + 1381, + 1402, + "named" + ], + [ + 944, + 948, + "Private Sector Survey <> author" + ], + [ + 979, + 982, + "Private Sector Survey <> author" + ], + [ + 1254, + 1259, + "Private Sector Survey <> data geography" + ], + [ + 1347, + 1368, + "Private Sector Survey <> reference population" + ], + [ + 1432, + 1452, + "Private Sector Survey <> reference population" + ], + [ + 1631, + 1635, + "Private Sector Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "It builds on the innovative experiences already tried successfully in Egypt and in the region. Finally, it builds o n the experiences and recommendations from the ECE service providers ( through a Private Sector Survey ) and beneficiaries, such as teachers and parents ( through a Social Assessment ). Among the technical reports conducted for the study, the following are particularly pertinent: Private ECE Sector survey o f service providers ( 2003 ); Social Assessment o f the key 14", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data from service providers.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 34, + "text": "; iii. development of curriculum support materials for learners with special needs and indigenous languages ( VMGF ); iv. development of formative assessment materials for the new Competency Based Teacher Education ( CBTE ) curriculum; v. training of teachers in CBA; vi. utilization of NEMIS data for mapping out school needs and the development budget allocations for rollout of the CBC; vii. implementation of the needs - based school infrastructure investment plan to complement roll out of the CBC; and viii. establishment of standards and tools for quality assurance for preschool for alignment with the CBC and CBA \u2022 Curriculum design, instructional and CBA materials, human resources ( teachers ready to implement CBC ), and infrastructure ( new classrooms ) are in place for rolling out the CBC and CBA in basic education Improved learning outcomes in higher order competencies in early grades in literacy and numeracy in target Counties. Improved retention of girls in upper primary. The CBC and CBA assessments are successfully rolled out in basic education.", + "ner_text": [ + [ + 287, + 292, + "named" + ], + [ + 938, + 946, + "NEMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "training of teachers in CBA; vi. utilization of NEMIS data for mapping out school needs and the development budget allocations for rollout of the CBC; vii. implementation of the needs - based school infrastructure investment plan to complement roll out of the CBC; and viii.", + "type": "system", + "explanation": "NEMIS is confirmed as a dataset since it is explicitly used for mapping out school needs and budget allocations.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of utilizing data for mapping school needs.", + "contextual_reason_agent": "NEMIS is confirmed as a dataset since it is explicitly used for mapping out school needs and budget allocations.", + "contextual_signal": "follows 'utilization of' indicating it serves as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "136_PAD7230P1476890AD0October0100final", + "page": 32, + "text": "beneficiaries include the Jordanian host population who will benefit irrespective of the number of Syrian refugees living in the participating municipalities. 3 Conflicted affected people include both the Jordanian host population and the Syrian refugees living in participating municipalities. 4 Baseline data on municipal pre-crisis investments is available for all participating municipalities.", + "ner_text": [ + [ + 297, + 310, + "named" + ], + [ + 26, + 51, + "Baseline data <> reference population" + ], + [ + 413, + 431, + "Baseline data <> usage context" + ] + ], + "validated": true, + "empirical_context": "3 Conflicted affected people include both the Jordanian host population and the Syrian refugees living in participating municipalities. 4 Baseline data on municipal pre-crisis investments is available for all participating municipalities.", + "type": "data", + "explanation": "'Baseline data' is explicitly mentioned as being available for analysis in the context of pre-crisis investments.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Baseline data' is explicitly mentioned as being available for analysis in the context of pre-crisis investments.", + "contextual_signal": "described as available for all participating municipalities", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 8, + "text": "3 \u2022 Quality: In international assessments ( TIMSS & PISA ), Jordan has performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages. National assessments ( both of \u201c knowledge economy skills \u201d and more general subject-area competence ) have revealed that the majority of students still perform below desired achievement levels. In addition up to 30 percent of students drop out before completion of 12th Grade. \u2022 Teacher Policy: Studies conducted as part of ERfKE preparation revealed that while the majority of teachers are in possession of the required formal qualifications, and the current student teacher ratios do not suggest a significant shortage of teachers, there are significant challenges regarding teacher recruitment, utilization, professional development and morale. There is still a relatively low level of actual use of the new methods and approaches in the classroom, and the new learning materials are often used in a conventional teaching approach. Teacher morale remains low. Teachers in Jordan are not recruited by the MoE but are assigned by the Public Service Bureau on the basis of examination scores.", + "ner_text": [ + [ + 44, + 49, + "named" + ] + ], + "validated": false, + "empirical_context": "3 \u2022 Quality: In international assessments ( TIMSS & PISA ), Jordan has performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages. National assessments ( both of \u201c knowledge economy skills \u201d and more general subject-area competence ) have revealed that the majority of students still perform below desired achievement levels.", + "type": "assessment", + "explanation": "However, TIMSS is mentioned as an assessment rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed TIMSS is a dataset because it is associated with international assessments that provide data on student performance.", + "contextual_reason_agent": "However, TIMSS is mentioned as an assessment rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 53, + "text": "This will enable management decisions to be made based on an assessment of whether the program is moving towards its objectives. A periodic financial audit will be conducted which will also help to identify and mitigate any potential sign of fraud and governance issues. The program monitoring system will rely on regular and accurate data collection and analysis to identify the timely implementation of activities, the achievement of intended results, and positive and negative unintended effects. i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii. Sources of Data: ( i ) the routine data collected through the claims processing system; ( ii ) data routinely collected at health facility level; ( iii ) additional data collected by MoPH such as from enrollment registers, spot checks and ad-hoc surveys; ( iv ) information collected during the verification processes; and ( v ) data from the MoPH rapid facility assessment.", + "ner_text": [ + [ + 977, + 1002, + "named" + ], + [ + 704, + 734, + "user satisfaction surveys <> data type" + ], + [ + 1023, + 1036, + "user satisfaction surveys <> reference population" + ], + [ + 1594, + 1612, + "user satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a method to gather data for analysis in the context of monitoring the program.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'user satisfaction surveys' imply a structured collection of data gathered from beneficiaries.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a method to gather data for analysis in the context of monitoring the program.", + "contextual_signal": "follows 'will provide data with which to counter-check the HIS data'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 915, + 920, + "named" + ], + [ + 626, + 639, + "DHIS2 <> data type" + ], + [ + 654, + 705, + "DHIS2 <> reference population" + ], + [ + 979, + 982, + "DHIS2 <> publisher" + ], + [ + 991, + 1055, + "DHIS2 <> data description" + ] + ], + "validated": true, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "database", + "explanation": "In this context, DHIS2 is mentioned as a source of data collected by woreda health offices, indicating its role as a structured collection of health-related data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because DHIS2 is often associated with health data collection systems.", + "contextual_reason_agent": "In this context, DHIS2 is mentioned as a source of data collected by woreda health offices, indicating its role as a structured collection of health-related data.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 57, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 52 for Performance Based Financing Health by region on level of utilization of the standard forms Performance Based Financing introduced by the P148052 Mother and Child Health Services Strengthening Project. Information is based on representative surveys. Cash transfer beneficiaries ( households ) Quarterly Baseline data collected from UNHCR and WFP on number of refugees receiving cash transfers in target areas. The CFS is launching a baseline study which will help to confirm baseline numbers, to be reviewed at MTR. CFS local offices produce simple reports by region on number of households receiving the transfer. This number is than multiplied by 5 since the average size of families is of five member. The reports are then consolidated by the CFS. Data are non-cumulative by cohort. CFS", + "ner_text": [ + [ + 317, + 339, + "named" + ], + [ + 15, + 19, + "representative surveys <> data geography" + ], + [ + 341, + 368, + "representative surveys <> reference population" + ], + [ + 505, + 508, + "representative surveys <> author" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 52 for Performance Based Financing Health by region on level of utilization of the standard forms Performance Based Financing introduced by the P148052 Mother and Child Health Services Strengthening Project. Information is based on representative surveys. Cash transfer beneficiaries ( households ) Quarterly Baseline data collected from UNHCR and WFP on number of refugees receiving cash transfers in target areas.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is used to provide information on the level of utilization of health services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'representative surveys' implies a structured collection of data used for analysis.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is used to provide information on the level of utilization of health services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 48, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 37 Indicator Name of which citizens in host communities Definition / Description People benefitted from improved sanitation facilities that have been constructed or rehabilitated under the project, including: pit latrine with slab, ventilated improved pit ( VIP ) latrine, composting toilet, and flush or pour-flush toilet / latrine to piped sewer system and septic tank, and fecal sludge treatment plants. It also includes shared sanitation facilities built in institutions and public places. Frequency Bi-annually. Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / NWSC.", + "ner_text": [ + [ + 633, + 649, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Bi-annually. Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database, Refugees GIS Mapping for WASH and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 10, + "text": "The Botswana HIV / AIDS epidemic is diverse, with the highest infection rates consistently reported from the northern areas o f the country, compared with those found in the southern and western regions. The 2004 Botswana AIDS Impact Survey ( BAIS 11 ) reported highest population-based, district-specific prevalence in the northeastern district o f Chobe ( 29. 4 percent ), with Francistown recording the second highest rate ( 24. 6 percent ). The 2004 Botswana AIDS Impact Survey ( BAIS II ) found prevalence to be highest among women 30-34 years age and estimated that nearly half ( 44 percent ) o f this cohort i s living with HIV-infection. The 2006 Botswana HIV / AIDS Sentinel Surveillance Technical Report also found that almost half o f women aged 25-34 years were infected ( Annex 1 ). 3. Figure 1 illustrates the epidemic dynamics over the past 25 years. The number o f new infections rose rapidly during the early 199Os, peaking in the mid-1990s. The number o f AIDS deaths started to grow rapidly about 10 years subsequent to this rise in new infections, peaking in 2003, just before the emergency expansion o f the national A I D S treatment program. By 2003, ' Current status of the HIVIAIDS epidemic in Botswana ( Draft, March 2008 ). NACA. 1", + "ner_text": [ + [ + 208, + 240, + "named" + ] + ], + "validated": true, + "empirical_context": "The Botswana HIV / AIDS epidemic is diverse, with the highest infection rates consistently reported from the northern areas o f the country, compared with those found in the southern and western regions. The 2004 Botswana AIDS Impact Survey ( BAIS 11 ) reported highest population-based, district-specific prevalence in the northeastern district o f Chobe ( 29. 4 percent ), with Francistown recording the second highest rate ( 24.", + "type": "survey", + "explanation": "This is a dataset as it provides empirical data on HIV/AIDS prevalence in Botswana, used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that reports specific prevalence rates.", + "contextual_reason_agent": "This is a dataset as it provides empirical data on HIV/AIDS prevalence in Botswana, used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 70, + "text": "There is adequate back up arrangements ( on site and off-site back systems system ), maintenance support from SUN office in Nairobi, and security to various users in terms data entry, verification and approvals. The system can also maintain data base information which does not contain accounting data but can be used to enhance the program monitoring system such as M & E, procurement etc. 17. Periodic Reporting for Project Monitoring: Formats of the interim un-audited financial reports ( IFRs ), i. e. periodic financial monitoring reports, are designed to provide quality and timely information to the World Bank and various stakeholders on the project \u2019 s performance. ( Samples of these reports are included in the project financial procedure addendum. Refer to Financial Monitoring Reports for World Bank-Financed Projects: Guidelines for Borrowers Dated November 30, 2002 ). 18. Within 45 days of the end of each quarter, the following reports would be prepared by the IGAD and submitted to the World Bank, and other stakeholders. The contents of these reports should, at a minimum, include the following: ( i ) Financial reports which sets forth sources and uses of funds by project activity / component, and statement of actual and budget expenditures, both cumulatively and for the period covered by said report, showing separately funds provided under the IDA and other financiers, and explains variances between the actual and planned uses of such funds; ( ii ) physical progress / output monitoring report which describes physical progress in Project implementation, both cumulatively and for the period covered by said report, and explains variances between the actual and planned Project implementation; and ( iii ) Procurement report which sets forth the status of procurement under the Project, as at the end of the period covered by said report 66", + "ner_text": [ + [ + 241, + 262, + "named" + ] + ], + "validated": false, + "empirical_context": "There is adequate back up arrangements ( on site and off-site back systems system ), maintenance support from SUN office in Nairobi, and security to various users in terms data entry, verification and approvals. The system can also maintain data base information which does not contain accounting data but can be used to enhance the program monitoring system such as M & E, procurement etc. 17. Periodic Reporting for Project Monitoring: Formats of the interim un-audited financial reports ( IFRs ), i.", + "type": "data", + "explanation": "However, it is not functioning as a data source in this context, as it is described in relation to a system rather than as a structured collection of data used for analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'data base information' suggests a collection of data.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it is described in relation to a system rather than as a structured collection of data used for analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 82, + "text": "Data source / Agency MoE \u2019 s annual school census using EMIS, Verification Entity ESS Procedure Data collected by MOE and verified by ESS. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 7 Pregnant women and caregivers of children 0-23 months participating in community conversations sessions in 29 selected SPG woredas. Description These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Data source / Agency DHIS2, MoH Verification Entity ESS", + "ner_text": [ + [ + 56, + 60, + "named" + ] + ], + "validated": false, + "empirical_context": "Data source / Agency MoE \u2019 s annual school census using EMIS, Verification Entity ESS Procedure Data collected by MOE and verified by ESS. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data collection and verification processes.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "021_BOSIB05e6fc47e0770aeec00ad5e11774f2", + "page": 15, + "text": "Like host communities, refugees have been adversely affected by COVID-19 shocks, coupled with high levels of food insecurity and aid dependency on limited food rations. 13 RHDs face challenges in the productivity of the land. There are limited soil conservation interventions in refugee settlements. Settlements are located in refugee hosting districts which often have less-productive agricultural land. Extension services are limited in settlements, and interventions are fragmented across different funding partners making it difficult to ensure sustainable use of natural resources for restoration, preservation of the environment, and improvement in agricultural productivity. Informal arrangements for sharecropping and refugees working on host community land provide vulnerability and protection challenges, as does the contestation of some land around refugee settlements. This has implications for social cohesion between refugee and host communities. 7. To address poverty and reverse the impacts of land degradation and promote the adoption and scale-up of appropriate land management practices and climate smart technologies for sustained productivity and poverty reduction, the Government of Uganda ( GoU ) would need to: ( a ) Invest in strengthening institutions at varying levels \u2014 communities and local governments \u2014 to promote economies of scale and mindset change among policy makers and communities regarding the benefits of promoting climate smart technologies, innovations, and management practices ( TIMPs ) and to enhance community resilience to climatic shocks. ( b ) Invest in climate smart technology generation and adaptation to facilitate and enhance farmers \u2019 adoption of TIMPs, climate smart technologies and SLM practices. ( c ) Address market access and infrastructure challenges to incentivize increased investments into climate smart TIMPs and enable a shift from subsistence farming to commercial oriented production. 13 World Bank. 2021. Monitoring Social and Economic Impacts of COVID-19 on Refugees in Uganda: Results from the High - Frequency Phone Survey - Third Round. Washington, DC: World Bank.", + "ner_text": [ + [ + 2066, + 2095, + "named" + ], + [ + 23, + 31, + "High - Frequency Phone Survey <> reference population" + ], + [ + 726, + 734, + "High - Frequency Phone Survey <> reference population" + ], + [ + 1205, + 1211, + "High - Frequency Phone Survey <> data geography" + ], + [ + 1957, + 1967, + "High - Frequency Phone Survey <> publisher" + ], + [ + 1969, + 1973, + "High - Frequency Phone Survey <> publication year" + ], + [ + 2029, + 2037, + "High - Frequency Phone Survey <> reference population" + ], + [ + 2041, + 2047, + "High - Frequency Phone Survey <> data geography" + ], + [ + 2127, + 2137, + "High - Frequency Phone Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "2021. Monitoring Social and Economic Impacts of COVID-19 on Refugees in Uganda: Results from the High - Frequency Phone Survey - Third Round. Washington, DC: World Bank.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly described as a survey that provides empirical results.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on social and economic impacts.", + "contextual_reason_agent": "This is a dataset as it is explicitly described as a survey that provides empirical results.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 53, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda ( P176747 ) Page 49 of 77 age, and disability status of firm owner. Gender gap between women participating in the project in ownership of and use of bank accounts The difference between women \u2019 s and men \u2019 s ownership and use of a bank account. Annual Survey of the women benefiting from the project and the men in their communities. The survey will be administered to a panel of women and men annually using the same questions as in the Global Findex survey. MGLSD to administer the surveys, and compile and report the data. Women participating in planning forums supported to identify priority infrastructure ( Number ) Participation in the project in forums to identify priority infrastructure. Annual Forum attendance sheets. The sponsors of the forums will ask participants to sign in ( in-person events ). Virtual participation will be recorded. MGLSD will collect the data upon completion of each forum and compile the data and present it in quarterly progress reports. Regional facilities constructed or rehabilitated under the project, including childcare facilities and GBV referral services, operating as planned ( Number ) The number of regional multi-purpose facilities constructed or rehabilitated by the project and their operational status. Annual Supervising engineers \u2019 reports, project progress reports.", + "ner_text": [ + [ + 531, + 551, + "named" + ], + [ + 4, + 14, + "Global Findex survey <> publisher" + ], + [ + 86, + 92, + "Global Findex survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Annual Survey of the women benefiting from the project and the men in their communities. The survey will be administered to a panel of women and men annually using the same questions as in the Global Findex survey. MGLSD to administer the surveys, and compile and report the data.", + "type": "survey", + "explanation": "The Global Findex survey is explicitly mentioned as a source of questions for the annual survey, indicating it is used for data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a survey that collects data.", + "contextual_reason_agent": "The Global Findex survey is explicitly mentioned as a source of questions for the annual survey, indicating it is used for data collection.", + "contextual_signal": "follows 'using the same questions as in the Global Findex survey'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 3, + "text": "Policy Framework Steering Committee Strategic Environmental Assessment Socio-Environmental Impact Assessment Social and Environmental Management Plan Sweden International Development Agency Systbme d \u2019 information de gestion forestibre ( Integrated Forest Data Management System ) Statement of Expenditure Service permanent d \u2019 inventaire et d \u2019 amknagement forestibre ( Permanent service of forest inventory and installation ) i v", + "ner_text": [ + [ + 238, + 278, + "named" + ], + [ + 157, + 189, + "Integrated Forest Data Management System <> publisher" + ] + ], + "validated": true, + "empirical_context": "Policy Framework Steering Committee Strategic Environmental Assessment Socio-Environmental Impact Assessment Social and Environmental Management Plan Sweden International Development Agency Systbme d \u2019 information de gestion forestibre ( Integrated Forest Data Management System ) Statement of Expenditure Service permanent d \u2019 inventaire et d \u2019 amknagement forestibre ( Permanent service of forest inventory and installation ) i v", + "type": "system", + "explanation": "It is indeed a dataset as it is described as a system that manages forest data, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data Management System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a system that manages forest data, indicating its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 46, + "text": "Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "ner_text": [ + [ + 750, + 758, + "named" + ], + [ + 395, + 409, + "PMU Data <> author" + ], + [ + 426, + 446, + "PMU Data <> data type" + ], + [ + 857, + 875, + "PMU Data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "type": "data", + "explanation": "In the context, 'PMU Data' is explicitly mentioned as being compiled and recorded, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is referenced in the context of compiling reports and recording beneficiary records.", + "contextual_reason_agent": "In the context, 'PMU Data' is explicitly mentioned as being compiled and recorded, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for compiling reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 18, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 13 of 61 Sub-component 1. 1: Updating and expanding the Social Registry ( US $ 6. 9 million ) 23. The objective of this sub-component is to update the Social Registry throughout Mauritania, including the end-to-end process ( quota methodology, community targeting mechanism, data collection, and verification stage ). An ongoing update of the Social Registry is critical because the programs which rely on it need up-to-date information, both on the demographic status of households ( migration, births, deaths, divorce ) and on the evolution of their socioeconomic status ( education, income generation, assets, access to services, and so on ). The update frequency is a trade-off between implementation costs and value of updated data for programs using the registry. 24. The objective agreed with the Government is to complete a full update every three years. The update will proceed by region, starting where the first households were registered ( Gorgol region ). Given this cycle, about 67, 000 households would be registered per year. Quotas will be recalculated based on the 2020 Poverty and Living Standards Measurement household Survey ( Enqu\u00eate sur la Pauvret\u00e9 et les Conditions de Vie, EPCV ) information.", + "ner_text": [ + [ + 1162, + 1224, + "named" + ] + ], + "validated": true, + "empirical_context": "Given this cycle, about 67, 000 households would be registered per year. Quotas will be recalculated based on the 2020 Poverty and Living Standards Measurement household Survey ( Enqu\u00eate sur la Pauvret\u00e9 et les Conditions de Vie, EPCV ) information.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as a source of information used for recalculating quotas.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a source of information used for recalculating quotas.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 82, + "text": "In case of conflict / contradiction between the World Bank \u2019 s procurement procedures and any national rules and regulations, the World Bank \u2019 s procurement procedures will take precedence according to the Article 4 ( 2 ) of the Procurement Law of the Islamic Republic of Afghanistan dated 27 / 06 / 1396 ( September 17, 2016 ) published in the Official Gazette No. 1223. 16. Systematic Tracking of Exchanges in Procurement ( STEP ) and procurement planning: The project will implement STEP, a World Bank planning and tracking system, which will provide data on procurement activities and establish benchmarks. The Procurement Plan will be prepared in STEP and the same will be updated in agreement with the project team annually or as required to reflect the actual project implementation needs and improvements in institutional capacity. 17. Afghanistan Civil Services Commission in coordination with NPA recently completed mass recruitment of procurement staff for almost all budget entities. MoEC, KM, and IDLG are among the agencies where newly hired individuals are deployed. Almost all the procurement staff of these procurement department are new, and they are less familiar with complex procurement and are also not familiar with World Bank procurement regulations.", + "ner_text": [ + [ + 486, + 490, + "named" + ] + ], + "validated": false, + "empirical_context": "16. Systematic Tracking of Exchanges in Procurement ( STEP ) and procurement planning: The project will implement STEP, a World Bank planning and tracking system, which will provide data on procurement activities and establish benchmarks. The Procurement Plan will be prepared in STEP and the same will be updated in agreement with the project team annually or as required to reflect the actual project implementation needs and improvements in institutional capacity.", + "type": "system", + "explanation": "However, STEP is mentioned as a planning and tracking system, not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it is described as a system that provides data on procurement activities.", + "contextual_reason_agent": "However, STEP is mentioned as a planning and tracking system, not as a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 53, + "text": "This will enable management decisions to be made based on an assessment of whether the program is moving towards its objectives. A periodic financial audit will be conducted which will also help to identify and mitigate any potential sign of fraud and governance issues. The program monitoring system will rely on regular and accurate data collection and analysis to identify the timely implementation of activities, the achievement of intended results, and positive and negative unintended effects. i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii. Sources of Data: ( i ) the routine data collected through the claims processing system; ( ii ) data routinely collected at health facility level; ( iii ) additional data collected by MoPH such as from enrollment registers, spot checks and ad-hoc surveys; ( iv ) information collected during the verification processes; and ( v ) data from the MoPH rapid facility assessment.", + "ner_text": [ + [ + 1118, + 1121, + "named" + ] + ], + "validated": false, + "empirical_context": "i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii.", + "type": "system", + "explanation": "However, 'HIS' is described as a health information system, which is a framework rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HIS' is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "However, 'HIS' is described as a health information system, which is a framework rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "022_Tajikistan-Water-Supply-and-Sanitation-Investment-Project", + "page": 61, + "text": "Consolidat ed informatio Multiple sources - community mobilization company reports, PMU data, self - reported data, reports of target utilities, survey data. Methodology for each CE channel will be reported separately as prescribed in the POM. MEWR, KMK, PMU", + "ner_text": [ + [ + 84, + 92, + "named" + ], + [ + 94, + 114, + "PMU data <> data type" + ], + [ + 145, + 156, + "PMU data <> data type" + ] + ], + "validated": true, + "empirical_context": "Consolidat ed informatio Multiple sources - community mobilization company reports, PMU data, self - reported data, reports of target utilities, survey data. Methodology for each CE channel will be reported separately as prescribed in the POM.", + "type": "data", + "explanation": "'PMU data' is indeed used in the research context as it is mentioned alongside other data sources.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU data' is a dataset because it is listed among other data sources.", + "contextual_reason_agent": "'PMU data' is indeed used in the research context as it is mentioned alongside other data sources.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "The MIS will collect information from applicants during registration, such as ( a ) key contact information, gender, age, marital status, education status, refugee status, employment; ( b ) business level outcomes such as business partners, business age, monthly profits, number of employees; and ( c ) access to other programs and loans. The MIS database will be updated as program applicants make use of specific components. The GROW_ID can be entered to update the database with any additional information on services received to ensure the applicant is only registered once. For example, dates of business trainings, service provider, or amount of funds will all be entered into the MIS. This database will help support operations through feedback loops as it can track who is accessing which services in real time. The MIS will also be important in being able to establish a sample of study participants to draw on for an impact evaluation or other learning activities. 61. The project will also support MGLSD \u2019 s capacity to lead and oversight social risk management. At the national level, this will support capacity building of unit staff, public fora, SRM experts, etc.", + "ner_text": [ + [ + 343, + 346, + "named" + ] + ], + "validated": false, + "empirical_context": "The MIS will collect information from applicants during registration, such as ( a ) key contact information, gender, age, marital status, education status, refugee status, employment; ( b ) business level outcomes such as business partners, business age, monthly profits, number of employees; and ( c ) access to other programs and loans. The MIS database will be updated as program applicants make use of specific components. The GROW_ID can be entered to update the database with any additional information on services received to ensure the applicant is only registered once.", + "type": "system", + "explanation": "'MIS' refers to a management information system, which is a system but not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves collecting and managing information.", + "contextual_reason_agent": "'MIS' refers to a management information system, which is a system but not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 19, + "text": "PDO level indictors are as follows: \u2022 Participating teachers with improved teaching practices \u2022 Improved learning outcomes of students benefitting from project-supported tutoring program ( disaggregated by gender, urban / rural, refugee / vulnerability status ) \u2022 Annual education statistics reports produced and publicly disseminated using data generated by the integrated EMIS for education sector management and refugee response 24 From Learning Recovery to Education Transformation: Insights and Reflections from the Fourth Survey on National Education Responses to COVID-19 School Closures. https: / / openknowledge. worldbank. org / handle / 10986 / 38112 25 UNICEF; the United Nations Educational, Scientific and Cultural Organization ( UNESCO ); and UNESCO \u2019 s Institute for Statistics. 26 World Bank Group. 2022. Moldova \u2013 Digital Education Readiness Assessment 2021-22. Washington, D. C.: World Bank Group. 27 Navigating Multiple Crises, Staying the Course on Long-term Development: The World Bank Group \u2019 s Response to the Crises Affecting Developing Countries ( English ), Washington, DC, World Bank Group.", + "ner_text": [ + [ + 374, + 378, + "named" + ] + ], + "validated": false, + "empirical_context": "PDO level indictors are as follows: \u2022 Participating teachers with improved teaching practices \u2022 Improved learning outcomes of students benefitting from project-supported tutoring program ( disaggregated by gender, urban / rural, refugee / vulnerability status ) \u2022 Annual education statistics reports produced and publicly disseminated using data generated by the integrated EMIS for education sector management and refugee response 24 From Learning Recovery to Education Transformation: Insights and Reflections from the Fourth Survey on National Education Responses to COVID-19 School Closures. https: / / openknowledge.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with education statistics and data generation.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "050_Cameroon-Secondary-Education-and-Skills-Development-Project", + "page": 81, + "text": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 77 of 159 MINESEC ( lyc\u00e9e technique ) and TVET institutions from other ministries in selected economic sectors, public and private. The tracer survey is a questionnaire which includes basic demographic information and information on labor market outcomes of graduates including employment status, industry, etc. The results of the tracer survey, conducted by the firm to be contracted by PCU, are analyzed by the ONEFOP. ONEFOP will prepare a report, including policy recommendations, and publish this report on the MINEFOP website. Tracer studies will be carried out 3 times during the project implementation period ( baseline, mid-line and end - line surveys ). Improved strategic planning and management This indicator measures DLI 6. NSDS will: ( i ) define general NSDS - Years 1, 2, and 5 NQCF - NSDS - MINEPAT NQCF - MINEFOP For both NSDS and NQCF, MINEFOP sends the documents to PCU, independent MINEFOP, PCU", + "ner_text": [ + [ + 219, + 232, + "named" + ], + [ + 238, + 251, + "tracer survey <> data type" + ], + [ + 267, + 296, + "tracer survey <> data description" + ], + [ + 341, + 350, + "tracer survey <> reference population" + ], + [ + 496, + 502, + "tracer survey <> author" + ], + [ + 599, + 606, + "tracer survey <> publisher" + ], + [ + 907, + 914, + "tracer survey <> publisher" + ], + [ + 939, + 946, + "tracer survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Secondary Education and Skills Development Project ( P170561 ) Page 77 of 159 MINESEC ( lyc\u00e9e technique ) and TVET institutions from other ministries in selected economic sectors, public and private. The tracer survey is a questionnaire which includes basic demographic information and information on labor market outcomes of graduates including employment status, industry, etc. The results of the tracer survey, conducted by the firm to be contracted by PCU, are analyzed by the ONEFOP. ONEFOP will prepare a report, including policy recommendations, and publish this report on the MINEFOP website.", + "type": "survey", + "explanation": "The tracer survey is explicitly described as a questionnaire that collects demographic and labor market data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects structured data on labor market outcomes.", + "contextual_reason_agent": "The tracer survey is explicitly described as a questionnaire that collects demographic and labor market data, confirming its role as a dataset.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 29, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 20 of 68 52. Inclusion. Despite significant achievements on inclusive education, students with disabilities still face exclusion in Moldova. The Joint Evaluation of the Implementation of the Programme for Development of Inclusive Education 2011 \u2013 2020 highlighted significant efforts that have been made in recent years to increase disability inclusion in education. Nevertheless, the report revealed that educational institutions, at all levels, are only partially prepared to facilitate the access of children with disabilities ( especially motor skill disorders and hearing and / or visual impairments ) through access infrastructure. In this regard, the accessibility of all types of educational institutions is still an issue for Moldova. These accessibility constraints are reflected in wider disparities in education and subsequently work opportunities for people with disabilities. Principles of universal access will guide the project preparation including the design of the new high schools in terms of physical access, safety and emergency egress, and access to learning opportunities to ensure inclusion and safety of students with disabilities. 53. Personal data protection. The EMIS and national and international assessments adhere to the national personal data protection legislation. The national practices are informed and strengthened by the successful participation of Moldova in PISA, governed by the OECD Personal Data Protection Rules and practices.", + "ner_text": [ + [ + 1262, + 1266, + "named" + ] + ], + "validated": false, + "empirical_context": "Personal data protection. The EMIS and national and international assessments adhere to the national personal data protection legislation. The national practices are informed and strengthened by the successful participation of Moldova in PISA, governed by the OECD Personal Data Protection Rules and practices.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data protection and assessments.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "183_multi0page", + "page": 26, + "text": "and workplans adequate financial and oEducational data system, the Districts is improved in educational data. ( e. g., financial reporting system. planning process. proposals for new school * Monitoring reports, project construction, using data from management information EMIS ) system. - 23 -", + "ner_text": [ + [ + 119, + 145, + "named" + ] + ], + "validated": false, + "empirical_context": "g. , financial reporting system. planning process.", + "type": "system", + "explanation": "However, the term 'financial reporting system' is mentioned as a system and not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reporting' which is often associated with data collection.", + "contextual_reason_agent": "However, the term 'financial reporting system' is mentioned as a system and not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 23, + "text": "To facilitate data sharing and use, the subcomponent will develop a data visualization and use platform ( software ) focusing on visual representations of TPM and routine data, inclusive of BHI data. Linking of platforms, including DHIS2 and the HSF platform will be integral to the work. The data visualization platform will include visualization of Results Framework data and other core indicators from the HSSP, linking TPM and DHIS2 data using maps, charts, and graphs and will incorporate HSF data along with the overlay of health and meteorologic data to better understand the impact of climatic patterns on health. The platform will include analysis of health service delivery in refugee and host community areas to facilitate improved health service delivery among the critical underserved populations. The development of an integrated, institutionalized, and sustainable platform which will strengthen MoH systems will be emphasized. Annex 2 provides further details on the platform. 43. Subcomponent 3. 3: Contract and Program Management Capacity Development ( PMU; US $ 3. 44 million: US $ 1. 54 million equivalent IDA [ WHR ] and US $ 1. 9 million Trust Funds [ US $ 0. 20 million SDTF and US $ 1. 7 million MDTF ] ).", + "ner_text": [ + [ + 431, + 436, + "named" + ] + ], + "validated": false, + "empirical_context": "Linking of platforms, including DHIS2 and the HSF platform will be integral to the work. The data visualization platform will include visualization of Results Framework data and other core indicators from the HSSP, linking TPM and DHIS2 data using maps, charts, and graphs and will incorporate HSF data along with the overlay of health and meteorologic data to better understand the impact of climatic patterns on health. The platform will include analysis of health service delivery in refugee and host community areas to facilitate improved health service delivery among the critical underserved populations.", + "type": "system", + "explanation": "However, DHIS2 is described as a platform, not a structured collection of data or a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data visualization and analysis.", + "contextual_reason_agent": "However, DHIS2 is described as a platform, not a structured collection of data or a dataset itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "158_40156", + "page": 40, + "text": "36 approaches to HIV service delivery for target populations amongst all 7 IGAD member states Component 3 Ability to plan and implement activities - Project coordination and management \u2022 Number of civil society organizations20 funded by the project in the last 12 months, by type of civil society organization \u2022 Amount of funds disbursed to civil society organizations providing services to CBMPs, refugees, returnees, IDPs and surrounding populations in the 7 IGAD countries Capacity building \u2022 Number of NGOs that are able to design HIV service delivery programs for CBMPs in line with the IGAD HIV strategy \u2022 Number of persons from IGAD Member states trained in M & E including the use of Data Track the extent of capacity strengthening Strengthened capacity of IGAD, member states and contractor to plan, implement, monitor and evaluate HIV / AIDS programs for targeted populations M & E system ( including structured learning agenda ) \u2022 Number of website hits in the last 12 months \u2022 Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames \u2022 Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months \u2022 Number of operational research studies funded Track whether functioning M & E system regularly monitors and reports on epidemic. Track level of implementation of work planning for HIV / AIDS programming.", + "ner_text": [ + [ + 1020, + 1043, + "named" + ] + ], + "validated": false, + "empirical_context": "36 approaches to HIV service delivery for target populations amongst all 7 IGAD member states Component 3 Ability to plan and implement activities - Project coordination and management \u2022 Number of civil society organizations20 funded by the project in the last 12 months, by type of civil society organization \u2022 Amount of funds disbursed to civil society organizations providing services to CBMPs, refugees, returnees, IDPs and surrounding populations in the 7 IGAD countries Capacity building \u2022 Number of NGOs that are able to design HIV service delivery programs for CBMPs in line with the IGAD HIV strategy \u2022 Number of persons from IGAD Member states trained in M & E including the use of Data Track the extent of capacity strengthening Strengthened capacity of IGAD, member states and contractor to plan, implement, monitor and evaluate HIV / AIDS programs for targeted populations M & E system ( including structured learning agenda ) \u2022 Number of website hits in the last 12 months \u2022 Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames \u2022 Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months \u2022 Number of operational research studies funded Track whether functioning M & E system regularly monitors and reports on epidemic. Track level of implementation of work planning for HIV / AIDS programming.", + "type": "data", + "explanation": "However, 'program monitoring data' refers to information collected for monitoring purposes, not a structured dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' which often implies a structured collection.", + "contextual_reason_agent": "However, 'program monitoring data' refers to information collected for monitoring purposes, not a structured dataset itself.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": ") establish a quality of care system through development of guidelines, tools, and standards, training of trainers on quality of care, piloting quality of care teams and supporting national scale up, and support for National and State level quality improvement supervision. 38. Subcomponent 2. 4: Health Management Information Systems ( WHO; US $ 2. 50 million: US $ 0. 93 million equivalent IDA [ including US $ 0. 63 million WHR ] and US $ 1. 57 million Trust Funds [ US $ 0. 17 million SDTF and US $ 1. 40 million MDTF ] ). This subcomponent will focus on developing systems and procedures for the national HMIS, with an emphasis on supporting the collection of routine data through DHIS2, to standardize data collection, entry and cleaning, as well as instituting data quality improvement practices. This will enhance targeting and data tracking for refugees and provide regularly updated information to understand the evolving needs on the ground that will aid further in the decision-making process.", + "ner_text": [ + [ + 610, + 614, + "named" + ] + ], + "validated": false, + "empirical_context": "40 million MDTF ] ). This subcomponent will focus on developing systems and procedures for the national HMIS, with an emphasis on supporting the collection of routine data through DHIS2, to standardize data collection, entry and cleaning, as well as instituting data quality improvement practices. This will enhance targeting and data tracking for refugees and provide regularly updated information to understand the evolving needs on the ground that will aid further in the decision-making process.", + "type": "system", + "explanation": "However, HMIS is described as a system for managing health information, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, HMIS is described as a system for managing health information, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 15, + "text": "For example, the average value of assets among all households ( both refugee and host ) in the District of Arua is UGX 560, 000 ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 9. COVID-19 is already disrupting incomes and livelihoods, with the poorest wealth quintiles most adversely affected. Since the COVID-19 outbreak, 91 percent of households have reported reduced income ( or losses ) from at least one of their sources of livelihood. Services, such as trade, transport and accommodation and food services have been the sectors most affected by the COVID-19 restrictions and have also lost the highest share of workers. Although employment levels have recovered partially, income levels for many households have not returned to pre-COVID-19 levels. By April 2021, income levels were still below pre-COVID-19 levels for at least one third of households. The second lockdown in mid-2021 is likely to have stalled and even possibly reversed progress in income recovery. In fact, 49 percent of MSMEs interviewed on the impact of the second lockdown 10 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "ner_text": [ + [ + 1359, + 1402, + "named" + ], + [ + 95, + 111, + "Refugee and Host Community Household Survey <> data geography" + ], + [ + 201, + 215, + "Refugee and Host Community Household Survey <> data geography" + ], + [ + 805, + 809, + "Refugee and Host Community Household Survey <> publication year" + ], + [ + 1209, + 1213, + "Refugee and Host Community Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referenced as a source of household data in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referenced as a source of household data in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 118, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 113 of 117 Action Description DLI Respon sible Timing Completion measurement 2. Review and analyse NEMIS capabilities on collecting, disaggregation and reporting on data for learners with special needs and disability and recommendations to close identified gaps. Integrated in DLI # 6 MoE / KI SE / KICD / KNEC / TSC Year 1 Reports generated from NEMIS with granular data on special needs children by category of disability, age, and gender. 3. Complete the ongoing process to institutionalize the process and procedures for administration and management of scholarships by Jomo Kenyatta Foundation ( JKF ) Integrated in DLI # 4 MoE Year 2 Scholarship administration and management processes and procedures established at JKF. 4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A. MoE By Year 2, 2023 Complaints and grievances lodged through the NEMIS. Number of grievances satisfactorily addressed 5. Assess the outcomes of biogas pilots and analyze existing practices on use of biogas in schools N / A MoE Year 2 Costed action plan developed to scale up biogas projects.", + "ner_text": [ + [ + 859, + 864, + "named" + ] + ], + "validated": false, + "empirical_context": "4. Design and develop a GRM MIS module compatible with NEMIS and interoperable between agencies and able to capture refugee data. N / A.", + "type": "system", + "explanation": "However, NEMIS is described as a module and system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of data capture and interoperability.", + "contextual_reason_agent": "However, NEMIS is described as a module and system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 15, + "text": "For example, the average value of assets among all households ( both refugee and host ) in the District of Arua is UGX 560, 000 ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 9. COVID-19 is already disrupting incomes and livelihoods, with the poorest wealth quintiles most adversely affected. Since the COVID-19 outbreak, 91 percent of households have reported reduced income ( or losses ) from at least one of their sources of livelihood. Services, such as trade, transport and accommodation and food services have been the sectors most affected by the COVID-19 restrictions and have also lost the highest share of workers. Although employment levels have recovered partially, income levels for many households have not returned to pre-COVID-19 levels. By April 2021, income levels were still below pre-COVID-19 levels for at least one third of households. The second lockdown in mid-2021 is likely to have stalled and even possibly reversed progress in income recovery. In fact, 49 percent of MSMEs interviewed on the impact of the second lockdown 10 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "ner_text": [ + [ + 1269, + 1302, + "named" + ], + [ + 95, + 111, + "Census of Business Establishments <> data geography" + ], + [ + 1209, + 1213, + "Census of Business Establishments <> publication year" + ], + [ + 1239, + 1263, + "Census of Business Establishments <> data type" + ] + ], + "validated": true, + "empirical_context": "org / en / country / uga ), September 20, 2020. 11 Calculation based on district-level firm data from Census of Business Establishments, and refugee and host community household data from the Refugee and Host Community Household Survey.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of firm data used in the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Census' which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of firm data used in the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 26 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Name: Percentage of beneficiaries taking a more active role in their communities Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "ner_text": [ + [ + 564, + 595, + "named" + ], + [ + 4, + 14, + "Post-Training Completion Survey <> publisher" + ], + [ + 45, + 73, + "Post-Training Completion Survey <> reference population" + ], + [ + 77, + 87, + "Post-Training Completion Survey <> data geography" + ], + [ + 265, + 339, + "Post-Training Completion Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey used for data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "108_Jordan-PforR-PAD-P159522-FINAL-DISCLOSURE-10052016", + "page": 79, + "text": "This approach improves transparency with respect to the purposes of resources and the efficiency of their use. It is also helpful in improving communications between the Government and its citizens. The GFMIS will easily provide information on how the annual Budget Law and its execution support the strategic priorities of the Government. The GFMIS has been fully rolled out to budget units ( ministries, departments, and regional financial centers ) encompassed in the Budget Law except three ( the Ministry of Foreign Affairs, Water Authority, and civil defense ) that would be completed in 2016. The Government has launched GFMIS phase two aiming to connect IPUs by 2018. 28. The current GFMIS implementation utilizes a subset of the functionalities of the underlying application software. The current core application software comprises: ( a ) Hyperion for budget preparation; ( b ) Oracle Financials for budget execution; and ( c ) software for interfacing to other software for debt management, payroll, bank reconciliation, and revenue management. The GFMIS is fully utilized for", + "ner_text": [ + [ + 344, + 349, + "named" + ] + ], + "validated": false, + "empirical_context": "The GFMIS will easily provide information on how the annual Budget Law and its execution support the strategic priorities of the Government. The GFMIS has been fully rolled out to budget units ( ministries, departments, and regional financial centers ) encompassed in the Budget Law except three ( the Ministry of Foreign Affairs, Water Authority, and civil defense ) that would be completed in 2016. The Government has launched GFMIS phase two aiming to connect IPUs by 2018.", + "type": "system", + "explanation": "However, GFMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GFMIS is a dataset because it is associated with providing information on budget execution.", + "contextual_reason_agent": "However, GFMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 132, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 120 will be maintained. Variance analysis with explanations formed part of the regular reports for ESPES. The reporting for HCO builds on that experience. Management will take corrective measures based on these reports. 30. Policies and procedures. The government follows a double-entry bookkeeping system and modified cash basis of accounting, as documented in the GOE \u2019 s Accounting Manual. For the HCO, the GOE \u2019 s accounting policies and procedures will be used for the accounting of the project. The specific project arrangements with regard to the submission of quarterly reports, fund flow, and audits will be included in the POM that will be prepared for the operation not later than six months after effectiveness. 31. Accounting system. The MOF and federal-level implementers use IFMIS while the regional BOF and implementing sector bureaus use the IBEX system, which will be tailored to the project on a standalone basis. The HCO chart of accounts will be developed using the GOE \u2019 s chart of accounts to capture the components, subcomponents, categories, and activities of the project expenditures. The chart of accounts will be part of the FM section of the POM. 32. Accounting centers and accounting documents.", + "ner_text": [ + [ + 919, + 930, + "named" + ] + ], + "validated": false, + "empirical_context": "Accounting system. The MOF and federal-level implementers use IFMIS while the regional BOF and implementing sector bureaus use the IBEX system, which will be tailored to the project on a standalone basis. The HCO chart of accounts will be developed using the GOE \u2019 s chart of accounts to capture the components, subcomponents, categories, and activities of the project expenditures.", + "type": "system", + "explanation": "However, the context indicates that it is an accounting system, not a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'IBEX system' sounds like a structured tool for managing data.", + "contextual_reason_agent": "However, the context indicates that it is an accounting system, not a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 21, + "text": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E. 27. National accounts are engaged in a modernization process requiring support but suffer from a lack of sufficient and comprehensive trade data. While many Sub-Saharan countries are still following the 1993 national accounts framework, Cameroon transitioned successfully to the 2008 system and has been producing trimestral accounts since 2015. However, the classic annual national accounts suffer from a lack of reliable agriculture statistics. The last agriculture and livestock census was undertaken in 1984 and annual surveys stopped in the early 1990s. The Ministry of Livestock, Fishery, and Animal Industry and the Ministry of Agriculture Rural Development currently rely on indirect sources to produce the necessary basic sector statistics. A new agricultural and livestock census was originally planned for 2017, but the cost is high compared to similar exercises in the region ( CFAF 23. 6 billion ). However, the AfDB and EU are exploring ways to contribute to the financing of this census in synergy with the population census.", + "ner_text": [ + [ + 112, + 137, + "named" + ], + [ + 1450, + 1468, + "annual enterprises survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "On the supply side, labor force surveys ( LFSs ) are irregular and not frequent enough. On the demand side, the annual enterprises survey does not emphasize employment issues, the informal sector survey is as irregular as the LFS, and there is no annual agricultural survey. Finally, line ministries could benefit from INS data production and expertise to inform program preparation and M & E.", + "type": "survey", + "explanation": "However, the context indicates it does not emphasize employment issues and is not described as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects data.", + "contextual_reason_agent": "However, the context indicates it does not emphasize employment issues and is not described as a data source.", + "contextual_signal": "mentioned only as a survey, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "182_multi0page", + "page": 41, + "text": "Annex 1: Project Design Summary ALBANIA: Social Services Delivery 141 of s MQniorl & 4hiIe 3, m_ & Sector-related CAS Goal: Sector Indicators: Sector / country reports: ( from Goal to Bank Mission ) Improve standards of living I. Increased # of beneficiaries 1. Vulnerability Needs and I. Government su Yports and promote social cohesion gained access to the social Institutional Capabilities multiple sector reforms through community-based services baseline study 2. Political stabili! y is social services targeted to 2. Raised awareness of social 2. Regional needs assessment maintained poor and vulnerable inclusion issues of reports 3. Government mntintains its population groups vulnerable population 3. Periodic beneficiary impact commitment to operate the groups assessment community-bast: d services on 3. Increased community 4. MOLSA / GASS records, participatory w.. y participation in decision other participants making information and databases. 5. Public opinion surveys 6. Household budget survey Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: The project development 1. Increased efficiency and 1. MOLSAIGASS reports 1. Governments and social objectives are to assist the reduced cost per 2.", + "ner_text": [ + [ + 988, + 1011, + "named" + ], + [ + 32, + 39, + "Household budget survey <> data geography" + ], + [ + 685, + 706, + "Household budget survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Public opinion surveys 6. Household budget survey Project Development Outcome / Impact Project reports: ( from Objective to Goal ) Objective: Indicators: The project development 1. Increased efficiency and 1.", + "type": "survey", + "explanation": "In the context, 'Household budget survey' is explicitly mentioned as part of public opinion surveys, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often refers to structured data collection.", + "contextual_reason_agent": "In the context, 'Household budget survey' is explicitly mentioned as part of public opinion surveys, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 60, + "text": "In addition, under Component 2, the project, by developing harmonized data collection procedures and targeting instruments, will support the government in developing a social registry, which would provide a platform for effective monitoring of beneficiaries; effectiveness of safety net interventions, and serve as a repository of data to support program evaluations and other assessments as a single beneficiary registry. Role of Partners 41. The ASP MDTF is co-financing the project. The ASP MDTF is a U. K. Department for International Development-World Bank initiative and financing platform that is supporting a broader Sahel Adaptive Social Protection Program ( ASPP ), whose objectives are to increase access to effective adaptive social protection systems for poor and vulnerable populations in the Sahel ( Burkina Faso, Chad, Mali, Mauritania, Niger and Senegal ). The ASPP contributes to building long-term resilience and to the World Bank \u2019 s global strategy of reducing absolute poverty and promoting shared prosperity by supporting the development of sustainable systems", + "ner_text": [ + [ + 168, + 183, + "named" + ], + [ + 259, + 300, + "social registry <> data description" + ], + [ + 768, + 799, + "social registry <> reference population" + ], + [ + 815, + 827, + "social registry <> data geography" + ], + [ + 829, + 833, + "social registry <> data geography" + ], + [ + 835, + 839, + "social registry <> data geography" + ], + [ + 841, + 851, + "social registry <> data geography" + ], + [ + 853, + 858, + "social registry <> data geography" + ], + [ + 863, + 870, + "social registry <> data geography" + ], + [ + 939, + 949, + "social registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "In addition, under Component 2, the project, by developing harmonized data collection procedures and targeting instruments, will support the government in developing a social registry, which would provide a platform for effective monitoring of beneficiaries; effectiveness of safety net interventions, and serve as a repository of data to support program evaluations and other assessments as a single beneficiary registry. Role of Partners 41.", + "type": "registry", + "explanation": "The term is indeed a dataset as it is described as a repository of data to support program evaluations and assessments.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'social registry' implies a structured collection of data for monitoring beneficiaries.", + "contextual_reason_agent": "The term is indeed a dataset as it is described as a repository of data to support program evaluations and assessments.", + "contextual_signal": "described as a repository of data to support program evaluations", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 8, + "validated": 7, + "not_validated": 1 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 18, + "text": "The World Bank Response - Recovery - Resilience for Conflict-Affected Communities in Ethiopia Project ( P177233 ) Page 14 of 104 months. 27 Nearly 90 percent of the IDPs in Ethiopia are in the Tigray Region ( over 1. 8 million people ), the Somali Region ( about 900, 000 people ), the Oromia Region ( about 500, 000 people ), and the Amhara Region ( about 350, 000 people ). These numbers are likely higher as IOM data do not cover IDPs who are not located in identifiable IDP settlements, and some parts of Ethiopia ( notably the Tigray, parts of Western Oromia, and Benishangul-Gumuz regions ) \u2013 among the most affected by conflict \u2013 have not been fully accessible for data collection. 11. The primary factors impeding IDP return include the lack of access to basic infrastructure and services. 28 A study commissioned by the World Bank in 2020 on the drivers of internal displacement and its impacts on development projects29 noted critical conflict impacts and climate-related hazards to water and sanitation services, access to education, and infrastructure. Water sources ( including potable water sources ) have been destroyed in several conflict-affected Woredas. This is particularly challenging given the drought-prone nature of many Woredas, with projections indicating that climate change will further increase the frequency and severity of drought.", + "ner_text": [ + [ + 411, + 419, + "named" + ], + [ + 85, + 93, + "IOM data <> data geography" + ], + [ + 165, + 169, + "IOM data <> reference population" + ], + [ + 193, + 206, + "IOM data <> data geography" + ], + [ + 241, + 254, + "IOM data <> data geography" + ], + [ + 286, + 299, + "IOM data <> data geography" + ], + [ + 843, + 847, + "IOM data <> publication year" + ] + ], + "validated": true, + "empirical_context": "8 million people ), the Somali Region ( about 900, 000 people ), the Oromia Region ( about 500, 000 people ), and the Amhara Region ( about 350, 000 people ). These numbers are likely higher as IOM data do not cover IDPs who are not located in identifiable IDP settlements, and some parts of Ethiopia ( notably the Tigray, parts of Western Oromia, and Benishangul-Gumuz regions ) \u2013 among the most affected by conflict \u2013 have not been fully accessible for data collection. 11.", + "type": "data", + "explanation": "In this context, 'IOM data' is indeed used as a source of information regarding internally displaced persons (IDPs) in Ethiopia.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'IOM data' is a dataset because it refers to information collected by the International Organization for Migration.", + "contextual_reason_agent": "In this context, 'IOM data' is indeed used as a source of information regarding internally displaced persons (IDPs) in Ethiopia.", + "contextual_signal": "mentioned as a source of information on IDPs", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 17, + "text": "The proposed project will allow for the inclusion of poor and vulnerable households in the refugee and host communities of the Hodh Chargui region within the SSN system, including the Social Registry, Tekavoul, and Elmaouna programs. Component 1: Updating and enhancing the Social Registry ( US $ 7. 3 million with US $ 4. 0 million from national IDA18, US $ 0. 9 million from IDA18 RSW, US $ 0. 9 million from ASP MDTF and US $ 1. 5 million from government ) 22. This component will be implemented by the Taazour General Delegation. It supports a full update of the Social Registry and inclusion of eligible refugee households ( Sub-component 1. 1 ) and its usage for operational and strategic purposes ( Sub-component 1. 2 ). Payment Platform POOR HOUSEHOLDS Social Transfer Program \u2022 Extreme Poor households \u2022 5-year cycle \u2022 CCT \u2022 Social promotion Social Registry \u2022 Identification \u2022 Data collection \u2022 Support to targeted social programs Shock - responsive Safety Net Program \u2022 Vulnerablehouseholds \u2022 4 months \u2022 Unconditional CT \u2022 Using SP tools Early Warning Systems DATA DATA Beneficiary list Beneficiary List", + "ner_text": [ + [ + 184, + 199, + "named" + ], + [ + 127, + 146, + "Social Registry <> data geography" + ], + [ + 506, + 532, + "Social Registry <> author" + ], + [ + 600, + 627, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The proposed project will allow for the inclusion of poor and vulnerable households in the refugee and host communities of the Hodh Chargui region within the SSN system, including the Social Registry, Tekavoul, and Elmaouna programs. Component 1: Updating and enhancing the Social Registry ( US $ 7.", + "type": "registry", + "explanation": "The context indicates that the Social Registry is part of the SSN system, which implies it functions as a data source for the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to households.", + "contextual_reason_agent": "The context indicates that the Social Registry is part of the SSN system, which implies it functions as a data source for the project.", + "contextual_signal": "mentioned as part of the SSN system", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 3, + "text": "PIT Project Implementation Team PO Production officers POM Program Operations Manual PPDA Public Procurement and Disposal of Public Assets PPP Private-Public Partnership PPSD Project Procurement Strategy for Development PSC Project Steering Committee PSFU Private Sector Foundation Uganda PTC Project Technical Committee RHD Refugee-Hosting District SOPs Standard Operating Procedures SORT Systematic Operations Risk-rating Tool STEP Systematic Tracking of Exchanges in Procurement UBOS Uganda Bureau of Statistics UGGDS Uganda Green Growth Development Strategy UIA Uganda Investment Authority UIRI Uganda Industrial Research Institute UNHCR United Nations High Commissioner for Refugees UNHS Uganda National Household Survey UEW Unsafe Environment for Women UWEP Uganda Women Entrepreneurship Program VSLAs Village Savings and Loans Associations WEE Women \u2019 s Economic Empowerment WHR Window for Host Communities and Refugees", + "ner_text": [ + [ + 693, + 725, + "named" + ], + [ + 282, + 288, + "Uganda National Household Survey <> data geography" + ], + [ + 521, + 527, + "Uganda National Household Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "PIT Project Implementation Team PO Production officers POM Program Operations Manual PPDA Public Procurement and Disposal of Public Assets PPP Private-Public Partnership PPSD Project Procurement Strategy for Development PSC Project Steering Committee PSFU Private Sector Foundation Uganda PTC Project Technical Committee RHD Refugee-Hosting District SOPs Standard Operating Procedures SORT Systematic Operations Risk-rating Tool STEP Systematic Tracking of Exchanges in Procurement UBOS Uganda Bureau of Statistics UGGDS Uganda Green Growth Development Strategy UIA Uganda Investment Authority UIRI Uganda Industrial Research Institute UNHCR United Nations High Commissioner for Refugees UNHS Uganda National Household Survey UEW Unsafe Environment for Women UWEP Uganda Women Entrepreneurship Program VSLAs Village Savings and Loans Associations WEE Women \u2019 s Economic Empowerment WHR Window for Host Communities and Refugees", + "type": "survey", + "explanation": "The Uganda National Household Survey is explicitly mentioned as a survey, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its name, suggesting it collects data.", + "contextual_reason_agent": "The Uganda National Household Survey is explicitly mentioned as a survey, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "003_BOSIB-850e0c11-07c1-4c9c-8d44-4286704221bd", + "page": 47, + "text": "The World Bank Southern Niger Connectivity and Integration Project ( P179770 ) Page 37 of women to physically access obstetric care in the project area. Frequency At baseline, midterm, and after completion Data source A survey on rural accessibility will be used to collect mobility data on the impact of the newly rehabilitated and upgraded rural roads on people \u2019 s access to health services, particularly on women \u2019 s travel time to access obstetric care in the project areas. Methodology for Data Collection - Prepare a GEMS form to collect mobility data from a sample of women beneficiaries of maternity services. - Measure the reduction in travel time compared to the baseline value ( based on a GIS transportation model which relies on mix of modelled data and empirical data ). Responsibility for Data Collection General Directorate for Public Health ( DGSP ) / ministry in charge of public health and social affairs, in collaboration with some World Bank experts. Length of rehabilitated RN1 Maradi \u2013 Zinder section incorporating climate resilience measures ( Km ) Description This indicator measures the total length, in kilometers, of the RN1 road between Maradi and Zinder that has been rehabilitated taking into account climate resilience measures.", + "ner_text": [ + [ + 524, + 528, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency At baseline, midterm, and after completion Data source A survey on rural accessibility will be used to collect mobility data on the impact of the newly rehabilitated and upgraded rural roads on people \u2019 s access to health services, particularly on women \u2019 s travel time to access obstetric care in the project areas. Methodology for Data Collection - Prepare a GEMS form to collect mobility data from a sample of women beneficiaries of maternity services. - Measure the reduction in travel time compared to the baseline value ( based on a GIS transportation model which relies on mix of modelled data and empirical data ).", + "type": "program", + "explanation": "'GEMS' is not a dataset but rather a form or program used for data collection.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GEMS' is a dataset because it is mentioned in the context of data collection.", + "contextual_reason_agent": "'GEMS' is not a dataset but rather a form or program used for data collection.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "042_Burundi-Skills-for-Jobs-Women-and-Youth-Project", + "page": 43, + "text": "Youth in the labor force: Much of the progress made in primary education has yet to transfer to employment and productivity gains: while basic education needs are now mostly met, secondary schooling can leave many youths are ill-equipped to enter the labor market. 14 percent of Burundi \u2019 s youth are neither in employment, education or training ( NEET ) 48; youth aged 15-24 have the nation \u2019 s highest unemployment rates; and as many as 40 percent are underemployed. Women, in particular, are marginalized in employment outcomes. Despite accounting for over half of the workforce, they occupy disproportionate numbers of farming, unpaid, and unreported jobs. The need to transfer schooling gains to employment outcomes, in particular for women, is central to ensuring Burundi \u2019 s lasting growth trajectory. 47 PASEC2019 Qualit\u00e9 des Syst\u00e8mes \u00c9ducatifs en Afrique Subsaharienne Francophone. 48 Youth Labour Statistics, International Labour Organization, https: / / ilostat. ilo. org / topics / youth /.", + "ner_text": [ + [ + 894, + 917, + "named" + ], + [ + 279, + 286, + "Youth Labour Statistics <> data geography" + ], + [ + 359, + 375, + "Youth Labour Statistics <> reference population" + ], + [ + 770, + 777, + "Youth Labour Statistics <> data geography" + ], + [ + 919, + 952, + "Youth Labour Statistics <> publisher" + ] + ], + "validated": true, + "empirical_context": "47 PASEC2019 Qualit\u00e9 des Syst\u00e8mes \u00c9ducatifs en Afrique Subsaharienne Francophone. 48 Youth Labour Statistics, International Labour Organization, https: / / ilostat. ilo.", + "type": "statistics", + "explanation": "In the context, 'Youth Labour Statistics' is explicitly linked to the International Labour Organization, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced alongside a known statistical source.", + "contextual_reason_agent": "In the context, 'Youth Labour Statistics' is explicitly linked to the International Labour Organization, indicating it serves as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 12, + "text": "The government has also maintained a policy of granting refugees access to its territory, land for cultivation and livelihoods, and practical arrangements for their initial reception and registration. Refugees are granted freedom of movement and in principle free to settle anywhere in the country. The Commission for Refugee Affairs ( CRA ) plays the leading role in developing government policy on refugee issues, including protection and coordinating government and external support for refugees. The CRA is present in all refugee-hosting areas, even as capacity limitations impede its ability to fully fulfill its designated responsibilities. 8 UNHCR. 2022. Operations Data Portal \u2013 Refugee Situations: South Sudan. https: / / data. unhcr. org / en / country / ssd. 9 UNHCR defines a host community as \u201c the local, regional, and national governmental, social and economic structures within which refugees live. In the context of refugee camps, the host community may encompass the camp, or may simply neighbor the camp but have interaction with, or otherwise be impacted by, the refugees residing in the camp \u201d. https: / / www. unhcr. org / en-us / protection / resettlement / 4cd7d1509 / unhcr-ngo-toolkit-practical-cooperation-resettlement-community - outreach. html.", + "ner_text": [ + [ + 662, + 684, + "named" + ] + ], + "validated": false, + "empirical_context": "2022. Operations Data Portal \u2013 Refugee Situations: South Sudan. https: / / data.", + "type": "portal", + "explanation": "However, it is referred to as a 'portal' and not explicitly mentioned as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in its name.", + "contextual_reason_agent": "However, it is referred to as a 'portal' and not explicitly mentioned as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a portal, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 79, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 74 of 117 prior to construction. Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "ner_text": [ + [ + 1041, + 1046, + "named" + ], + [ + 4, + 14, + "NEMIS <> publisher" + ], + [ + 589, + 608, + "NEMIS <> data type" + ], + [ + 777, + 793, + "NEMIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "Evidence of utilization of NEMIS data for better management of primary education Evidence of utilization of NEMIS data through: ( a ) registration of refugee children in NEMIS; ( b ) at least 90 percent of students capitation grants to primary school allocated and disbursed based on data in the NEMIS online platform; ( c ) categorization of primary schools annually based on the levels of meeting ( or not ) minimum essential school inputs; and ( d ) utilization by MoE of the categorization data in the allocation of MoE \u2019 s annual development grants to primary schools. Annual MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Refugee learners are registered in NEMIS as per the MoE guidelines for registration of refugee learners in NEMIS Annual MoE, NEMIS Technical Team MoE, NEMIS Technical Team Reports generated from the NEMIS on line based platform MoE Level of satisfaction of beneficiaries ( Citizen Engagement indicator and part of Grievance Redress Mechanism ) Number of complaints and grievances lodged through the NEMIS GRM satisfactorily addressed.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in the context of data utilization for education management.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is described as an online platform that collects and manages data related to primary education.", + "contextual_signal": "mentioned as a data source for managing primary education", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 23, + "text": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 17 of 95 targeted towards female students, dedicated training modules and micro-grants to support women \u2019 s entrepreneurial initiatives are needed for the mining sector in Burkina Faso and will be supported by the project. These interventions together with the activities aimed at ensuring registration of land in the name of women ( as measured by PDO indicator on titles and certificates of occupancy issued disaggregated by sex ) align the project with the World Bank Gender Strategy and addresses the Pillar 3 ( Removing Barriers to Women \u2019 s Ownership and Control of Asset ). C. Relevance to Higher Level Objectives 24. The proposed project is fully consistent with the 2017 Systematic Country Diagnostic ( SCD ), the CPF FY2018-2023 ( Report No. 123712-BF ) and the 2020 Risks and Resilience Assessment for the Sahel ( RRA ). The SCD identified three top priorities necessary to end extreme poverty and increase shared prosperity in Burkina Faso: ( a ) improving natural resource management; ( b ) promoting skills development; and ( c ) reducing gender bias against women.", + "ner_text": [ + [ + 441, + 454, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Support to Land and Mining Management Strengthening Project ( P169267 ) Page 17 of 95 targeted towards female students, dedicated training modules and micro-grants to support women \u2019 s entrepreneurial initiatives are needed for the mining sector in Burkina Faso and will be supported by the project. These interventions together with the activities aimed at ensuring registration of land in the name of women ( as measured by PDO indicator on titles and certificates of occupancy issued disaggregated by sex ) align the project with the World Bank Gender Strategy and addresses the Pillar 3 ( Removing Barriers to Women \u2019 s Ownership and Control of Asset ). C.", + "type": "indicator", + "explanation": "'PDO indicator' is not a dataset but rather a measurement tool used to assess project outcomes.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PDO indicator' is a dataset because it is associated with measuring outcomes related to the project.", + "contextual_reason_agent": "'PDO indicator' is not a dataset but rather a measurement tool used to assess project outcomes.", + "contextual_signal": "mentioned only as a project indicator, not as a data source", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1186, + 1190, + "named" + ], + [ + 4, + 14, + "DHIS <> publisher" + ], + [ + 15, + 26, + "DHIS <> data geography" + ], + [ + 481, + 492, + "DHIS <> data geography" + ], + [ + 583, + 591, + "DHIS <> reference population" + ], + [ + 616, + 627, + "DHIS <> data geography" + ], + [ + 657, + 665, + "DHIS <> reference population" + ], + [ + 690, + 701, + "DHIS <> data geography" + ], + [ + 1069, + 1080, + "DHIS <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "In this context, 'DHIS' is used as a health management information system that serves as a data source for health data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is mentioned in the context of digitization and integration of health data.", + "contextual_reason_agent": "In this context, 'DHIS' is used as a health management information system that serves as a data source for health data.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 63, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 59 of 68 IRI # 11: Share of girls and women in TVET programs increased. Sub-component 2. 2 IRI # 12: Number of short-term training programs completed ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 13: Number of individuals who are certified through newly developed RPL procedures ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 14: Number of trainers undertaking initial training ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 15 Beneficiary, Stakeholder and other feedback Technology based beneficiary feedback Biannual Survey conducted by PMU Survey using technology DGETFP IRI # 16: Number of trainers engaged in CPD ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 Annual targets Data collected from the DGETFP.", + "ner_text": [ + [ + 705, + 720, + "named" + ], + [ + 4, + 14, + "Biannual Survey <> publisher" + ], + [ + 15, + 23, + "Biannual Survey <> data geography" + ], + [ + 184, + 232, + "Biannual Survey <> data description" + ], + [ + 488, + 535, + "Biannual Survey <> data description" + ], + [ + 779, + 812, + "Biannual Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "2 IRI # 14: Number of trainers undertaking initial training ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 IRI # 15 Beneficiary, Stakeholder and other feedback Technology based beneficiary feedback Biannual Survey conducted by PMU Survey using technology DGETFP IRI # 16: Number of trainers engaged in CPD ( disaggregated by gender, disability and refugee status ) Sub-component 2. 2 Annual targets Data collected from the DGETFP.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that collects data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that collects data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 93, + "text": "An appropriate policy action is needed to improve the quality of education, which will increase the potential of the youth and equip them with the skills that will enable them to join the productive labor force. Returns to Education 5. In terms of the benefits of education, Niger \u2019 s labor market provides a strong signal that investment in education yields higher returns and better employment opportunities to both individuals and households and contributes to reducing inequality in access to education as well as post-education labor market outcomes. In Niger, evidence from the 2014 household survey, labelled ECVMA, reveals that higher levels of education are associated with better earnings, an increased probability of wage employment, and increased likelihood of finding employment in more productive sectors; and the 48 % 52 % 44 % 87 % 42 % 58 % 63 % 44 % 19 % 47 % 62 % 22 % 28 % 74 % 118 % National Male Female Urban Rural Q1 Q5 Agadez Diffa Dosso Maradi Tahoua Tillaberi Zinder Niamey Gender Area Wealth Quintile Region", + "ner_text": [ + [ + 616, + 621, + "named" + ], + [ + 275, + 280, + "ECVMA <> data geography" + ], + [ + 584, + 588, + "ECVMA <> publication year" + ], + [ + 589, + 605, + "ECVMA <> data type" + ], + [ + 703, + 743, + "ECVMA <> data description" + ] + ], + "validated": true, + "empirical_context": "In terms of the benefits of education, Niger \u2019 s labor market provides a strong signal that investment in education yields higher returns and better employment opportunities to both individuals and households and contributes to reducing inequality in access to education as well as post-education labor market outcomes. In Niger, evidence from the 2014 household survey, labelled ECVMA, reveals that higher levels of education are associated with better earnings, an increased probability of wage employment, and increased likelihood of finding employment in more productive sectors; and the 48 % 52 % 44 % 87 % 42 % 58 % 63 % 44 % 19 % 47 % 62 % 22 % 28 % 74 % 118 % National Male Female Urban Rural Q1 Q5 Agadez Diffa Dosso Maradi Tahoua Tillaberi Zinder Niamey Gender Area Wealth Quintile Region", + "type": "survey", + "explanation": "ECVMA is indeed a dataset as it is explicitly mentioned as a household survey that provides empirical evidence.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed ECVMA is a dataset because it is referenced in the context of providing evidence from a household survey.", + "contextual_reason_agent": "ECVMA is indeed a dataset as it is explicitly mentioned as a household survey that provides empirical evidence.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "064_Mauritania-Water-and-Sanitation-Sectoral-Project", + "page": 13, + "text": "The World Bank Mauritania Water and Sanitation Sectoral Project ( P167328 ) Page 9 of 65 8. Access to water and sanitation is relatively high, especially in urban areas, but far from universal. As measured by established indicators8, national and urban access rates are higher in Mauritania than rates observed on average in SSA ( Figure 1 ). However, only 45 percent of the rural population had access to basic drinking water services in 2015 \u2013 34 percent through piped systems and 11 percent through hand pumps. It should be noted that these figures are for nationals only, and do not include refugees. According to a 2016 inventory of piped systems, solar pumping is used by 80 percent of the water posts and 50 percent of the small - scale water systems ( alimentation en eau potable, AEP ) and mini-AEPs. 9. The development of sanitation lags water supply. In 2015, 63 percent of the urban population, and 20 percent of the rural population had access to an improved sanitation facility ( either individually or shared with other households ) 9. Open defecation is prevalent in rural areas, where it is practiced by 61 percent of the population.", + "ner_text": [ + [ + 620, + 651, + "named" + ] + ], + "validated": true, + "empirical_context": "It should be noted that these figures are for nationals only, and do not include refugees. According to a 2016 inventory of piped systems, solar pumping is used by 80 percent of the water posts and 50 percent of the small - scale water systems ( alimentation en eau potable, AEP ) and mini-AEPs. 9.", + "type": "inventory", + "explanation": "This is indeed a dataset as it provides empirical data regarding the use of solar pumping in water systems.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to an inventory, which typically involves a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data regarding the use of solar pumping in water systems.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "010_BOSIB1554c314c0a2187c019d7e85bc2a91", + "page": 45, + "text": "In the aftermath of heavy rains during March-May 2018, flooding across the country led to a major increase in cholera outbreaks of 5, 470 cases and 78 deaths in 19 counties with over 700 cases in Turkana and Garissa Counties. 32 33 Floods, storms, landslides, and extreme heat threaten the functioning of health infrastructure and hinder health service delivery and access especially in flood prone Coastal regions, Tana River region, the Lake Victoria Basin, and rural remote areas of the country. 29 Kenya: IPC Acute Food Insecurity and Acute Malnutrition Analysis ( July 2023 - January 2024 ) 30 https: / / www. cdc. gov / malaria / malaria_worldwide / cdc_activities / kenya. html 31 Kenya Malaria Indicator Survey, 2020. 32 Kenya Humanitarian Situation Report. 31 December 2018. 33 MSF responds to cholera outbreak amid heavy rains and flooding. Project update. 18 May 2018.", + "ner_text": [ + [ + 688, + 718, + "named" + ], + [ + 502, + 507, + "Kenya Malaria Indicator Survey <> data geography" + ], + [ + 688, + 693, + "Kenya Malaria Indicator Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "gov / malaria / malaria_worldwide / cdc_activities / kenya. html 31 Kenya Malaria Indicator Survey, 2020. 32 Kenya Humanitarian Situation Report.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey, which is a recognized form of data collection.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Survey' in its title, which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey, which is a recognized form of data collection.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 12, + "text": "Foundational ID systems11 are broadly recognized as key enablers for inclusive digitalization and development. For people, the ability to establish and verify their identity is often a prerequisite for access to services and economic opportunities, such as social protection, healthcare, education, financial services, and employment. Proof of legal identity is also the basis for exercising rights, such as property ownership, and nationality. For governments and businesses, ID systems can serve as a platform for more effective and efficient service delivery by enabling the unique identification and verification of persons. Importantly, ID systems can promote greater inclusion by de-risking and reducing the costs of 8 UNHCR ' s Ethiopia Update on the Total Number of Refugees and Asylum Seekers as of August 31, 2023. 9 In Tigray, new internal displacement data has been reported, including 1, 021, 798 IDPs ( 250, 468 households ) in 643 sites across six zones ( excluding 20 woredas / districts hard to reach due to security or environmental factors ). 10 IOM. 2023. Ethiopia National Displacement Report 16 - Site Assessment Round 33 and Village Assessment Survey Round 16: Nov 2022 - Jun 2023. https: / / reliefweb. int / report / ethiopia / ethiopia-national-displacement-report-16-site-assessment-round-33-and-village-assessment-survey-round - 16-november-2022-june-2023. 11 Foundational ID systems are primarily created to provide credentials to the general population as proof of identity for a wide variety of public and private sector transactions. Common types of foundational ID systems include civil registries, national ID systems, and population registers.", + "ner_text": [ + [ + 842, + 868, + "named" + ], + [ + 725, + 730, + "internal displacement data <> publisher" + ], + [ + 819, + 823, + "internal displacement data <> publication year" + ], + [ + 830, + 836, + "internal displacement data <> data geography" + ], + [ + 910, + 914, + "internal displacement data <> reference population" + ], + [ + 1199, + 1203, + "internal displacement data <> publication year" + ] + ], + "validated": true, + "empirical_context": "Importantly, ID systems can promote greater inclusion by de-risking and reducing the costs of 8 UNHCR ' s Ethiopia Update on the Total Number of Refugees and Asylum Seekers as of August 31, 2023. 9 In Tigray, new internal displacement data has been reported, including 1, 021, 798 IDPs ( 250, 468 households ) in 643 sites across six zones ( excluding 20 woredas / districts hard to reach due to security or environmental factors ). 10 IOM.", + "type": "data", + "explanation": "This is indeed a dataset as it provides specific numerical data on internal displacement in Tigray.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific figures related to internal displacement.", + "contextual_reason_agent": "This is indeed a dataset as it provides specific numerical data on internal displacement in Tigray.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 94, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 87 of 174 ( DEREC ) / Proje ct database. managing the subsidies and the verification agent will report this number periodically. It will be assumed that each set of clean cooking would benefit to about 6 people ( average size of a household ). of which, female This relates to the share of people provided with clean and efficient cooking solutions that is female in the beneficiary population. Semi - annually. National statistics / Proj ect data. Estimated based on the share of female in the national population as published by the national statistics institute or other credible sources. DEREC / ANERSOL. Refugee and host population provided with with clean and efficient cooking solutions This relates to the number of refugees and host population provided with clean and efficient cooking solutions under the project. Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. DEREC / ANERSOL. of which, refugees This relates to the number of refugees provided with clean and efficient cooking solutions under the project. Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and project - level data.", + "ner_text": [ + [ + 1077, + 1095, + "named" + ], + [ + 4, + 14, + "project-level data <> publisher" + ], + [ + 980, + 990, + "project-level data <> publisher" + ], + [ + 1311, + 1321, + "project-level data <> publisher" + ] + ], + "validated": true, + "empirical_context": "National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. DEREC / ANERSOL.", + "type": "data", + "explanation": "In this context, 'project-level data' is indeed used as a source of information related to the collected data about refugees and host communities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'project-level data' is a dataset because it refers to specific data collected for projects.", + "contextual_reason_agent": "In this context, 'project-level data' is indeed used as a source of information related to the collected data about refugees and host communities.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 48, + "text": "41 Total Financing As % of Total Indicative timeline for DLI achievement to Grade 11 DLI # 3 Teacher performance measured and evaluated 6 2. 7 % No evidence of impact First Impact Evaluation Study ( IES ) conducted Second IES conducted DLI # 4 Number of participating schools that implement formative and summative assessments for students in Grade 3 in reading and math 18. 4 8. 2 % Not currently monitored 50 180 DLI # 5 Proportion of participating schools with active community partnerships 15. 6 7. 0 % 0 % for second - shift schools 50 % 80 % DLI # 6 Timely and robust data available for evidence informed policymaking and planning. DLR # 6. 1 A data and information management framework developed and adopted by MEHE and CERD 3 1. 3 % Misalignment in data collection and data management functions across different MEHE and CERD units Expected to be completed by Y1 DLR # 6. 2 Annual data available from participating schools on disaggregated data on student 12 5. 4 % Data on student enrollment not available until end of school year Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st", + "ner_text": [ + [ + 1077, + 1100, + "named" + ], + [ + 934, + 952, + "Student enrollment data <> data type" + ] + ], + "validated": true, + "empirical_context": "2 Annual data available from participating schools on disaggregated data on student 12 5. 4 % Data on student enrollment not available until end of school year Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st Student enrollment data by March 1st", + "type": "data", + "explanation": "This is indeed a dataset as it refers to structured data collected on student enrollment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data on student enrollment.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured data collected on student enrollment.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "012_BOSIB-9a6accb6-73d1-4bd1-8307-d41a339a51ab", + "page": 47, + "text": "Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Rural water supply schemes constructed under the program that adopt a WASH plus approach ( provide water for multiple productive uses beyond doemstic portable water supply ) ( Number ) Description This indicator measures the number of rural water schemes constructed by each county that provide water for productive uses such as irrigation and so on beyond domestic portable water supply. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Climate-vulnerable households provided with access to improved water services ( Number ) DLI Description This indicator measures the cumulative number of households that have access to an improved water source constructed through the program. The definition of a household as applied in the 2022 Kenya Demographic and Health Survey ( KDHS ) applies. The KDHS indicates that the mean household size in rural areas is four people. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Sustainably functioning rural water supply schemes ( Number ) Description This indicator primarily measures the number of water schemes in the participating counties that are operated under an approved professional service provider model as per WASREB regulations. This is to ensure functionality of the schemes with functionality defined in the POM. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Qualitative inspections and quantitative data collection using M & E protocols defined in the POM", + "ner_text": [ + [ + 1323, + 1327, + "named" + ], + [ + 969, + 998, + "KDHS <> reference population" + ], + [ + 1260, + 1264, + "KDHS <> publication year" + ], + [ + 1347, + 1396, + "KDHS <> data description" + ] + ], + "validated": true, + "empirical_context": "The definition of a household as applied in the 2022 Kenya Demographic and Health Survey ( KDHS ) applies. The KDHS indicates that the mean household size in rural areas is four people. Frequency Annual measurement Data source County Government Water Department M & E records Methodology for Data Collection Qualitative inspections and quantitative data collection using M & E protocols defined in the POM Responsibility for Data Collection County Government Water Department Sustainably functioning rural water supply schemes ( Number ) Description This indicator primarily measures the number of water schemes in the participating counties that are operated under an approved professional service provider model as per WASREB regulations.", + "type": "survey", + "explanation": "KDHS is indeed a dataset as it provides structured data on household sizes and is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed KDHS is a dataset because it is referenced as a source of data in the context of demographic and health measurements.", + "contextual_reason_agent": "KDHS is indeed a dataset as it provides structured data on household sizes and is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 15, + "text": "According to data from the International Labor Organization, a lack of safe transport reduces the probability of women accessing economic opportunities by 16. 5 percent. 15 In 2020, the regional rate was 18. 7 fatalities per 100, 000 inhabitants, which is higher than the national rate ( 15. 5 ) ( DATASUS data ). 16 Looking at the formal jobs of the Region, 69 percent of the people that receive more than ten minimum wages, on average, are men. Meanwhile, 94 percent of indigenous or black women receive less than 3 minimum wages. Data Source: RAIS, 2019. 17 Gender-disaggregated mobility data for Foz do R\u00edo Itaja\u00ed were collected during the preparation of the Stakeholder Engagement Plan and other project-related documents. 18 Data derived from PNAD-Cont\u00ednua ( 2019 ) State of Santa Catarina. 19 Hayd\u00e9e Svab, Marina Kohler Harkot, and Beatriz Moura Dos Santos, A Baseline Study of Gender and Transport in Sao Paulo, Brazil: Present Initiatives to Improve Women \u2019 s Mobility ( English ) ( Washington, DC: World Bank, 2021 ). 20 Brazil - Improving Mobility and Urban Inclusion in the Amazonas Corridor in Belo Horizonte Project ( Washington, DC: World Bank ). 21 Data reflect cases of harassment in public spaces without disaggregation. However, given the trend in Brazil and the Region, it can be inferred that in the Santa Catarina Region, more women are also survivors of sexual harassment.", + "ner_text": [ + [ + 298, + 310, + "named" + ], + [ + 176, + 180, + "DATASUS data <> reference year" + ], + [ + 552, + 556, + "DATASUS data <> reference year" + ], + [ + 600, + 617, + "DATASUS data <> data geography" + ], + [ + 772, + 795, + "DATASUS data <> data geography" + ], + [ + 1321, + 1342, + "DATASUS data <> data geography" + ] + ], + "validated": true, + "empirical_context": "7 fatalities per 100, 000 inhabitants, which is higher than the national rate ( 15. 5 ) ( DATASUS data ). 16 Looking at the formal jobs of the Region, 69 percent of the people that receive more than ten minimum wages, on average, are men.", + "type": "database", + "explanation": "In the context, 'DATASUS data' is explicitly mentioned as a source of data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'DATASUS data' is referenced as a source of statistical information.", + "contextual_reason_agent": "In the context, 'DATASUS data' is explicitly mentioned as a source of data, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 30, + "text": "The scope of the HCO expenditure is determined by: ( a ) focusing on woreda-level spending of the GPG across the four main human capital-related sectors62 ( i. e., the scope is woreda-level expenditures ); ( b ) excluding contributions from other World Bank-financed activities from this scope ( e. g., investments through GEQIP-E, CALM, Health SDG AF, and WASH CWA supply-side investments ) ( i. e., focuses on complementary investments to the ongoing World Bank-financed projects ); and ( c ) allocating expenditures proportionate to historical sectoral budget shares on woreda spending. To avoid overlap with the block grant contribution from the World Bank-financed Second Additional Financing ( AF ) to ESPES, the two disbursements will be sequenced and based on a different set of Disbursement-Linked Indicators ( DLIs ). 43. The expenditure framework for the HCO is based on the lessons learned and experiences from the ESPES program. The ESPES program expenditure framework which was based on the estimated woreda level 59 M. Frost and C. Rolleston ( 2013 ), \u201c Improving Education Quality, Equity and Access: A Report on Findings from the Young Lives School Survey ( Round 1 ) in Ethiopia, \u201d ( Oxford, UK: Young Lives ). 60 https: / / glcopmcgill. ca / wp-content / uploads / 2019 / 11 / Gender-Strategy-for-the-Education-and-Training-Sector-Ethiopia-Ministry-of-Education. pdf 61 Roads are part of the government \u2019 s five \u201c pro-poor \u201d sectors but are not included as a \u201c human capital \u201d sector in the HCO. 62 The four sectors cover health, education, agriculture, and water", + "ner_text": [ + [ + 787, + 817, + "named" + ] + ], + "validated": false, + "empirical_context": ", focuses on complementary investments to the ongoing World Bank-financed projects ); and ( c ) allocating expenditures proportionate to historical sectoral budget shares on woreda spending. To avoid overlap with the block grant contribution from the World Bank-financed Second Additional Financing ( AF ) to ESPES, the two disbursements will be sequenced and based on a different set of Disbursement-Linked Indicators ( DLIs ). 43.", + "type": "indicator", + "explanation": "However, it is not a dataset but rather a set of criteria used for disbursement decisions.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Disbursement-Linked Indicators' suggests a structured set of metrics.", + "contextual_reason_agent": "However, it is not a dataset but rather a set of criteria used for disbursement decisions.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 90, + "text": "The World Bank Jordan Youth, Technology, and Jobs Project ( P170669 ) Page 86 of 106 high school ( relative to the other students ). Given that a digital curriculum requires updating periodically, we count the estimated the benefits for the first 4 cohorts of students that are exposed to the curriculum. Thus, we take the number of students enrolled in grades 6, 7, 8, and 9 at 2 years after program starts and calculate the proportion of each that would be going directly into the job market after high school, after 2-year technical schools, and after 4-year universities. Since someone at grade 7 graduates high school 5 years later, the benefit streams from the curriculum begins seven years after beginning of project and, following the literature, counted for the next roughly 45 years. 12. Assumptions and parameters in the analysis: Below in table 1 we present the set of values used when to estimate the costs and benefits to each of the above listed approaches and components. The assumptions are mainly based on evidence from the literature, labor force survey data ( JLMPS 2016 ), or our expectations of outputs for the program from different components, which are described in other section of this PAD.", + "ner_text": [ + [ + 1054, + 1077, + "named" + ], + [ + 4, + 14, + "labor force survey data <> publisher" + ], + [ + 1080, + 1090, + "labor force survey data <> publication year" + ], + [ + 1233, + 1251, + "labor force survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Assumptions and parameters in the analysis: Below in table 1 we present the set of values used when to estimate the costs and benefits to each of the above listed approaches and components. The assumptions are mainly based on evidence from the literature, labor force survey data ( JLMPS 2016 ), or our expectations of outputs for the program from different components, which are described in other section of this PAD.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data used for empirical analysis in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data collected from a survey.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data used for empirical analysis in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "172_multi0page", + "page": 39, + "text": "Conflict resolution and peace-building initiatives were considered mostly as activities to be undertaken by community and religious leaders. 6. 2 Participatory Approach: How are key stakeholders participating in the project? 6. 2. 1 Both the preparation and implementation of the project have been highly participatory. The participation of NGOs, missions, and community-based groups in the delivery of education services in Sierra Leone has been remarkable. The proposed project will build on the many currently on-going initiatives. Project preparation activities was undertaken in a highly participatory way. Through the PHRD, the Partnership Program for education was developed, with participation of the missions, NGOs, Sierra Leone Teachers Union, Teacher Colleges and donors, UN agencies ( including UNAMSIL, UNICEF, UN Office for the Coordination of Humanitarian Affairs ( OCHA ), NRC, Plan International, NaCSA ( formerly NCRRR ). Equally, the school survey and the social assessment were conducted with participation of the above organizations, under the lead of the CSO.", + "ner_text": [ + [ + 953, + 966, + "named" + ], + [ + 425, + 437, + "school survey <> data geography" + ], + [ + 1077, + 1080, + "school survey <> author" + ], + [ + 1097, + 1115, + "school survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Through the PHRD, the Partnership Program for education was developed, with participation of the missions, NGOs, Sierra Leone Teachers Union, Teacher Colleges and donors, UN agencies ( including UNAMSIL, UNICEF, UN Office for the Coordination of Humanitarian Affairs ( OCHA ), NRC, Plan International, NaCSA ( formerly NCRRR ). Equally, the school survey and the social assessment were conducted with participation of the above organizations, under the lead of the CSO.", + "type": "survey", + "explanation": "The term is confirmed as a dataset since it is mentioned in the context of being conducted with participation from various organizations, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'school survey' implies a structured collection of data related to schools.", + "contextual_reason_agent": "The term is confirmed as a dataset since it is mentioned in the context of being conducted with participation from various organizations, indicating it serves as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 43, + "text": "The increase in service utilization is translated into deaths averted using published empirical studies; mortality rates for Balochistan were taken from the 2012 and 2017 \u2013 18 PDHS. 70 Effect sizes for the impacts of an additional skilled birth on maternal and neonatal mortality rates come from Graham, Bell, and Bullough ( 2001 ) 71 and Bhutta et al. ( 2014 ), 72 respectively. The effect size used to estimate the impact of measles vaccination on mortality of children under five is based on McGovern and Canning ( 2015 ). 73 Because their effect size is for full child vaccination, an adjustment factor of 0. 89 is applied to the share of children with 69 Pakistan Bureau of Statistics. Population Census 2017. http: / / www. pbs. gov. pk / content / block \u2010 wise \u2010 provisional \u2010 summary \u2010 results \u2010 6th \u2010 population \u2010 housing \u2010 census \u2010 2017 \u2010 january \u2010 03 \u2010 2018; for data from the 1998 and earlier censuses, see http: / / www. pbs. gov. pk / sites / default / files / population_census / Administrative % 20Units. pdf. 70 Maternal mortality rates from PDHS 2012 \u2013 13 are used since no maternal mortality data is available from PDHS 2017 \u2013 18. 71 Graham, W. J., J. S.", + "ner_text": [ + [ + 1134, + 1138, + "named" + ], + [ + 105, + 120, + "PDHS <> data description" + ], + [ + 125, + 136, + "PDHS <> data geography" + ], + [ + 157, + 161, + "PDHS <> reference year" + ], + [ + 166, + 175, + "PDHS <> reference year" + ], + [ + 296, + 322, + "PDHS <> author" + ], + [ + 339, + 352, + "PDHS <> author" + ], + [ + 495, + 515, + "PDHS <> author" + ], + [ + 660, + 689, + "PDHS <> publisher" + ], + [ + 709, + 713, + "PDHS <> publication year" + ], + [ + 1064, + 1073, + "PDHS <> reference year" + ], + [ + 1139, + 1148, + "PDHS <> publication year" + ], + [ + 1189, + 1207, + "PDHS <> usage context" + ] + ], + "validated": true, + "empirical_context": "pdf. 70 Maternal mortality rates from PDHS 2012 \u2013 13 are used since no maternal mortality data is available from PDHS 2017 \u2013 18. 71 Graham, W.", + "type": "survey", + "explanation": "In this context, 'PDHS' is confirmed as a dataset since it provides maternal mortality rates used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PDHS' is a dataset because it is referenced in relation to maternal mortality rates and data collection.", + "contextual_reason_agent": "In this context, 'PDHS' is confirmed as a dataset since it provides maternal mortality rates used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 43, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0. 00 0. 00 5. 00 10. 00 15. 00 20. 00 20. 00 Rapid student assessment mechanism developed and piloted ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Tutoring / accelerated learning program for disadvantaged students developed and implemented ( for selected grade ) ( Yes / No ) No No Yes Yes Yes Yes Yes Students benefiting from direct interventions to 0. 00 0. 00", + "ner_text": [ + [ + 306, + 309, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Education Quality Improvement Project ( P179363 ) Page 34 of 68 RESULT_FRAME_TBL_PDO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 based on data generated by the integrated EMIS and used for education sector management including refugee response ( Text ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) assessment, NBS and MoF data ( report on trends, regional variations in learning outcomes and inputs, and determinants of student learning ) using its data, are publicly disseminated and used for education sector management including refugee response. PDO Table SPACE Intermediate Results Indicators by Components RESULT_FRAME_TBL_IO Indicator Name PBC Baseline Intermediate Targets End Target 1 2 3 4 5 Improving Quality of Teaching Teachers trained on professional standards under the project ( % of teachers in ECEC facilities and grades 1 \u2013 12 ) ( Percentage ) 0.", + "type": "organization", + "explanation": "'NBS' refers to an organization (National Bureau of Statistics) and is not presented as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'NBS' is a dataset because it is mentioned alongside data-related terms.", + "contextual_reason_agent": "'NBS' refers to an organization (National Bureau of Statistics) and is not presented as a data source itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 26 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Name: Percentage of beneficiaries taking a more active role in their communities Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "ner_text": [ + [ + 1152, + 1183, + "named" + ], + [ + 4, + 14, + "Post-Training Completion Survey <> publisher" + ], + [ + 45, + 73, + "Post-Training Completion Survey <> reference population" + ], + [ + 77, + 87, + "Post-Training Completion Survey <> data geography" + ], + [ + 265, + 339, + "Post-Training Completion Survey <> data description" + ] + ], + "validated": true, + "empirical_context": "00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey used for data collection.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey used for collecting data in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 40, + "text": "The World Bank Sudan Basic Education Emergency Support Project ( P172812 ) Page 36 of 40 5. Repetition rates are relatively low compared to other countries in the SSA region. There was a slight improvement in the repetition rates: from 5. 3 percent in 2015 to 4. 8 percent in 2018. Though, an estimated SDG 336 million ( US $ 10. 4 million ) is used annually to deliver basic education services to repeaters and pupils that drop out3. 6. Learning levels of students in basic schools in Sudan are generally weak. Representative evidence from the National Learning Assessment find that on average 39 percent of grade 3 pupils are not able to read a single word and only 5 percent of pupils read fluently ( more than 60 words per minute ) in Arabic ( NLA, 2018 ). Furthermore, the assessment of reading speed among third graders indicated an average speed of 15 words per minute, which is far below the estimated minimum reading speed of 40 words per minute thought to be necessary to gain understanding of and meaning from the text. The high share of illiterate pupils in grade 3 means that 39 percent of public resources spent on pupils in grades 1-3 are wasted in the system, which is equivalent to SDG 473 million ( US $ 14. 6 million ). 7.", + "ner_text": [ + [ + 545, + 573, + "named" + ], + [ + 4, + 14, + "National Learning Assessment <> publisher" + ], + [ + 15, + 20, + "National Learning Assessment <> data geography" + ], + [ + 276, + 280, + "National Learning Assessment <> publication year" + ], + [ + 486, + 491, + "National Learning Assessment <> data geography" + ], + [ + 609, + 623, + "National Learning Assessment <> reference population" + ], + [ + 753, + 757, + "National Learning Assessment <> publication year" + ], + [ + 778, + 825, + "National Learning Assessment <> data description" + ] + ], + "validated": true, + "empirical_context": "Learning levels of students in basic schools in Sudan are generally weak. Representative evidence from the National Learning Assessment find that on average 39 percent of grade 3 pupils are not able to read a single word and only 5 percent of pupils read fluently ( more than 60 words per minute ) in Arabic ( NLA, 2018 ). Furthermore, the assessment of reading speed among third graders indicated an average speed of 15 words per minute, which is far below the estimated minimum reading speed of 40 words per minute thought to be necessary to gain understanding of and meaning from the text.", + "type": "assessment", + "explanation": "It is indeed a dataset as it is referenced for empirical analysis of student reading abilities.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it provides representative evidence and statistics about student learning levels.", + "contextual_reason_agent": "It is indeed a dataset as it is referenced for empirical analysis of student reading abilities.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 3, + "validated": 2, + "not_validated": 1 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 33, + "text": "Percentage of newly appointed teachers completing post-recruitment initial training. ( Target: 80 % ) Number / percentage of subjects by grade reviewed and fine-tuned to ensure alignment with knowledge economy skills. ( Target: All subjects in all grades ) The present Tawjihi examination reviewed in the context of the ERfKE II curriculum and knowledge economy skills. The content and design of NAfKE is reviewed for overall technical soundness and alignment with the ERfKE curriculum, and arrangements put in place for any necessary revisions. Extent of e-Learning / ICT utilization in the teaching and learning process ( by subject, grade ) as revealed by nationally representative classroom observation studies. ( Target: increase from 67 % to 73 % ) These indicators will be used to monitor progress on the Teaching & Learning Resource Development component of ERfKE II and to inform project implementation and refinement specifically in relation to this component. The indicators will be reviewed through supervision missions and also used as focal points for discussion with GoJ and other stakeholders. Special Focus Program Development: Early Childhood Education: Access to quality early childhood education has been increased through further development of facilities, learning resources, and parental and community involvement. Percentage of eligible children enrolled in KG. ( Target: Increase from 51. 8 % to 60 % ) Percentage of KG teachers / supervisors successfully completing prescribed training program for early childhood education. ( Target: 100 % ) Number of parents of KG students enrolled in volunteer programs. ( Target: increase from 2, 000 to 3, 500 ) These indicators will be used to monitor progress on the Early Childhood Education component of ERfKE II and to inform project implementation and refinement specifically in relation to this component. The indicators will be reviewed through supervision missions and also used as focal points for discussion with GOJ and other stakeholders.", + "ner_text": [ + [ + 659, + 714, + "named" + ], + [ + 546, + 621, + "nationally representative classroom observation studies <> data description" + ] + ], + "validated": true, + "empirical_context": "The content and design of NAfKE is reviewed for overall technical soundness and alignment with the ERfKE curriculum, and arrangements put in place for any necessary revisions. Extent of e-Learning / ICT utilization in the teaching and learning process ( by subject, grade ) as revealed by nationally representative classroom observation studies. ( Target: increase from 67 % to 73 % ) These indicators will be used to monitor progress on the Teaching & Learning Resource Development component of ERfKE II and to inform project implementation and refinement specifically in relation to this component.", + "type": "study", + "explanation": "This is indeed a dataset as it provides empirical data used to monitor progress and inform project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to studies that collect data on e-Learning utilization.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data used to monitor progress and inform project implementation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 51, + "text": "There is an overall improvement in the financial management function of the government through institutionalized trainings on PFM, woreda benchmarks to assess and support woredas, as well as improvements in internal control processes. With regards to procurement, the existing Proclamation No. 649 / 2009, which has been governing procurement for more than 10 years, will be replaced by the new proclamation that is expected to be ratified at the federal level soon. Following this, regional administrations are expected to revise their laws accordingly. On the other hand, progress is noted in the improved function of procurement regulatory bodies, which are registering procurement process data and reporting on procurement performance through agreed KPIs, though reporting needs further improvement. Similarly, regional regulatory bodies are making efforts to deliver on their responsibility to undertake procurement audits on procurement implementing institutions, with improvement needed in the quality and coverage of these audits. The government has also established a checks and balances system to handle fraud and corruption risks and complaints through the Federal Ethics and Anti-Corruption Commission ( FEACC ) and the Regional Ethics and Anti-Corruption Commissions ( REACCs ) which have a responsibility for prevention activities. The recently completed World Bank financed PFM project has achieved results in the areas of strengthening information technology systems of the government ( IFMIS and e-procurement ); maintaining the currently used system at the woreda level ( IBEX ); institutionalizing PFM trainings; professionalization of procurement function;", + "ner_text": [ + [ + 1503, + 1508, + "named" + ] + ], + "validated": false, + "empirical_context": "The government has also established a checks and balances system to handle fraud and corruption risks and complaints through the Federal Ethics and Anti-Corruption Commission ( FEACC ) and the Regional Ethics and Anti-Corruption Commissions ( REACCs ) which have a responsibility for prevention activities. The recently completed World Bank financed PFM project has achieved results in the areas of strengthening information technology systems of the government ( IFMIS and e-procurement ); maintaining the currently used system at the woreda level ( IBEX ); institutionalizing PFM trainings; professionalization of procurement function;", + "type": "system", + "explanation": "However, IFMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMIS is a dataset because it is related to information technology systems used by the government.", + "contextual_reason_agent": "However, IFMIS is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 85, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 79 of 101 Focus Skills Needed Annual Resource Estimate ( Staff Weeks ) First 12 months \u2022 Environmental and social safeguards monitoring and reporting \u2022 EMIS and school mapping \u2022 Design and baseline of impact evaluation \u2022 Preparation of teacher training modules \u2022 M & E ( surveys ) \u2022 Procurement training and supervision \u2022 Environment and social monitoring and reporting \u2022 Impact evaluation specialist \u2022 Institutional capacity building Environmental: 5 Social: 5 Administrative support: 10 Years 2-6 \u2022 Team leadership \u2022 Technical review / support \u2022 Implementation support and supervision \u2022 Fiduciary support and management \u2022 Environmental and social safeguards monitoring and reporting \u2022 Monitoring and reporting \u2022 Student learning assessment \u2022 Project impact evaluation \u2022 Technical expertise for teacher quality, PBC, school grants, civil works, governance, accountability, gender, IT, teacher training \u2022 Procurement management \u2022 FM and disbursement-related support \u2022 Monitoring and reporting \u2022 Program supervision and monitoring and reporting \u2022 Learning assessment specialist \u2022 Impact evaluation specialist Task team leaders: 90 Education / operations specialists: 150 Education specialist: 150 M & E: 25 Procurement: 30 FM: 25 Environmental: 25 Social: 25 Administrative support: 50 Table 1. 8.", + "ner_text": [ + [ + 244, + 248, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 79 of 101 Focus Skills Needed Annual Resource Estimate ( Staff Weeks ) First 12 months \u2022 Environmental and social safeguards monitoring and reporting \u2022 EMIS and school mapping \u2022 Design and baseline of impact evaluation \u2022 Preparation of teacher training modules \u2022 M & E ( surveys ) \u2022 Procurement training and supervision \u2022 Environment and social monitoring and reporting \u2022 Impact evaluation specialist \u2022 Institutional capacity building Environmental: 5 Social: 5 Administrative support: 10 Years 2-6 \u2022 Team leadership \u2022 Technical review / support \u2022 Implementation support and supervision \u2022 Fiduciary support and management \u2022 Environmental and social safeguards monitoring and reporting \u2022 Monitoring and reporting \u2022 Student learning assessment \u2022 Project impact evaluation \u2022 Technical expertise for teacher quality, PBC, school grants, civil works, governance, accountability, gender, IT, teacher training \u2022 Procurement management \u2022 FM and disbursement-related support \u2022 Monitoring and reporting \u2022 Program supervision and monitoring and reporting \u2022 Learning assessment specialist \u2022 Impact evaluation specialist Task team leaders: 90 Education / operations specialists: 150 Education specialist: 150 M & E: 25 Procurement: 30 FM: 25 Environmental: 25 Social: 25 Administrative support: 50 Table 1. 8.", + "type": "system", + "explanation": "However, in this context, EMIS is mentioned as part of a broader set of activities and not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is often associated with data collection in educational contexts.", + "contextual_reason_agent": "However, in this context, EMIS is mentioned as part of a broader set of activities and not explicitly as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 53, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 42 Indicator Name Customer satisfaction Index for services supported under the project Definition / Description Percentage of surveyed customer / citizen that are satisfied with the services Frequency Annually Data Source MWE / NWSC progress reports, annual water and environment sector performance reports, and water supply and utility databases.. Methodology for Data Collection Responsibility for Data Collection MWE / NWSC Indicator Name Selected service providers achieve > 80 percent in collection efficiency under the project Definition / Description Number of the service providers that achieve 80 percent or greater in collection efficiency. Collection efficiency is revenue collected over billed for the selected water service providers at the national and local levels under the project. Frequency Bi-annually Data Source MWE / NWSC / MWE progress reports, annual water and environment sector performance reports, and water supply and utility databases.", + "ner_text": [ + [ + 330, + 385, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 42 Indicator Name Customer satisfaction Index for services supported under the project Definition / Description Percentage of surveyed customer / citizen that are satisfied with the services Frequency Annually Data Source MWE / NWSC progress reports, annual water and environment sector performance reports, and water supply and utility databases. .", + "type": "document", + "explanation": "However, it is mentioned as a report, which is a document and not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured data.", + "contextual_reason_agent": "However, it is mentioned as a report, which is a document and not a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The key delivery mechanisms put in place for the project, such as the MIS, will enable the project to produce regular progress reports. The PIU will organize annual financial audits for the project, annual reviews of progress, and a mid-term review to guide the project implementation after the first 2. 5 years of implementation. The mid-term review will involve project \u2019 s stakeholders and civil society in the review of performance, intermediary results, institutional arrangements, and outcomes. It will confirm the plans and processes for expansion of the project. 129. A Monitoring and Evaluation manual was produced under Merankabandi. The manual will be updated for the project, even though most of the monitoring and evaluation processes will be kept the same. The following table describes the main M & E instruments of the project. Table 4. Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "ner_text": [ + [ + 1443, + 1458, + "named" + ], + [ + 1522, + 1532, + "Social Registry <> reference population" + ], + [ + 1723, + 1733, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "Project M & E instruments Instrument Description Data to be collected Management Information System The MIS will collect information from different processes including registration, enrolment, payments, and claims. Information on registration and enrolment is obtained through direct collection of data; payment information is provided by the service providers and claims are fed into the system by different collection channels - Socio-economic data of households enrolled - Number of households receiving cash transfers - Amounts paid to beneficiaries - Claims submitted by beneficiaries Social Registry The Social Registry compiles socio - economic information from households that will be used to determine eligibility for social programs - Socio-economic data of households registered Iterative Beneficiary Monitoring The IBM is a phone-based survey to a sample of households ( beneficiaries and non-beneficiaries of social programs ) to inquire on program performance, satisfaction, and context - Satisfaction information of beneficiaries and non - beneficiaries of social programs - Qualitative information on the vulnerability context", + "type": "registry", + "explanation": "The Social Registry is indeed a dataset as it compiles socio-economic information used to determine eligibility for social programs.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it compiles socio-economic information from households.", + "contextual_reason_agent": "The Social Registry is indeed a dataset as it compiles socio-economic information used to determine eligibility for social programs.", + "contextual_signal": "described as a registry that compiles socio-economic information", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 53, + "text": "The project preparation team worked with the Poverty team and the National Statistical Institute ( ISTEEBU ) to build small-area extreme poverty maps ( based on the ECVMB 2013-14 in combination with the 2010 Census data ), which provided the basis for a more systematic geographical targeting at the commune-level and could inform the planned expansion of the safety net program and other programs using the registry. In each province, the four communes with the highest poverty ( from 21 The CNPS includes 11 ministers under the authority of the President 22 The country is divided in 18 provinces, 129 communes and 2, 638 collines. 23 The Amashiga program started in 2016 and aims to foster community based-nutrition, using the lessons of the Tubaramure pilot in Cankuso and Ruyigi. The program would distribute food to all households with pregnant women or children under two, and foster behavior change in terms of food consumption, preparation, production, water and sanitation and hygiene practices, and access to health.", + "ner_text": [ + [ + 118, + 149, + "named" + ] + ], + "validated": false, + "empirical_context": "The project preparation team worked with the Poverty team and the National Statistical Institute ( ISTEEBU ) to build small-area extreme poverty maps ( based on the ECVMB 2013-14 in combination with the 2010 Census data ), which provided the basis for a more systematic geographical targeting at the commune-level and could inform the planned expansion of the safety net program and other programs using the registry. In each province, the four communes with the highest poverty ( from 21 The CNPS includes 11 ministers under the authority of the President 22 The country is divided in 18 provinces, 129 communes and 2, 638 collines.", + "type": "project", + "explanation": "However, it is not a dataset but rather a project output that visualizes data rather than serving as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves mapping data related to poverty.", + "contextual_reason_agent": "However, it is not a dataset but rather a project output that visualizes data rather than serving as a structured collection of data itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 111, + "text": "A draft PPSD was developed to improve the implementation of the project and help achieve results. The PPSD resulted in the preparation of an initial 18-month PP setting forth the selection methods to be followed by the IA during project implementation in the procurement of goods, works, and non-consulting and consulting services financed by the World Bank. The PP will be updated at least annually or as required to reflect the actual project implementation needs and improvements in institutional capacity. The IA will use STEP in the implementation of the project. This is a planning and tracking system, which would provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. After getting the World Bank \u2019 s agreement to the work plan, all documents at each stage of the procurement process will be uploaded in STEP for the World Bank \u2019 s post review. 7. Procurement capacity risk assessment. The procurement activities will be implemented by the PIUs at MEMD and UECCC. Assessment of the respective IAs was conducted as part of project preparation, and it was noted that project management will make use of existing procurement management arrangements. The proposed IAs have experience implementing World Bank-funded projects, and the project will leverage the gain in procurement capacity training of procurement staff through the implementation of the previous and the ongoing World Bank-funded projects, ERT-3 and GERP. The MEMD has a PCU and a PIU supported by 66 Supplies - UGX 1 billion ( US $ 266, 667 ), road works - UGX45 billion ( US $ 12 million ), public works - UGX10 billion ( US $ 2, 7 million ), consultancy services - UGX1 billion ( US $ 266, 667 ), and non-consultancy services - UGX 200 million ( US $ 53, 000 ).", + "ner_text": [ + [ + 526, + 530, + "named" + ] + ], + "validated": false, + "empirical_context": "The PP will be updated at least annually or as required to reflect the actual project implementation needs and improvements in institutional capacity. The IA will use STEP in the implementation of the project. This is a planning and tracking system, which would provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it is mentioned in the context of providing data on procurement activities.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 62, + "text": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "ner_text": [ + [ + 646, + 658, + "named" + ] + ], + "validated": false, + "empirical_context": "This indicator will be measured at least on a quarterly basis SNSOP MIS This data will be collected through registration and payments Implementing Partner Number of beneficiaries receiving economic opportunities who are youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth, defined as people between the ages of 18 and 35 years, and have receive at least 1 This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be gathered at registration and will be updated over the course of project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection", + "type": "data", + "explanation": "'Payment data' is mentioned as a type of information collected but not as a standalone dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Payment data' refers to a structured collection of data related to payments.", + "contextual_reason_agent": "'Payment data' is mentioned as a type of information collected but not as a standalone dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a dataset", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 5, + "validated": 4, + "not_validated": 1 + } + }, + { + "filename": "161_28046", + "page": 34, + "text": ". Re-energizing o f health committees around health centers would be done by ensuring that health centers who serve poor populations receive their govemment allocation - as they only receive 30 % now according to the Health Expenditures Tracking Survey carried out in 2003. In order to improve governance, the publication o f the budget received by each health structure and how the money was spent would be done throughout the country so that the population i s aware and government officials are accountable to them. o Sub-component C: Oualitv Insurance: the M O H has taken the option to improve utilization o f health services by strengthening their quality. Such a system has been put in place with the help o f GTZ in two prefectures. Evaluation o f this pilot was carried out end o f 2004, preliminary evidence seems to indicate that this approach i s successful. Lessons leamt from the evaluation would be taken into account and based on this evaluation, the project would help extend this improved system to all 18 targeted prefectures. Following a self-evaluation, health structures decide on a plan to improve six basic aspects o f health care: technical skills, client satisfaction, continued training, community participation, management o f the district, financial management. The best plans would be rewarded. Then each structure plans its annual operation plan based on its diagnosis.", + "ner_text": [ + [ + 217, + 252, + "named" + ], + [ + 91, + 105, + "Health Expenditures Tracking Survey <> reference population" + ], + [ + 268, + 272, + "Health Expenditures Tracking Survey <> publication year" + ], + [ + 310, + 398, + "Health Expenditures Tracking Survey <> data description" + ] + ], + "validated": true, + "empirical_context": ". Re-energizing o f health committees around health centers would be done by ensuring that health centers who serve poor populations receive their govemment allocation - as they only receive 30 % now according to the Health Expenditures Tracking Survey carried out in 2003. In order to improve governance, the publication o f the budget received by each health structure and how the money was spent would be done throughout the country so that the population i s aware and government officials are accountable to them.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to a specific survey that tracks health expenditures, which is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it refers to a specific survey that tracks health expenditures, which is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 50, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 39 Indicator Name Improved community water points ( constructed or rehabilitated ) under the project Definition / Description Number of improved community water points constructed or rehabilitated under the project. A community water point is defined as a public outlet for the provision of water supply to a number of households. Improved community water points refer to standpipes, protected dug well, borehole, or protected spring. It does not include, inter alia, unprotected wells or unprotected springs.. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC tracking tools, Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / NWSC Indicator Name Precent of water and sanitations committees with women in key position in project areas Definition / Description Women holding at least one key position in water user committees for influencing or making decisions on WSS related matters. The key positions are: Chairperson, Vice Chairperson, Secretary and Treasurer. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC tracking tools, Refugee Database and UPMiS.", + "ner_text": [ + [ + 649, + 665, + "named" + ] + ], + "validated": false, + "empirical_context": ". Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC tracking tools, Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'system' in its name, which can imply data collection.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 140, + "text": "Only 57 percent of Afghan men believe that women should be allowed to work outside of the home, and even this level of endorsement comes with caveats as to the types and places of work that are considered acceptable for women. 46 The 2013 / 2014 Afghanistan Living Conditions Survey found that three-quarters of women do not leave the dwelling without the company of another person and about half leave the house four times or less per month, while 12 percent of women indicate they never left the house in an entire month. Furthermore, female decision-making on spending money is quite restricted. 47 3. Over the past decade, the government has developed several strategies to improve opportunities for women. These include the National Action Plan for the Women of Afghanistan ( 2008-2018 ), the Elimination of Violence against Women law ( 2009 ), and a Gender Mainstreaming Guideline for Municipalities ( 2014 ). The Government is currently preparing a National Women \u2019 s Economic Empowerment Program designed to create conditions that will enable women to become full participants in every level of the economy. Experience under NSP and Urban Areas and Lessons Learned 4. Over the years, NSP has put in place several procedures to increase women \u2019 s participation in the program. For example, gender awareness training was required for the FP 45 See CSO, 2016. 46 See The Asia Foundation, 2013. 47 See CSO, 2016.", + "ner_text": [ + [ + 246, + 282, + "named" + ], + [ + 43, + 48, + "Afghanistan Living Conditions Survey <> reference population" + ], + [ + 234, + 245, + "Afghanistan Living Conditions Survey <> publication year" + ], + [ + 246, + 257, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 312, + 317, + "Afghanistan Living Conditions Survey <> reference population" + ], + [ + 781, + 790, + "Afghanistan Living Conditions Survey <> reference year" + ] + ], + "validated": true, + "empirical_context": "Only 57 percent of Afghan men believe that women should be allowed to work outside of the home, and even this level of endorsement comes with caveats as to the types and places of work that are considered acceptable for women. 46 The 2013 / 2014 Afghanistan Living Conditions Survey found that three-quarters of women do not leave the dwelling without the company of another person and about half leave the house four times or less per month, while 12 percent of women indicate they never left the house in an entire month. Furthermore, female decision-making on spending money is quite restricted.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that provides data used for empirical analysis of living conditions in Afghanistan.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides empirical data on living conditions.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that provides data used for empirical analysis of living conditions in Afghanistan.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 33, + "text": "WFP works to complement MENFOP \u2019 s school canteen program. UNHCR coordinates on the educational needs of refugees. The Japanese International Cooperation Agency currently sends experts in mathematics instruction and school management committees. The French Development Agency, also active in the sector, works in TVET and Higher Education. Finally, the EAA Foundation is collaborating with the World Bank on their Educate A Child program for out of school children. They are contributing US $ 3. 8 million with results-based funding aimed at helping the GoD enroll OOSC in the project. G. Lessons Learned and Reflected in the Project Design 72. The proposed project will benefit from the technical and policy dialogue in education conducted with the GoD on an ongoing basis since 2000. It will also benefit from the World Bank \u2019 s strong analytical understanding of contemporary education issues in Djibouti that is based on: ( a ) the Systematic Country Diagnostic ( 2018 ), ( b ) the EDAM4 Household Survey ( 2018 ), and ( c ) the support to MENFOP in their revision of their Education Action Plan for 2017-2020. The World Bank has also recently completed a SABER-ECD study ( 2019 ) and is currently studying the educational needs of refugees ( expected 2019 ). The project will also draw on lessons from the UNICEF-supported revision to the Out of School Children Survey ( 2019 ), which focuses on refugees, girls, and school leavers. 73. The following table highlights lessons from the two previous projects, PAEQ ( P145323 ) and ACIGEF ( P123315 ), that will be incorporated into the new Project. Table 3: Lessons learned from previous IDA-financed projects in education Lessons Mitigation Measures in this Project When developing a sub-sector in which there has been limited activity, like preschool, it is important to consider whether the legislative and regulatory framework is in place and is needed as a first step In the pre-primary sub-sector, the policy and quality framework will be developed first by incentivizing MENFOP to invest in the creation of an enabling environment through a DLI.", + "ner_text": [ + [ + 1344, + 1373, + "named" + ], + [ + 105, + 113, + "Out of School Children Survey <> reference population" + ], + [ + 394, + 404, + "Out of School Children Survey <> publisher" + ], + [ + 816, + 826, + "Out of School Children Survey <> publisher" + ], + [ + 899, + 907, + "Out of School Children Survey <> data geography" + ], + [ + 968, + 972, + "Out of School Children Survey <> reference year" + ], + [ + 1119, + 1129, + "Out of School Children Survey <> publisher" + ], + [ + 1178, + 1182, + "Out of School Children Survey <> publication year" + ], + [ + 1311, + 1327, + "Out of School Children Survey <> publisher" + ], + [ + 1376, + 1380, + "Out of School Children Survey <> publication year" + ], + [ + 1401, + 1409, + "Out of School Children Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank has also recently completed a SABER-ECD study ( 2019 ) and is currently studying the educational needs of refugees ( expected 2019 ). The project will also draw on lessons from the UNICEF-supported revision to the Out of School Children Survey ( 2019 ), which focuses on refugees, girls, and school leavers. 73.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is mentioned in relation to educational needs and is supported by UNICEF, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "The context confirms it is a dataset as it is mentioned in relation to educational needs and is supported by UNICEF, indicating it serves as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 15, + "text": "In order to track the increase in regional collaboration the Most Significant Change ( MSC ) technique has been incorporated into the project design. This technique of monitoring regional collaboration is further detailed in the Annex 6 and in the in Project Operations Manual. Data collected in this project will be used by National AIDS Authorities to inform initiatives aimed at reaching CBMPs, refugees, returnees, IDPs and surrounding populations. In this way, IGAD would become the central synthesizer of HIV / AIDS M & E information about these targeted populations. To facilitate information sharing, an interactive website will be developed, hosted and managed by IGAD to enable countries to share information and key data about HIV / AIDS-related issues in the region and about the implementation of this project. 45. The M & E system used for the project will be a precursor to a regional M & E framework that will be developed once IGAD ( 1st of the Three Ones at the regional level ) has developed a regional HIV strategy. Once the regional HIV strategy has been developed, the project \u2019 s M & E system will be subsumed within the regional HIV M & E system that will also be managed by IGAD.", + "ner_text": [ + [ + 612, + 631, + "named" + ] + ], + "validated": false, + "empirical_context": "In this way, IGAD would become the central synthesizer of HIV / AIDS M & E information about these targeted populations. To facilitate information sharing, an interactive website will be developed, hosted and managed by IGAD to enable countries to share information and key data about HIV / AIDS-related issues in the region and about the implementation of this project. 45.", + "type": "website", + "explanation": "However, the term 'interactive website' refers to a platform for sharing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions sharing information and data.", + "contextual_reason_agent": "However, the term 'interactive website' refers to a platform for sharing information rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 74, + "text": "The department has a senior accountant and several accounts assistants. The main accounts of the MWE are computerized with the IFMS. However, this system is only operational for government funds, and the Project module is not yet fully operational. As a result, Project financial reports cannot be generated directly from the IFMS. The accounting section also has a unit that handles the WMDP that has fully qualified accounting staff. Although the unit has a large number of staff, only three are senior ( at the level of officer and above ), which affects decision-making. The current WMDP has a financial management specialist ( FMS ) who is qualified and experienced and is expected to be dedicated for the Project. It also has the position of assistant FMS that fell vacant recently and will be filled in due course. With this level of staffing, there will be sufficient hands to manage the implementation of the Project. 10. The MWE has an Internal Audit Unit comprising four internal auditors seconded from the MoFPED \u2019 s Department of Internal Audit. There is also an audit committee in place at the MoFPED to which the Internal Audit Unit reports. The committee meets quarterly to review internal audit findings and the actions that have been taken to address them.", + "ner_text": [ + [ + 127, + 131, + "named" + ] + ], + "validated": false, + "empirical_context": "The department has a senior accountant and several accounts assistants. The main accounts of the MWE are computerized with the IFMS. However, this system is only operational for government funds, and the Project module is not yet fully operational.", + "type": "system", + "explanation": "However, IFMS is described as a system, not a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IFMS is a dataset because it is related to data management.", + "contextual_reason_agent": "However, IFMS is described as a system, not a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 60, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 56 of 74 Project Operations Manual, and have received cash transfers, at least for one payment cycle. minimum on a quarterly basis Information System ( MIS ) course of project implementation. Payment data will be liked to and updated in the MIS. Number of beneficiaries receiving cash for performing labor intensive public works who are female Number of total beneficiaries that directly receive cash transfer for working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are female This indicator will be measured at a minimum on a quarterly basis Registration and payment data in the SNSOP MIS Beneficiary data is collected during registration and updated over the course of the project. Payment data will also be periodically updated in the MIS Selected Implementing Partner Number of beneficiary households receiving cash transfer for participating in the behavioral change communication training The number of beneficiary households that participate in behavioral change communication training activities to receive their cash transfer.", + "ner_text": [ + [ + 703, + 712, + "named" + ], + [ + 4, + 14, + "SNSOP MIS <> publisher" + ], + [ + 286, + 298, + "SNSOP MIS <> data type" + ], + [ + 807, + 819, + "SNSOP MIS <> data type" + ] + ], + "validated": true, + "empirical_context": "Number of beneficiaries receiving cash for performing labor intensive public works who are female Number of total beneficiaries that directly receive cash transfer for working on LIPW under sub - component 1. 1 on behalf of beneficiary HH, of which are female This indicator will be measured at a minimum on a quarterly basis Registration and payment data in the SNSOP MIS Beneficiary data is collected during registration and updated over the course of the project. Payment data will also be periodically updated in the MIS Selected Implementing Partner Number of beneficiary households receiving cash transfer for participating in the behavioral change communication training The number of beneficiary households that participate in behavioral change communication training activities to receive their cash transfer.", + "type": "management information system", + "explanation": "It is indeed a dataset as it is described as a management information system that stores records related to beneficiaries and payments.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' typically refers to a system that manages and stores data.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a management information system that stores records related to beneficiaries and payments.", + "contextual_signal": "described as a management information system that stores records", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 14, + "text": "Women entrepreneurs are likely to be excluded from the channels of information, networks, and mentors associated with the more profitable, male-dominated sectors and businesses within them. Throughout a firm \u2019 s life, the diversity of networks can impact whether an entrepreneur has access to credit, learns about new market opportunities, and acquires the skills needed to successfully operate their businesses. 22 Studies of women entrepreneurs in Uganda find that women who work closely with a mentor \u2014 often male, and usually a family member \u2014 are more likely to transition into higher-profit sectors. 23 24 15. Additional factors that block women from developing growth-oriented enterprises in profitable sectors are related to the failure of existing business development services to address the needs of women-owned firms. According to an enterprise survey conducted in 2014, MSMEs lacked key skills needed for business growth. Only 28 percent of firms surveyed said they do book-keeping to track revenues and expenses; a mere 10 percent had invested in training for employees; and just 36 percent had access to the internet. Female-owned firms appear to be particularly lacking when it comes to the use of standard business practices. A recent microenterprise survey showed a gender gap of 24 percentage points on an index of adoption of good business practices.", + "ner_text": [ + [ + 846, + 863, + "named" + ], + [ + 0, + 19, + "enterprise survey <> reference population" + ], + [ + 450, + 456, + "enterprise survey <> data geography" + ], + [ + 877, + 881, + "enterprise survey <> publication year" + ], + [ + 883, + 888, + "enterprise survey <> reference population" + ], + [ + 1133, + 1151, + "enterprise survey <> reference population" + ], + [ + 1386, + 1404, + "enterprise survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Additional factors that block women from developing growth-oriented enterprises in profitable sectors are related to the failure of existing business development services to address the needs of women-owned firms. According to an enterprise survey conducted in 2014, MSMEs lacked key skills needed for business growth. Only 28 percent of firms surveyed said they do book-keeping to track revenues and expenses; a mere 10 percent had invested in training for employees; and just 36 percent had access to the internet.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned that it is an enterprise survey conducted in 2014, providing empirical data on business practices.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on MSMEs.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned that it is an enterprise survey conducted in 2014, providing empirical data on business practices.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "191_multi-page", + "page": 26, + "text": "Number of agreements reports. to raise local resources * Improved capacity of signed with NGOs. a Financial Audit reports to compensate for some private sector: local * Number of infrastructure prepared by indeper dent of any shortfall in contractors, consultants sub-projects completed, by, auditors. Government funding. and NGOs. type. * Mid-term review and ICR. Community \u2022 A targeted and. Days of employment prepared by Twitezimbere Development transitional increase in created. and the Bank. Committees conduct wage incomes.. Training programs routine maintenance. lc. conducted for NGOs and * The Government. Improvement in health Smal and Medium increases training and and nutrition of young Enterprises. imnproves incentives for children and mothers. Ic. teachers, nurses, 2.. Households reached by doctors, and other. Recent & relevant data infonnation, education, and skilled staff of social made available for communication activities. infrastructure. policymaking and * Persons trained to improve * Poverty monitoring progress monitoring. the health and nutrition of system is actually used. Recent & relevant data young children. by policy makers to on progress of the 2. design better policies to project is available. * Statistical surveys produced reduce poverty and 3. by the national statistics target interventions. * Project effectively institute ( ISTEEBU ) and managed and other local institutions implemented. trained by the project 3. * Cost of administration as a percentage of expenditures on infrastructure sub - projects and ( plus ) community mobilization. * Average time for sub - project cycle ( from identification to hand over ).", + "ner_text": [ + [ + 1235, + 1254, + "named" + ], + [ + 424, + 436, + "Statistical surveys <> author" + ], + [ + 491, + 495, + "Statistical surveys <> publisher" + ], + [ + 1127, + 1141, + "Statistical surveys <> reference population" + ], + [ + 1369, + 1376, + "Statistical surveys <> publisher" + ], + [ + 1588, + 1624, + "Statistical surveys <> data description" + ] + ], + "validated": true, + "empirical_context": "design better policies to project is available. * Statistical surveys produced reduce poverty and 3. by the national statistics target interventions.", + "type": "survey", + "explanation": "In this context, 'Statistical surveys' are explicitly mentioned as a means to reduce poverty, indicating they are used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Statistical surveys' typically involve structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, 'Statistical surveys' are explicitly mentioned as a means to reduce poverty, indicating they are used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 13, + "text": "Two recent studies provide a detailed general analysis of policy options. 18 For STEAM courses, the causes relate to: ( i ) enjoyment of reading and preference for mathematics; ( ii ) self-efficacy or belief in own capabilities, often reinforced by teachers; ( iii ) parental support or lack thereof due to low socioeconomic status; ( iv ) teacher preparation to deal effectively with diversity. Figure 1a: PISA learning assessment scores: gender comparison Source: PISA; Differences greater than the grey bands ( + or - two standard errors ) are statistically significant. 16 Source: Government Budget for 2024 and MEP School Census data from 2023. 17 Program-Based budgeting at a government-wide level is being implemented under the Fiscal Management Improvement Project ( P172352, Loan 9075-CR ), known as Hacienda Digital. Investing in readiness to implement program-based budgeting at MEP, the biggest Ministry ( in terms of budget and staff ) is a priority for the Government of Costa Rica. 18 Desigualdades por g\u00e9nero en Primaria y Secundaria, Chapter 4 in Noveno Estado de la Educaci\u00f3n, 2023; and Villlobos and Azofeifa, La paradoja en educaci\u00f3n, alta inversi\u00f3n del PIB y alta brechas de g\u00e9nero, Logos ( II ) 1, 2021.", + "ner_text": [ + [ + 466, + 470, + "named" + ], + [ + 644, + 648, + "PISA <> reference year" + ], + [ + 985, + 995, + "PISA <> data geography" + ], + [ + 1095, + 1099, + "PISA <> reference year" + ], + [ + 1105, + 1127, + "PISA <> author" + ], + [ + 1220, + 1224, + "PISA <> publication year" + ] + ], + "validated": true, + "empirical_context": "18 For STEAM courses, the causes relate to: ( i ) enjoyment of reading and preference for mathematics; ( ii ) self-efficacy or belief in own capabilities, often reinforced by teachers; ( iii ) parental support or lack thereof due to low socioeconomic status; ( iv ) teacher preparation to deal effectively with diversity. Figure 1a: PISA learning assessment scores: gender comparison Source: PISA; Differences greater than the grey bands ( + or - two standard errors ) are statistically significant. 16 Source: Government Budget for 2024 and MEP School Census data from 2023.", + "type": "dataset", + "explanation": "In this context, 'PISA' is indeed a dataset as it provides structured data on learning assessments used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is referenced in relation to learning assessment scores.", + "contextual_reason_agent": "In this context, 'PISA' is indeed a dataset as it provides structured data on learning assessments used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 15, + "text": "national response. The magnitude o f these demands have placed a substantial strain on the health sector human resource base central to more general health sector activities. Similarly, the Central Medical Stores ( CMS ) system has been largely consumed by the procurement and distribution o f ARVs, resulting in cases o f drug stock-outs for other supplies, as well as unavailability o f condoms at some health centers, particularly in more remote districts. Other key epidemic impacts 19. Gender and age diferentials. HIV / AIDS does not affect all people equally. Risk and vulnerability to HIV / AIDS are substantially different for men and women in Botswana, as i s clear in the age - and sex-differentiated prevalence rates. The impact o f HIV / AIDS differs markedly by gender, reflecting traditional roles and responsibilities in both household and market activities. Gender inequality, and the role o f power in sexual relations, especially women \u2019 s lack o f economic empowerment, are important factors in the spread of HIV / AIDS, as are gender-based socio-cultural, legal, and physiological factors. 20. Gender-based vulnerability to HIV infection i s clearly demonstrated in population - based serosurveys in Botswana, with prevalence rates consistently ranging up to three times higher among young women ( 15-19 years ) than young men o f the same age group.", + "ner_text": [ + [ + 1187, + 1217, + "named" + ], + [ + 653, + 661, + "population - based serosurveys <> data geography" + ], + [ + 683, + 728, + "population - based serosurveys <> data description" + ], + [ + 1221, + 1229, + "population - based serosurveys <> data geography" + ], + [ + 1305, + 1316, + "population - based serosurveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "20. Gender-based vulnerability to HIV infection i s clearly demonstrated in population - based serosurveys in Botswana, with prevalence rates consistently ranging up to three times higher among young women ( 15-19 years ) than young men o f the same age group.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to structured surveys that provide empirical data on HIV prevalence among different demographics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population-based serosurveys' implies a structured collection of data related to HIV prevalence rates.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured surveys that provide empirical data on HIV prevalence among different demographics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "143_749060PAD0P124010Box374388B00OUO090", + "page": 47, + "text": "( c ) Enabling evidence-based policy making for poverty reduction. This sub-component will: ( i ) Support the functioning of the Social-IMC and creation of a poverty analysis capability. In order to build capacity for evidence-based policymaking for poverty reduction, SPPP will finance ( i ) technical assistance to support the Inter-ministerial Committee for Social Policy ( Social-IMC ) and its Secretariat; ( ii ) technical assistance to establish a Poverty Analysis Team, whose role will be to assess the poverty and inequality situation in Lebanon using Household Budget Survey ( HBS ) data; and ( iii ) the design and implementation of the next HBS ( in 2017 ), which will be a large - sample survey representative at the governorate ( Mohafazat ) level. Component 4: Project Management ( US $ 2. 2 million total cost, of which US $ 2. 0 million to be financed from IBRD ) 16.", + "ner_text": [ + [ + 560, + 583, + "named" + ], + [ + 546, + 553, + "Household Budget Survey <> data geography" + ], + [ + 586, + 589, + "Household Budget Survey <> acronym" + ], + [ + 661, + 665, + "Household Budget Survey <> publication year" + ], + [ + 685, + 706, + "Household Budget Survey <> data type" + ] + ], + "validated": true, + "empirical_context": "This sub-component will: ( i ) Support the functioning of the Social-IMC and creation of a poverty analysis capability. In order to build capacity for evidence-based policymaking for poverty reduction, SPPP will finance ( i ) technical assistance to support the Inter-ministerial Committee for Social Policy ( Social-IMC ) and its Secretariat; ( ii ) technical assistance to establish a Poverty Analysis Team, whose role will be to assess the poverty and inequality situation in Lebanon using Household Budget Survey ( HBS ) data; and ( iii ) the design and implementation of the next HBS ( in 2017 ), which will be a large - sample survey representative at the governorate ( Mohafazat ) level. Component 4: Project Management ( US $ 2.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data used for assessing poverty and inequality.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on household budgets.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data used for assessing poverty and inequality.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "148_492780PAD0P111101Official0Use0Only1", + "page": 44, + "text": "The implementation areas that would be subject to close examination during the evaluation are: ( i ) the extent to which the collaborating teachers become real mentors using mentoring performance indicators developed by the implementation team; ( ii ) the extent that the program of studies of the teaching training institutions provide more opportunities for school - based practicum; ( iii ) the extent by which teaching practice teams established in the teacher training institutions get involve with the participating schools in organizing the teaching practicum; and ( iv ) observable differences between student teachers in participating and non - participating schools on the \u201c professional skills and applications \u201d dimension of the readiness to teach indicator. 140. During this stage, Al-Azhar University of Gaza; Arab American University in the north; Bethlehem University in the south; and Ramallah Men \u2019 s Training Center - RMTC ( UNRWA ) in Ramallah will work with two or three affiliated schools which together would develop top quality school-based practicum experiences for future class teachers. The number of affiliated ( cooperating ) schools will depend on the size ( enrollment ) offered by each higher education institution. The selection of the participating schools will be done using a set of criteria that will be developed by the MOEHE team managing this component, in collaboration with the selected higher education institution at the beginning of project implementation.", + "ner_text": [ + [ + 174, + 206, + "named" + ] + ], + "validated": false, + "empirical_context": "The implementation areas that would be subject to close examination during the evaluation are: ( i ) the extent to which the collaborating teachers become real mentors using mentoring performance indicators developed by the implementation team; ( ii ) the extent that the program of studies of the teaching training institutions provide more opportunities for school - based practicum; ( iii ) the extent by which teaching practice teams established in the teacher training institutions get involve with the participating schools in organizing the teaching practicum; and ( iv ) observable differences between student teachers in participating and non - participating schools on the \u201c professional skills and applications \u201d dimension of the readiness to teach indicator. 140.", + "type": "indicator", + "explanation": "However, it is not a dataset as it refers to performance indicators rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'indicators' which can imply a structured measure.", + "contextual_reason_agent": "However, it is not a dataset as it refers to performance indicators rather than a structured collection of data.", + "contextual_signal": "mentioned only as a performance measure, not as a data source", + "tags": [] + }, + { + "filename": "027_Jordan-Emergency-Food-Security-Project", + "page": 51, + "text": "The World Bank Emergency Food Security Project ( P178936 ) Page 47 of 54 for the wheat and barley operations within the Ministry. 7. MoITS wheat and barley operations. Although MoITS follows government systems, guidelines and procedures in the majority of its operations and mainly operational and capital expenditures, the wheat and barley operations are managed diffidently, very similar to private sector operations. A separate designated special trade bank account is opened at the CBJ and is used for all transactions related to wheat and barley including revenues and expenditures, separate financial statements are prepared using the accrual basis of accounting and based on the International Financial Reporting Standards ( IFRS ). In addition, MOIST follows the best practice procedures and standards available in the local and global wheat and barley markets in their operations. 8. Accounting and Financial Reporting. The project will follow International Public Sector Accounting Standards ( IPSAS ) or the IFRS - cash basis for accounting. The Government adopts a Chart of Accounts ( COA ) that is compatible with Government Financial Management Information System ( GFMIS ) 2001. MOF rolled out GFMIS to all of Jordan \u2019 s line ministries including MOIST in November 2019. The current GFMIS implementation utilizes a subset of the functionalities of the underlying application software.", + "ner_text": [ + [ + 1127, + 1177, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will follow International Public Sector Accounting Standards ( IPSAS ) or the IFRS - cash basis for accounting. The Government adopts a Chart of Accounts ( COA ) that is compatible with Government Financial Management Information System ( GFMIS ) 2001. MOF rolled out GFMIS to all of Jordan \u2019 s line ministries including MOIST in November 2019.", + "type": "system", + "explanation": "However, it is described as a system for financial management, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is described as a system for financial management, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 59, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 55 of 64 resident of the commune of Balbala. Public health facilities with safe disposal of sharps waste Quarterly IVA Quarterly IVA and PBF report MOH Facilities receiving PBF payment within 30 days of submission of verification of results Semi - annual Accounting system Semi-annual internal project review by DGP MOH Average user experience score among PBF / DFF participating facilities User experience score is a zero if not collected. Quarterly IVA Quarterly IVA and PBF report MOH Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities with functioning community health workers per the norms set by the National Community Health Strategy Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities that have formally declared the abandonment of the practice of FGM Annual Official act by community leadership Annual internal project review by DGP ( combined with semi - annual review ) MOH Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) Semi - annual DHIS2 Semi-annual internal project review by DGP MOH Facilities that are connected to the National Logistics Management Information System ( eLMIS ) Semi - annual eLMIS Semi-annual internal project review by DGP MOH Health surveys conducted and results made public Survey results will lead to action plans after every Semi - annual DGP Semi-annual", + "ner_text": [ + [ + 1332, + 1380, + "named" + ] + ], + "validated": false, + "empirical_context": "Public health facilities with safe disposal of sharps waste Quarterly IVA Quarterly IVA and PBF report MOH Facilities receiving PBF payment within 30 days of submission of verification of results Semi - annual Accounting system Semi-annual internal project review by DGP MOH Average user experience score among PBF / DFF participating facilities User experience score is a zero if not collected. Quarterly IVA Quarterly IVA and PBF report MOH Primary health facilities with functional Facility Community Committee assessed by the PBF quality checklist Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities with functioning community health workers per the norms set by the National Community Health Strategy Semi - annual Human resources MIS Semi-annual internal project review by DGP MOH Communities that have formally declared the abandonment of the practice of FGM Annual Official act by community leadership Annual internal project review by DGP ( combined with semi - annual review ) MOH Facilities with DHIS-2 implementation ( DHIS-2 data entry at the facility level ) Semi - annual DHIS2 Semi-annual internal project review by DGP MOH Facilities that are connected to the National Logistics Management Information System ( eLMIS ) Semi - annual eLMIS Semi-annual internal project review by DGP MOH Health surveys conducted and results made public Survey results will lead to action plans after every Semi - annual DGP Semi-annual", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a data-related function.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 14, + "text": "Note: Disaggregation by preschool highlights differences in PISA for students who attended two years or more of preschool and childcare versus those attending one year or less. 12. School consolidation and the quality of the learning environment are inextricably linked, affecting more students from disadvantaged areas. Education in Moldova is largely publicly financed, with falling or stagnant student numbers in all subsectors except for preschool. In recent years, the general education system has been optimized in response to the declining demographic trends ( largely through reduction of the number of classes and teachers - table 1 ). 16. As a result of the school network consolidation, students are transported to receiving schools when the institution in their locality is closed or downsized. However, these receiving schools offer learning environments that are generally outdated and lack quality educational inputs. While the financial savings from the school consolidation create a more efficient education system, more can be done to foster higher-quality education, particularly for affected students from these disadvantaged areas. True efficiency in the sector can only be realized when fiscal savings are complemented with investments in quality enhancing inputs for the most vulnerable ( qualified teachers, modern facilities, appropriate information technology, and laboratory equipment ). 12 PISA 2018 data.", + "ner_text": [ + [ + 60, + 64, + "named" + ], + [ + 6, + 33, + "PISA <> data description" + ], + [ + 334, + 341, + "PISA <> data geography" + ], + [ + 1423, + 1427, + "PISA <> publication year" + ], + [ + 1449, + 1467, + "PISA <> usage context" + ] + ], + "validated": true, + "empirical_context": "Note: Disaggregation by preschool highlights differences in PISA for students who attended two years or more of preschool and childcare versus those attending one year or less. 12.", + "type": "dataset", + "explanation": "In this context, 'PISA' refers to the Programme for International Student Assessment, which is indeed a structured collection of data used for empirical analysis of educational outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is referenced in the context of disaggregation and analysis of student performance.", + "contextual_reason_agent": "In this context, 'PISA' refers to the Programme for International Student Assessment, which is indeed a structured collection of data used for empirical analysis of educational outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 60, + "text": "The C U will include an internationally recruited Monitoring and Evaluation Expert, supported by one nationally recruited specialist. The C U will work with the DEP relevant technical departments to evaluate the technical content o f workplans proposed by MOD contractors and review progress reports to adequate information i s collected to feed into the project \u2019 s monitoring system. The chart in the following page depicts these arrangements 8. DEP itself i s a target o f project capacity building initiatives to be used for training, equipment, and site visits. The C U will share results and their reports with DEP to build on efforts to standardize and harmonize planning and evaluation within the Ministry. Sharing project indicator data collection with DEP will allow project indicators to be integrated into DEP \u2019 s SystBme de planzjkation et de suivi e \u2018 valuation ( SPSE ). A liaison in DGF will reinforce the CU \u2019 s capacity to evaluate the technical aspects o f NGO workplans and provide feedback to them and other contractors, as well as supply information related from the forest management information system ( SIGEF ). 48", + "ner_text": [ + [ + 1089, + 1125, + "named" + ] + ], + "validated": false, + "empirical_context": "Sharing project indicator data collection with DEP will allow project indicators to be integrated into DEP \u2019 s SystBme de planzjkation et de suivi e \u2018 valuation ( SPSE ). A liaison in DGF will reinforce the CU \u2019 s capacity to evaluate the technical aspects o f NGO workplans and provide feedback to them and other contractors, as well as supply information related from the forest management information system ( SIGEF ). 48", + "type": "system", + "explanation": "However, it is mentioned as a management information system, not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data management.", + "contextual_reason_agent": "However, it is mentioned as a management information system, not as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 51, + "text": "Data sharing is considered \u201c trusted \u201d when DPI ecosystem participants: ( 1 ) adhere to the applicable privacy - by-design principles; ( 2 ) adhere to the applicable data minimization principles; ( 3 ) integrate accessible, available, secure, and transparent data audit logging, as applicable, to provide people with transparency about how their data is used. Data sharing is considered \u201c people-centric \u201d when DPI ecosystem participants: ( 4 ) integrate consent-based data sharing by default when sharing personal data; ( 5 ) integrate people-centric, standards-based, digitally verifiable credentials as a data sharing mechanism, as applicable; and ( 6 ) integrate adequate grievance redress. A DPI ecosystem means all DPI providers and at least five DPI relying parties. A DPI provider means MODEE and any other entity that provides digital identification, data sharing, or other trust services to DPI relying parties. A DPI relying party is any public - or private-sector entity that relies on a digital identification or other trust service for verification, or on an authoritative data source for shared data, as provided by a DPI provider. Data source / Agency ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports.", + "ner_text": [ + [ + 1338, + 1368, + "named" + ] + ], + "validated": false, + "empirical_context": "A DPI relying party is any public - or private-sector entity that relies on a digital identification or other trust service for verification, or on an authoritative data source for shared data, as provided by a DPI provider. Data source / Agency ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports.", + "type": "document", + "explanation": "However, it is not a dataset as it refers to documents that assess rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured information.", + "contextual_reason_agent": "However, it is not a dataset as it refers to documents that assess rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 55, + "text": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "ner_text": [ + [ + 29, + 44, + "named" + ] + ], + "validated": true, + "empirical_context": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ).", + "type": "registry", + "explanation": "In the context, the 'client registry' is explicitly described as supporting the unique identification and management of patient identities, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'client registry' is a dataset because it refers to a structured collection of patient identities.", + "contextual_reason_agent": "In the context, the 'client registry' is explicitly described as supporting the unique identification and management of patient identities, indicating it functions as a data source.", + "contextual_signal": "described as a registry that supports unique identification and management of patient identities", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "185_multi-page", + "page": 40, + "text": "41 2 ) Improved care and support Percent of graduates of medical Project data services for those both infected and nursing school in the past and affected by HIV / AIDS year trained in natural history of HIV and in diagnosis and care of common opportunistic infections Percentage of health facilities Survey data that are currently stocked with drugs for commnon opportunistic infections and to provide palliative care, and report no stock-outs in the past year Percent of districts with at least Service delivery data one center staffed by trained counselors providing HIV testing and counseling at either free or affordable rates Increase in the percentage of clients served by VCT services that meet minimum requirements for provision of quality counseling and testing services Percent increase in number of communities with improved prevention services, care, and support Increase in percent of orphaned children under 15 who are currently attending school Increase in percent of population receiving quality HIV / AIDS / STI / TB case management", + "ner_text": [ + [ + 65, + 77, + "named" + ] + ], + "validated": false, + "empirical_context": "41 2 ) Improved care and support Percent of graduates of medical Project data services for those both infected and nursing school in the past and affected by HIV / AIDS year trained in natural history of HIV and in diagnosis and care of common opportunistic infections Percentage of health facilities Survey data that are currently stocked with drugs for commnon opportunistic infections and to provide palliative care, and report no stock-outs in the past year Percent of districts with at least Service delivery data one center staffed by trained counselors providing HIV testing and counseling at either free or affordable rates Increase in the percentage of clients served by VCT services that meet minimum requirements for provision of quality counseling and testing services Percent increase in number of communities with improved prevention services, care, and support Increase in percent of orphaned children under 15 who are currently attending school Increase in percent of population receiving quality HIV / AIDS / STI / TB case management", + "type": "project", + "explanation": "However, 'Project data' is mentioned only as a descriptor and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Project data' refers to a dataset due to the term 'data' being included.", + "contextual_reason_agent": "However, 'Project data' is mentioned only as a descriptor and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 52, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 43 of 68 inclusion, and sustainability ) Number of sector staff participating in capacity building trainings under the project This indicator will measure the number of sector staff trained on ( EMIS, national and international assessments and data analysis, tutoring / accelerated learning programs, ECEC reforms, integrated approach to designing and implementing resilient, sustainable, and inclusive school and preschool projects ). Progress will be tracked, and the capacity building plan will be updated every two years. Annual Progress and monitoring reports, capacity building plan Progress and monitoring reports NORLD, MoER, PMT Monitoring tool with reliable gender - disaggregated data to identify disadvantaged students to receive accelerated learning This indicator will capture the development of the monitoring tool with reliable gender-disaggregated data to identify disadvantaged students to receive tutoring, accelerated learning or other catch-up program Annual EMIS EMIS and monitoring tool data CTICE, MoER, PMT Improved regulatory environment for affordable and quality childcare for preschool children This indicator will measure the number of changes made to subsector regulations, with adjustments to the norms on the space, trainings for Annual Policy documents, progress and monitoring reports Policy documents, progress and monitoring reports MoER, PMT", + "ner_text": [ + [ + 1055, + 1059, + "named" + ] + ], + "validated": false, + "empirical_context": "Progress will be tracked, and the capacity building plan will be updated every two years. Annual Progress and monitoring reports, capacity building plan Progress and monitoring reports NORLD, MoER, PMT Monitoring tool with reliable gender - disaggregated data to identify disadvantaged students to receive accelerated learning This indicator will capture the development of the monitoring tool with reliable gender-disaggregated data to identify disadvantaged students to receive tutoring, accelerated learning or other catch-up program Annual EMIS EMIS and monitoring tool data CTICE, MoER, PMT Improved regulatory environment for affordable and quality childcare for preschool children This indicator will measure the number of changes made to subsector regulations, with adjustments to the norms on the space, trainings for Annual Policy documents, progress and monitoring reports Policy documents, progress and monitoring reports MoER, PMT", + "type": "system", + "explanation": "However, EMIS is referred to as a monitoring tool and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "However, EMIS is referred to as a monitoring tool and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 23, + "text": "This subcomponent will support NIDP, in collaboration with UNHCR and RRS, to prioritize registration and ID issuance, through registration partner and Fayda center channels, to host communities and refugees in regions where large populations of refugees exist, namely Gambella, Somali, Benishangul - Gumuz, Afar, Tigray, 38 and Amhara, as well as urban refugees in Addis Ababa. This is estimated to cover up to 1. 7 million persons in host communities and up to 924, 000 refugees. 39 NIDP will work closely with RRS and UNHCR to utilize existing and upcoming initiatives for issuing and renewing refugee ID cards. This involves reusing biographic and biometric data collected by RRS through the UNHCR ProGres system for Fayda registration. NIDP will also develop registration strategies for individuals who require to be \u2018 introduced \u2019 by a witness in the absence of supporting documentation ( for example, due to delay in issuance of refugee cards ). Fayda will not substitute existing documents issued to refugees ( refugee ID card, proof of registration, and so on ) but will be used as a complementary form of identification. 35 Floodlist 2023. Ethiopia-Flooding Continues in Several Regions, Displacing Thousands and Threatening Food Security. htps: / / floodlist. com / africa / ethiopia-floods-may-2023 36 Governments must respond quickly to climate or other shocks and provide emergency assistance.", + "ner_text": [ + [ + 695, + 715, + "named" + ], + [ + 59, + 64, + "UNHCR ProGres system <> publisher" + ], + [ + 268, + 276, + "UNHCR ProGres system <> data geography" + ], + [ + 278, + 284, + "UNHCR ProGres system <> data geography" + ], + [ + 286, + 305, + "UNHCR ProGres system <> data geography" + ], + [ + 313, + 319, + "UNHCR ProGres system <> data geography" + ], + [ + 328, + 334, + "UNHCR ProGres system <> data geography" + ], + [ + 365, + 376, + "UNHCR ProGres system <> data geography" + ], + [ + 520, + 525, + "UNHCR ProGres system <> publisher" + ], + [ + 636, + 665, + "UNHCR ProGres system <> data description" + ], + [ + 695, + 700, + "UNHCR ProGres system <> publisher" + ], + [ + 1143, + 1147, + "UNHCR ProGres system <> publication year" + ], + [ + 1422, + 1440, + "UNHCR ProGres system <> usage context" + ] + ], + "validated": true, + "empirical_context": "39 NIDP will work closely with RRS and UNHCR to utilize existing and upcoming initiatives for issuing and renewing refugee ID cards. This involves reusing biographic and biometric data collected by RRS through the UNHCR ProGres system for Fayda registration. NIDP will also develop registration strategies for individuals who require to be \u2018 introduced \u2019 by a witness in the absence of supporting documentation ( for example, due to delay in issuance of refugee cards ).", + "type": "system", + "explanation": "In the context, it is used as a source of data for registration, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a system that collects biographic and biometric data.", + "contextual_reason_agent": "In the context, it is used as a source of data for registration, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a data source for registration", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 39, + "text": "Table A3-1: Access, enrollment, and completion rates in basic education in Sudan Sudan Urban Rural Net entry rate ( 6-year-olds ) * 82. 8 % 90. 4 % 79. 9 % NER ( 6-13-year-olds ) * 69. 1 % 85. 8 % 62. 6 % GER ( 6-13-year-olds ) * 73. 3 % 88. 0 % 67. 4 % Grade 4 survival rate * * 84. 7 % 97. 7 % 76. 8 % Grade 6 survival rate * * 66. 6 % 83. 3 % 56. 5 % Grade 8 survival rate * * 49. 3 % 68. 2 % 37. 8 % Completion rate ( 13-year-olds ) * 55. 0 % 58. 2 % 52. 6 % Source: authors \u2019 estimates based on the data from Sudan MICS 2014 * and School Census 2018 * * 3. About seven percent of people never attended school in 2014. Real access increased by eight percentage points from 85 percent in 2009 to 93 percent in 2014. Access at age six increased by 30 percentage points from 40 percent in 2009 to 70 percent in 2014. The increase is driven by an increase in access among the bottom-40 percent of the population. System demonstrated growth between 2009 and 2014 with late entry remaining constant at 11 years. Those who are not ever attended school at age 11 will never attend. 4. The poor retention rate in basic education comes from a lack of school demand, in particular, among the poorest. Economic difficulties and behavior such as early marriage, pregnancy, and economic hardships explain the fragility of school demand. The lack of supply ( overcrowded classrooms, \u2018 open-air \u2019 or temporary classrooms, and incomplete schools ) also negatively effects retention rates. According to the School Census data, 16 percent of students are enrolled in a school that does not provide full course of basic education cycle ( 8 grades ). In addition, these students are likely to drop out before completion.", + "ner_text": [ + [ + 514, + 529, + "named" + ] + ], + "validated": true, + "empirical_context": "2 % 52. 6 % Source: authors \u2019 estimates based on the data from Sudan MICS 2014 * and School Census 2018 * * 3. About seven percent of people never attended school in 2014.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as a source of data for estimates, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referenced as a source of estimates in the context.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as a source of data for estimates, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source of estimates", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 26, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 21 of 85 52. MENFOP will benefit from strengthened capacity to design and use learning assessments to monitor student learning and improve pedagogical practices. An improved assessment system will yield more actionable data that can provide feedback to education stakeholders and drive improvements in education processes and outcomes. The project will improve two types of assessments currently administered in the system: ( a ) national learning assessment ( sample-based ) for monitoring and providing policymakers and practitioners with relevant information on overall performance levels in the system ( evaluations ind\u00e9pendantes administered by the service d \u2019 evaluation ). Eventually, the national learning assessment will be administered digitally through tablets to allow for faster and more accurate data processing and sharing of results; and ( b ) examinations for making decisions about an individual student \u2019 s progress through the education system ( OTIs administered by the Direction des Examens ). Results of the revised assessment will show the percentage of students reaching learning levels, or benchmarks. In addition, the project will introduce ( c ) an evaluation of digital competencies. The activity will introduce for the first time an evaluation of digital competencies at primary school for Grade 4 students. This evaluation will serve as a baseline measure that will guide subsequent reflection in building relevant digital skills. 53.", + "ner_text": [ + [ + 507, + 535, + "named" + ] + ], + "validated": false, + "empirical_context": "An improved assessment system will yield more actionable data that can provide feedback to education stakeholders and drive improvements in education processes and outcomes. The project will improve two types of assessments currently administered in the system: ( a ) national learning assessment ( sample-based ) for monitoring and providing policymakers and practitioners with relevant information on overall performance levels in the system ( evaluations ind\u00e9pendantes administered by the service d \u2019 evaluation ). Eventually, the national learning assessment will be administered digitally through tablets to allow for faster and more accurate data processing and sharing of results; and ( b ) examinations for making decisions about an individual student \u2019 s progress through the education system ( OTIs administered by the Direction des Examens ).", + "type": "assessment", + "explanation": "However, it is described as an assessment rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves assessments that generate data.", + "contextual_reason_agent": "However, it is described as an assessment rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "167_27761", + "page": 32, + "text": "The program \u2019 s internal monitoring initially will be based on the information delivered by the MIS and the project coordinators. The pertinent department at MOSA will systematically deliver the information as part of the quarterly reports to be submitted to the Bank and other donors. These reports will place special emphasis on financial data ( Le., disbursements, payments made, payment commitments ), the number of beneficiaries by type, and percentage of condition compliance by the beneficiaries. Likewise, once a year, the external firm will make sample verification in situ in order to determine whether the amounts reported by schools and clinics are real and that no significant statistical differences exist. Through the use of qualitative analysis and field visits once a year, the external fm will ensure whether the procedures set forth in the operational manual are being met. The participating institutions will be provided with M & E reports recommending adjustments to procedures in order to improve operational efficiency. If substantial deviations are found to affect the program concept, the Bank will work with MOSA in incorporating mutually acceptable corrective measures. 29", + "ner_text": [ + [ + 96, + 99, + "named" + ] + ], + "validated": false, + "empirical_context": "The program \u2019 s internal monitoring initially will be based on the information delivered by the MIS and the project coordinators. The pertinent department at MOSA will systematically deliver the information as part of the quarterly reports to be submitted to the Bank and other donors.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, which does not function as a data source itself in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured system for managing information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, which does not function as a data source itself in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "161_28046", + "page": 7, + "text": "by Region. Public expenditures seem to benefit the rich more than the poor as hospitalization i s the most subsidized of public health services and the 10 % wealthiest use almost a quarter o f the hospitalization expenditures. c ) Insufficient Health expenditures: Health expenditures in relation to total government expenditure are extremely low in Guinea. Government spending on health even decreased from less than 1 % o f GDP in 1993 to approximately 0. 75 % in 1998. The sector \u2019 s recurrent expenditure represents only 5. 6 % o f the country \u2019 s total recurrent budget, and the share o f health sector investment budget i s around 4. 75 o f the Government \u2019 s total investment budget. Health sector budget allocations have been invariably low over the past decade, representing less than \u2018 / 4 o f the education sector budget allocation, when in most countries this ratio i s closer to half. d ) Poor Budget execution: The already low budgetary allocation i s further hampered by poor execution; the executed budget i s 30 % o f the allocated budget, according to a Health Expenditures Tracking Survey undertaken in 2003.", + "ner_text": [ + [ + 1072, + 1107, + "named" + ], + [ + 350, + 356, + "Health Expenditures Tracking Survey <> data geography" + ], + [ + 433, + 437, + "Health Expenditures Tracking Survey <> reference year" + ], + [ + 1122, + 1126, + "Health Expenditures Tracking Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Health sector budget allocations have been invariably low over the past decade, representing less than \u2018 / 4 o f the education sector budget allocation, when in most countries this ratio i s closer to half. d ) Poor Budget execution: The already low budgetary allocation i s further hampered by poor execution; the executed budget i s 30 % o f the allocated budget, according to a Health Expenditures Tracking Survey undertaken in 2003.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data on health expenditures.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data on health expenditures.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 22, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "ner_text": [ + [ + 1277, + 1281, + "named" + ], + [ + 4, + 14, + "DHIS <> publisher" + ], + [ + 15, + 26, + "DHIS <> data geography" + ], + [ + 481, + 492, + "DHIS <> data geography" + ], + [ + 583, + 591, + "DHIS <> reference population" + ], + [ + 616, + 627, + "DHIS <> data geography" + ], + [ + 657, + 665, + "DHIS <> reference population" + ], + [ + 690, + 701, + "DHIS <> data geography" + ], + [ + 1069, + 1080, + "DHIS <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 18 of 47 Figure 1. Theory of Change Output Inadequate infrastructure, equipment, and medicine \uf0b7 Renovation / upgradation of selected HFs to 24 / 7 \uf0b7 Purchasing of equipment and medicines \uf0b7 Increased number of HFs renovated / upgraded including female staff hostels * \uf0b7 Increased number of HFs with essential inputs Improved utilization of quality health services in selected refugee hosting districts in Balochistan Improved child health outcomes ( reduced under \u2010 five mortality, reduced stunting ) among refugees and host communities in Balochistan Improved human capital among refugees and host communities in Balochistan Shortage, absence, low productivity, and competency of providers \uf0b7 Contracting of especially female providers * \uf0b7 Creation of HRH database \uf0b7 Competency \u2010 based training \uf0b7 Increased availability of ( especially female ) providers * at the HFs \uf0b7 Functional HRH database \uf0b7 Increased number of providers with minimum clinical competency Low quality and suboptimal use of health data \uf0b7 Digitization and integration of the HMIS into the DHIS with dashboards \uf0b7 Data review meetings \uf0b7 HMIS ( DHIS, vertical programs ) digitized and integrated into DHIS / DHIS2 \uf0b7 Improved quality of DHIS reports Weak institutional capacity \uf0b7 Capacity building ( HMIS, monitoring, supervision, and management ) \uf0b7 Increased number of targeted staff with basic / advanced competencies Low demand of health and education services \uf0b7 Advocacy and awareness \u2010 raising", + "type": "system", + "explanation": "In this context, 'DHIS' is referred to as a system that integrates health data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'DHIS' is a dataset because it is mentioned in the context of health data management and integration.", + "contextual_reason_agent": "In this context, 'DHIS' is referred to as a system that integrates health data, indicating it functions as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 11, + "validated": 5, + "not_validated": 6 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 40, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 29 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Direct Project Beneficiaries Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE and NWSC Indicator Name of which female Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source MWE / DWD / DWRM progress reports, annual water & environments sector performance report, Water Supply Atlas ( database of water supply, functionality and distribution of all Uganda districts ), NWSC annual report and refugee database.", + "ner_text": [ + [ + 483, + 488, + "named" + ], + [ + 4, + 14, + "UPMiS <> publisher" + ], + [ + 142, + 170, + "UPMiS <> reference population" + ], + [ + 552, + 607, + "UPMiS <> data type" + ], + [ + 1135, + 1141, + "UPMiS <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Integrated Water Management and Development Project ( P163782 ) 29 Monitoring & Evaluation Plan: PDO Indicators Indicator Name Direct Project Beneficiaries Definition / Description Beneficiaries are people or groups who directly receive benefits from interventions of the project ( water supply, sanitation, and WRM related investments ). Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Area performance reports, Refugee Database and UPMiS. Secondary sources include: MWE / DWD / DWRM progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "UPMiS is indeed a dataset as it is explicitly mentioned as a primary source of data for monitoring and evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UPMiS is a dataset because it is listed as a primary data source in the context.", + "contextual_reason_agent": "UPMiS is indeed a dataset as it is explicitly mentioned as a primary source of data for monitoring and evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 12, + "text": "They employ nearly 2. 5 million people, 90 percent of all private sector employees, produce 80 percent of manufactured products, and generate 20 percent of GDP. 12 8. Yet most women-led firms never grow past the micro level, while male-owned firms are twice as likely to move from micro to small size. Estimates from various surveys suggest that 80 \u2013 94 percent of all women-owned firms in Uganda 5 United Nations High Commissioner for Refugees ( UNHCR ) and the Office of the Prime Minister ( OPM ). 2022. Uganda Comprehensive Refugee Response Poral. 6 Host community numbers are UNHCR and OPM figures based on projected UBOS 2020 census data for women aged 20-59. 7 World Bank. 2019 Informing the Refugee Policy Response in Uganda: Results from the Uganda Refugee and Host Communities 2018 Household Survey ( English ). Washington, DC: World Bank. 8 World Bank. 2021. Monitoring Social and Economic Impacts of COVID-19 on Refugees in Uganda: Results from the High-Frequency Phone - Third Round. World Bank, Washington, DC. World Bank. 9 High-Frequency Phone Survey - Third Round. 2021. 10 Understood as firms in which at least 51 percent of shares are owned by women. 11 2021. Rapid Profiling of the Socioeconomic Dimensions of Female Entrepreneurs in Uganda. GROW Preparation, October 2021. 12 Financial inclusion and the growth of small medium enterprises in Uganda: empirical evidence from selected districts in Lango subregion. J Innov Entrep 10, 23 ( 2021 ).", + "ner_text": [ + [ + 751, + 808, + "named" + ] + ], + "validated": true, + "empirical_context": "7 World Bank. 2019 Informing the Refugee Policy Response in Uganda: Results from the Uganda Refugee and Host Communities 2018 Household Survey ( English ). Washington, DC: World Bank.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey that provides results used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Household Survey' in its title, suggesting a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey that provides results used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 40, + "text": "36 approaches to HIV service delivery for target populations amongst all 7 IGAD member states Component 3 Ability to plan and implement activities - Project coordination and management \u2022 Number of civil society organizations20 funded by the project in the last 12 months, by type of civil society organization \u2022 Amount of funds disbursed to civil society organizations providing services to CBMPs, refugees, returnees, IDPs and surrounding populations in the 7 IGAD countries Capacity building \u2022 Number of NGOs that are able to design HIV service delivery programs for CBMPs in line with the IGAD HIV strategy \u2022 Number of persons from IGAD Member states trained in M & E including the use of Data Track the extent of capacity strengthening Strengthened capacity of IGAD, member states and contractor to plan, implement, monitor and evaluate HIV / AIDS programs for targeted populations M & E system ( including structured learning agenda ) \u2022 Number of website hits in the last 12 months \u2022 Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames \u2022 Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months \u2022 Number of operational research studies funded Track whether functioning M & E system regularly monitors and reports on epidemic. Track level of implementation of work planning for HIV / AIDS programming.", + "ner_text": [ + [ + 1189, + 1226, + "named" + ], + [ + 419, + 423, + "National Sentinel Surveillance system <> reference population" + ] + ], + "validated": true, + "empirical_context": "36 approaches to HIV service delivery for target populations amongst all 7 IGAD member states Component 3 Ability to plan and implement activities - Project coordination and management \u2022 Number of civil society organizations20 funded by the project in the last 12 months, by type of civil society organization \u2022 Amount of funds disbursed to civil society organizations providing services to CBMPs, refugees, returnees, IDPs and surrounding populations in the 7 IGAD countries Capacity building \u2022 Number of NGOs that are able to design HIV service delivery programs for CBMPs in line with the IGAD HIV strategy \u2022 Number of persons from IGAD Member states trained in M & E including the use of Data Track the extent of capacity strengthening Strengthened capacity of IGAD, member states and contractor to plan, implement, monitor and evaluate HIV / AIDS programs for targeted populations M & E system ( including structured learning agenda ) \u2022 Number of website hits in the last 12 months \u2022 Percentage of NACs that submit program monitoring data to the PFO within specified reporting time frames \u2022 Percentage of refugee, IDP, returnee and cross border areas included in IGAD member states \u2019 National Sentinel Surveillance system in the last 12 months \u2022 Number of operational research studies funded Track whether functioning M & E system regularly monitors and reports on epidemic. Track level of implementation of work planning for HIV / AIDS programming.", + "type": "system", + "explanation": "In this context, it is indeed a dataset as it is mentioned in relation to monitoring and reporting on epidemic data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which often relates to data collection.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is mentioned in relation to monitoring and reporting on epidemic data.", + "contextual_signal": "mentioned as a data source for monitoring and reporting", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 75, + "text": "Project activities under component 3 will include both basic and advanced digital skills development are designed to help women qualify for higher productivity jobs. Removing barriers to women ' s ownership of and control over assets All IDA19 financing operations for Digital Development will support women \u2019 s increased access to and usage of digital services. All components are designed with an explicit objective to increase women \u2019 s access to Internet connectivity, e-services, affordable devices, and skills development programs. The project will measure the progress through gender - disaggregated indicators. 74Universally accessible \u2019 means that GovTech services are designed so that they can be accessed, understood and used by all people, regardless of disability, age, use of assistive devices, location or means of Internet access. It applies to hardware and software. 75GovTech solutions include hardware, software, applications, and other technology to improve access and quality of public services; facilitate citizen engagement ( CivicTech ); and improve core government operations. These include enabling analog complements to strengthen institutions for GovTech implementation, including devising related strategies, building capacity, passing related laws on e - government, data access and use; and developing regulatory frameworks to facilitate interoperability.", + "ner_text": [ + [ + 584, + 617, + "named" + ] + ], + "validated": false, + "empirical_context": "All components are designed with an explicit objective to increase women \u2019 s access to Internet connectivity, e-services, affordable devices, and skills development programs. The project will measure the progress through gender - disaggregated indicators. 74Universally accessible \u2019 means that GovTech services are designed so that they can be accessed, understood and used by all people, regardless of disability, age, use of assistive devices, location or means of Internet access.", + "type": "indicator", + "explanation": "However, 'gender-disaggregated indicators' refers to metrics rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'indicators' can imply measurable data points.", + "contextual_reason_agent": "However, 'gender-disaggregated indicators' refers to metrics rather than a structured collection of data.", + "contextual_signal": "mentioned only as a measurement tool, not as a data source", + "tags": [] + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 48, + "text": "The World Bank Strengthening Public Sector Efficiency and Statistical Capacity Project ( P151155 ) Page 45 of 93 Intermediate Results Indicators FY Indicator Name Baseline YR1 YR2 YR3 YR4 YR5 YR6 End Target Number of program budget and associated result indicators redesigned ( based on the new budget classification ) revised 0. 00 0. 00 6. 00 6. 00 6. 00 6. 00 6. 00 6. 00 Validation of Public Investment Selection Manual that include Poverty in selection criteria N Y Y Y Y Y Y Y Comprehensive budgeted Investment Projects database available online and updated quarterly within 4 weeks after the end of each quarter N N N Y Y Y Y Y Publication of annual budget and spending through on-line BOOST database by January 30 N N Y Y Y Y Y Y Procurement staff trained with demonstrated competency in procurement 0. 00 0. 00 70. 00 70. 00 70. 00 70. 00 70. 00 70. 00 ECAM 5 survey revised methodology is aligned with new international standards on Poverty surveys N N Y Y Y Y Y Y Interval between two Population Census reduced 17. 00 12. 00 12. 00 12. 00 12. 00 12. 00 12. 00 12. 00", + "ner_text": [ + [ + 862, + 875, + "named" + ], + [ + 4, + 14, + "ECAM 5 survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "00 70. 00 ECAM 5 survey revised methodology is aligned with new international standards on Poverty surveys N N Y Y Y Y Y Y Interval between two Population Census reduced 17. 00 12.", + "type": "survey", + "explanation": "The context indicates that the ECAM 5 survey is aligned with international standards on poverty surveys, suggesting it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey methodology.", + "contextual_reason_agent": "The context indicates that the ECAM 5 survey is aligned with international standards on poverty surveys, suggesting it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "036_Tajikistan-Resilient-Landscape-Restoration-Project", + "page": 27, + "text": "The regional spillovers of this component are related to improved connectivity and integrity of natural resources across borders ( including biodiversity ), increased resilience of key regional infrastructure such as roads, and increased resilience and reduced fragility of natural resource management-based livelihoods of corridor communities. All four subcomponents are designed to enhance rural livelihoods through land - based restoration and / or conservation activities, as well as address climate variability and change. Approaches such as JFM and community-based tourism are relatively new in Tajikistan but offer pathways to broaden rural livelihood options while restoring productive natural resources. Support for integrated and community-based pasture management and climate-smart cropping practices will be opportunities to enhance the financial viability and sustainability of existing, more traditional livelihood strategies. Subcomponent 2. 1. Forest Restoration and Sustainable Forest Management ( US $ 15. 50 million ) 53. The expected outcome from this subcomponent is to restore degraded forests and improve management of existing forests through proper planning and implementation of activities such as afforestation and natural regeneration. The FA will lead on the technical aspects of this subcomponent, which includes the key activities detailed in the following paragraphs: 54. National Forest Inventory. The project will finance a national-level systematic NFI using a low sampling density. The NFI exercise will employ state-of-the art methodologies for conducting forest inventories, including geospatial and earth observation data. 55. Forest management plans. The project will finance the preparation and implementation of up to eight participatory sustainable forest management plans for SFMEs ( district - / region-based forest entities ) in the project sites. Preparation of the plans will build upon experience of earlier methods. 55 Based on these activities, 10-year plans will be elaborated, with measures and costs identified for sustainable forest management ( including JFM plans ) and KBAs and corresponding maps developed. Development of plans 55 Under the Kreditanstalt f\u00fcr Wiederaufbau ( KfW ) - supported project, \u2018 Climate Adaptation through Sustainable Forestry in Important River Catchment Areas in Tajikistan \u2019, a methodology for the preparation of participatory forest management plans for SFMEs has been developed. At present, only Khovaling SFME, a project site for KfW, has a such a plan.", + "ner_text": [ + [ + 1404, + 1429, + "named" + ], + [ + 601, + 611, + "National Forest Inventory <> data geography" + ], + [ + 1268, + 1270, + "National Forest Inventory <> author" + ], + [ + 1484, + 1487, + "National Forest Inventory <> acronym" + ], + [ + 1522, + 1525, + "National Forest Inventory <> acronym" + ], + [ + 1623, + 1660, + "National Forest Inventory <> data type" + ], + [ + 2348, + 2358, + "National Forest Inventory <> data geography" + ], + [ + 2519, + 2522, + "National Forest Inventory <> publisher" + ] + ], + "validated": true, + "empirical_context": "The FA will lead on the technical aspects of this subcomponent, which includes the key activities detailed in the following paragraphs: 54. National Forest Inventory. The project will finance a national-level systematic NFI using a low sampling density.", + "type": "dataset", + "explanation": "In the context, it is described as a national-level systematic inventory, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'National Forest Inventory' suggests a systematic collection of data related to forests.", + "contextual_reason_agent": "In the context, it is described as a national-level systematic inventory, indicating it functions as a structured collection of data.", + "contextual_signal": "described as a systematic inventory that collects data", + "tags": [] + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 19, + "text": "Refugees have been more adversely affected by COVID-19 shocks than their host communities and slower to recover \u2013 with high levels of food insecurity, aid dependency on limited food rations and are ten times more likely to suffer from depression. 29 Female refugees were more likely to stop working following COVID-19 lockdowns than nationals or their male counterparts. Increased uncertainty and reduced demand in markets, is thought to be weakening already fragile supply chains and making investors less willing or more averse to pursuing business opportunities in RHD areas, exacerbating the existing vulnerability of these communities, and increasing their reliance on aid. This will further reinforce the negative path dependency of RHD communities, into higher levels of poverty and vulnerability, making social cohesion more difficult to achieve between refugees and their host communities. 21. A global economic downturn and increasing humanitarian demands have seen humanitarian assistance to refugees increasingly strained. Ration cuts coupled with COVID-19 impacts deteriorated food security among refugees substantially. It has gradually improved in the twelve months following the initial June 2020 lockdown, but remains worse than in 2018. Shortfalls in humanitarian funding to World Food Programme ( WFP ) has seen a 27 Informing the Refugee Policy Response in Uganda: Results from the Uganda Refugee and Host Communities 2018 Household Survey ( English ). Washington, DC: World Bank Group. http: / / documents1. worldbank. org / curated / en / 571081569598919068 / pdf / Informing-the-Refugee-Policy-Response-in-Uganda-Results - from-the-Uganda-Refugee-and-Host-Communities-2018-Household-Survey. pdf 28 World Bank. 2020. Covid-19 Impact Monitoring: Uganda, Round 1. World Bank, Washington, DC. 29 World Bank. 2021. Monitoring Social and Economic Impacts of COVID-19 on Refugees in Uganda: Results from the High-Frequency Phone - Third Round. World Bank, Washington, DC. \u00a9 World Bank", + "ner_text": [ + [ + 1402, + 1459, + "named" + ] + ], + "validated": true, + "empirical_context": "It has gradually improved in the twelve months following the initial June 2020 lockdown, but remains worse than in 2018. Shortfalls in humanitarian funding to World Food Programme ( WFP ) has seen a 27 Informing the Refugee Policy Response in Uganda: Results from the Uganda Refugee and Host Communities 2018 Household Survey ( English ). Washington, DC: World Bank Group.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey that provides results relevant to the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Household Survey', which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey that provides results relevant to the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 21, + "text": "The platform will serve as the foundation for engaging communities, identifying beneficiaries, and encouraging women entrepreneurs to participate in various activities supported under the project in an integrated manner. A particular area of focus will be linking women entrepreneurs to trainings on climate resilient livelihoods and Green, Resilient, Inclusive Development approaches. To reach refugee women, local platform chapters in refugee settlements or between refugees and host communities in RHDs will be created or existing ones strengthened. The local platform chapters will encourage membership / participation of emerging and established women business leaders to serve as role models for newly established and young women entrepreneurs and provide spaces and opportunities for women entrepreneurs to enhance their voice and agency in legal and policy processes. For refugees, the local platform chapters will target specific barriers to voice and agency ( such as language and specific cultural norms ) and the additional barriers women refugees have to business information ( such as lack of access to formal business channels, mentors, and inputs ). This subcomponent implementation is aligned to the PDM Pillar 5: Community Mobilization and Mindset Change. 35. The subcomponent will finance: ( a ) mobilization costs for the establishment of local platform chapters ( 20 \u2013 25 women per platform ); ( b ) establishment of a digital platform for women entrepreneurs and its linkage to other existing 34 The poverty incidence ( headcount ratio ) figures are based on the Uganda National Household Survey ( UNHS ) 2019 / 2020.", + "ner_text": [ + [ + 1585, + 1617, + "named" + ], + [ + 1585, + 1591, + "Uganda National Household Survey <> data geography" + ], + [ + 1627, + 1638, + "Uganda National Household Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "35. The subcomponent will finance: ( a ) mobilization costs for the establishment of local platform chapters ( 20 \u2013 25 women per platform ); ( b ) establishment of a digital platform for women entrepreneurs and its linkage to other existing 34 The poverty incidence ( headcount ratio ) figures are based on the Uganda National Household Survey ( UNHS ) 2019 / 2020.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data used for empirical analysis regarding poverty incidence.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a source for poverty incidence figures.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data used for empirical analysis regarding poverty incidence.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 19, + "text": "Teacher shortages, inadequate instructional materials and school running costs ( including for assessment and national exams ), as well as lack of basic school infrastructure have been identified as key priority issues in the refugee camp-based and host community schools in Kakuma, Dadaab and Kalobeyei. 16 Additional support to these schools in refugee hosting counties would contribute to the raising of education outcomes in some of the most economically and educationally disadvantaged counties. In urban areas, refugees \u2019 main barrier to access education is the cost of transport, books, uniforms, and other indirect costs. Other key limitations include different educational experiences and linguistic competencies which can result in students falling behind or dropping out, lack of information and resources to support the processes for recognition for prior learning, and lack of birth certificates and differences in registration documents, required to be registered in the National Education Management Information System ( NEMIS ) and for national examinations. C. Relationship to the CPS / CPF and Rationale for Use of Instrument 21. The proposed PforR is aligned with a draft World Bank Group Country Partnership Framework for Kenya ( CPF, FY22 \u2013 - FY27 ), which identifies three High-Level Outcomes ( HLO ): ( i ) faster labor productivity growth; ( ii ) inclusion and equality institutionalized; and ( iii ) greater productivity and preservation of Kenya \u2019 s 14 Ibid. 15 UNHCR and World Bank, ( 2021 ), Understanding the Socio-economic Conditions of Refugees in Kenya: Volume C: Results from the 2020-21 Urban Socioeconomic Survey. 16 Baseline assessment conducted by UNHCR and MoE in light of schools reopening", + "ner_text": [ + [ + 985, + 1033, + "named" + ], + [ + 226, + 271, + "National Education Management Information System <> reference population" + ], + [ + 275, + 281, + "National Education Management Information System <> data geography" + ], + [ + 283, + 289, + "National Education Management Information System <> data geography" + ], + [ + 294, + 303, + "National Education Management Information System <> data geography" + ], + [ + 1036, + 1041, + "National Education Management Information System <> acronym" + ], + [ + 1242, + 1247, + "National Education Management Information System <> data geography" + ], + [ + 1498, + 1508, + "National Education Management Information System <> publisher" + ], + [ + 1512, + 1516, + "National Education Management Information System <> publication year" + ], + [ + 1685, + 1690, + "National Education Management Information System <> author" + ] + ], + "validated": true, + "empirical_context": "In urban areas, refugees \u2019 main barrier to access education is the cost of transport, books, uniforms, and other indirect costs. Other key limitations include different educational experiences and linguistic competencies which can result in students falling behind or dropping out, lack of information and resources to support the processes for recognition for prior learning, and lack of birth certificates and differences in registration documents, required to be registered in the National Education Management Information System ( NEMIS ) and for national examinations. C.", + "type": "system", + "explanation": "It is indeed a dataset as it is mentioned in the context of being required for registration and national examinations, indicating its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is mentioned in the context of being required for registration and national examinations, indicating its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 54, + "text": "% o f ( 1 % ) ( 1 8 % ) ( 23 % ) ( 26 % ) ( 25 % ) ( 7 % ) ( 3 % ) ( 1 8 % ) ( 16 % ) ( 35 % ) ( 16 % ) ( 12 % ) 149. Beneficiary Profiles in Pilot Governorates. The PMT was applied to survey data gathered from existing beneficiaries as well as new applicants. Based on the PMT, the beneficiary categories in the pilot areas are shown in Table 1. Groups E & F make up 38 percent of the total, with the largest percentage ( 52 percent ) in Aden. Group A coverage is very small ( approximately 1 percent ) with Group B coverage for the pilot area about 13 percent, with only 5 percent coverage of this group in Aden. Groups C & D make up the largest percentage ( 48 percent ) of beneficiaries in the pilot area, comprising approximately 50 percent of beneficiaries in both Hodeida ( 49 percent ) and Mukalla ( 51 percent ). The percentage of beneficiaries in each group will be assessed over the duration of the pilot project to determine the impact of the SWF targeting policy implementation as well as the rolling out of the public information campaign. 150. The component will support the services of a long-term consultant to design BDP options with participation from SWF governorate / district staff. The consultant will provide \u201c hands-on \u201d 43", + "ner_text": [ + [ + 185, + 196, + "named" + ], + [ + 211, + 233, + "survey data <> reference population" + ], + [ + 439, + 443, + "survey data <> data geography" + ], + [ + 609, + 613, + "survey data <> data geography" + ], + [ + 771, + 778, + "survey data <> data geography" + ], + [ + 798, + 805, + "survey data <> data geography" + ] + ], + "validated": true, + "empirical_context": "Beneficiary Profiles in Pilot Governorates. The PMT was applied to survey data gathered from existing beneficiaries as well as new applicants. Based on the PMT, the beneficiary categories in the pilot areas are shown in Table 1.", + "type": "survey", + "explanation": "In this context, 'survey data' is indeed a dataset as it is explicitly mentioned as being gathered from existing beneficiaries and new applicants for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'survey data' is a dataset because it refers to data collected from beneficiaries through a structured survey.", + "contextual_reason_agent": "In this context, 'survey data' is indeed a dataset as it is explicitly mentioned as being gathered from existing beneficiaries and new applicants for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "102_Kenya-Water-PAD-04072017", + "page": 37, + "text": "27 Project Development Objective: To improve water supply and sanitation services in select coastal and northeastern regions in Kenya. PDO Level Results Indicators C o r e Unit of Meas ure Cumulative Target Values Fre - quency Data source / method - ology Respon - sibility for data collec - tion Comments Baselin e in 2016 2017 2018 2019 2020 2021 2022 People benefiting under the project from a connection to the sewage system or from improved septic sludge management. Number 0 0 0 0 240, 000 330, 000 415, 000 Semi - ann - ually Project reports on construc - tion and operatio n of infrastru cture. WSP data on connec - tions. WSPs Coast counties and Wajir. People benefiting from improved septic sludge management are those whose sludge is taken to a sludge treatment facility. The number of people benefiting from septic sludge management will be calculated as: total dry solids divided by per capita production = number of people. People with existing connections benefiting from more hours per week of water services. Number 0 0 0 0 1. 900, 00 0 2, 030, 0 00 2, 100, 00 Semi - ann - ually Project reports on construc - tion and operatio n of infrastru cture. WSP data on WSPs Coast counties residents to benefit from rehabilitation of the distribution network.", + "ner_text": [ + [ + 603, + 611, + "named" + ], + [ + 128, + 133, + "WSP data <> data geography" + ], + [ + 636, + 650, + "WSP data <> data geography" + ], + [ + 655, + 660, + "WSP data <> data geography" + ], + [ + 1284, + 1302, + "WSP data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Number 0 0 0 0 240, 000 330, 000 415, 000 Semi - ann - ually Project reports on construc - tion and operatio n of infrastru cture. WSP data on connec - tions. WSPs Coast counties and Wajir.", + "type": "data", + "explanation": "In this context, 'WSP data' is indeed used as a source of information related to connections, indicating it functions as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'WSP data' is a dataset because it is mentioned in the context of project reports and infrastructure operations.", + "contextual_reason_agent": "In this context, 'WSP data' is indeed used as a source of information related to connections, indicating it functions as a dataset.", + "contextual_signal": "mentioned as a data source in project reports", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 66, + "text": "Project activities, such as PBCs, will be implemented and managed by communities / schools using a community-empowered approach. SMCs will be responsible for facilitating dialogue among school stakeholders and decision making at the school level, making information publicly available, developing an implementation plan, and leading the implementation. In addition, the project will support capacity building of communities / SMC members in key areas such as school grants and gender sensitization awareness. Other efforts that will be introduced include GRM and social accountability through deeper engagement with communities. The ESMF provides details on citizen engagement for the LIRE Project. During the implementation stage, the client will carry out beneficiaries \u2019 satisfaction surveys in the selected sites to evaluate public satisfaction with citizen engagement measures. The feedback satisfaction surveys will be conducted through phone surveys, workshops, and community scorecards.", + "ner_text": [ + [ + 758, + 794, + "named" + ], + [ + 758, + 771, + "beneficiaries \u2019 satisfaction surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "The ESMF provides details on citizen engagement for the LIRE Project. During the implementation stage, the client will carry out beneficiaries \u2019 satisfaction surveys in the selected sites to evaluate public satisfaction with citizen engagement measures. The feedback satisfaction surveys will be conducted through phone surveys, workshops, and community scorecards.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves structured collection of data through surveys to evaluate public satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to structured surveys aimed at collecting data on beneficiaries' satisfaction.", + "contextual_reason_agent": "This is indeed a dataset as it involves structured collection of data through surveys to evaluate public satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "113_Project-Appraisal-Document-PAD-disclosable-version-P156479-08122016", + "page": 13, + "text": "The number of refugees is equivalent to over 3. 5 percent of the population, the third highest level in the world according to the Office of the United Nations High Commissioner for Refugees. 4. Nearly half ( 47 percent ) of Chad \u2019 s population lives under the poverty line. According to national household survey data, relative poverty decreased between 2003 and 2011. In particular, in 2011, 29 percent of the population lived below the food poverty line ( it was 36 percent in 2003 ), 47 percent below the overall national poverty line ( 53 percent in 2003 ) and 68 percent on less than US $ 2 per day ( 76 percent in 2003 ). Despite this reduction in the poverty rates, the absolute number of people living in various degrees of poverty has increased because of population growth. Between 2003 and 2011, the number of food poor has risen from 2. 7 to 2. 9 million; the number of poor from 4. 1 to 4. 7 million; and the number of people living on less than US $ 2 per day from 5. 7 to 6. 8 million.", + "ner_text": [ + [ + 288, + 318, + "named" + ], + [ + 225, + 229, + "national household survey data <> data geography" + ], + [ + 355, + 359, + "national household survey data <> reference year" + ], + [ + 364, + 368, + "national household survey data <> publication year" + ], + [ + 802, + 806, + "national household survey data <> reference year" + ], + [ + 1017, + 1035, + "national household survey data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Nearly half ( 47 percent ) of Chad \u2019 s population lives under the poverty line. According to national household survey data, relative poverty decreased between 2003 and 2011. In particular, in 2011, 29 percent of the population lived below the food poverty line ( it was 36 percent in 2003 ), 47 percent below the overall national poverty line ( 53 percent in 2003 ) and 68 percent on less than US $ 2 per day ( 76 percent in 2003 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on poverty levels in Chad over time.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data collection used for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on poverty levels in Chad over time.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 61, + "text": "The World Bank Jordan People-Centric Digital Government Program-for-Results ( P180291 ) Page | 52 reform, the Ministry of Education on digital examinations, the Ministry of Health on the digitalization of the health sector, the Information Council regarding Access to Information ) after ascertaining institutional capacity and fiscal resources. * * * 6. Result Area 1 on service delivery addresses one of the main strategic objectives of the Public Sector Modernization Roadmap and Digital Transformation Strategy, with a focus on increasing the inclusive adoption of people-centric digital identity, improving trusted, people-centric data sharing, and expanding trusted and inclusive access to people-centric digitalized services, including health services. - Jordan has been actively working on the digitalization of public services; however, user adoption remains low due to limited end-to-end service digitalization, low perceived relevance for many individuals and service providers, and eligibility restricted thus far to citizens. The Sanad application implemented by MODEE includes digital identity ( ID ), electronic signature, and a personal document store, in addition to functioning as a unified online portal for accessing digitalized public services. Approximately 800, 000 users ( that is, around 7 percent of the population ) have activated their Sanad accounts as of January 2024, which is far below the Government \u2019 s goal of 3. 5 million active digital IDs by 2025.", + "ner_text": [ + [ + 1043, + 1048, + "named" + ] + ], + "validated": false, + "empirical_context": "- Jordan has been actively working on the digitalization of public services; however, user adoption remains low due to limited end-to-end service digitalization, low perceived relevance for many individuals and service providers, and eligibility restricted thus far to citizens. The Sanad application implemented by MODEE includes digital identity ( ID ), electronic signature, and a personal document store, in addition to functioning as a unified online portal for accessing digitalized public services. Approximately 800, 000 users ( that is, around 7 percent of the population ) have activated their Sanad accounts as of January 2024, which is far below the Government \u2019 s goal of 3.", + "type": "program", + "explanation": "'Sanad' is not a dataset as it is described as an application and portal, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is associated with digital services and user accounts.", + "contextual_reason_agent": "'Sanad' is not a dataset as it is described as an application and portal, not a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 30, + "text": "The MIS will collect information from applicants during registration, such as ( a ) key contact information, gender, age, marital status, education status, refugee status, employment; ( b ) business level outcomes such as business partners, business age, monthly profits, number of employees; and ( c ) access to other programs and loans. The MIS database will be updated as program applicants make use of specific components. The GROW_ID can be entered to update the database with any additional information on services received to ensure the applicant is only registered once. For example, dates of business trainings, service provider, or amount of funds will all be entered into the MIS. This database will help support operations through feedback loops as it can track who is accessing which services in real time. The MIS will also be important in being able to establish a sample of study participants to draw on for an impact evaluation or other learning activities. 61. The project will also support MGLSD \u2019 s capacity to lead and oversight social risk management. At the national level, this will support capacity building of unit staff, public fora, SRM experts, etc.", + "ner_text": [ + [ + 4, + 7, + "named" + ] + ], + "validated": false, + "empirical_context": "The MIS will collect information from applicants during registration, such as ( a ) key contact information, gender, age, marital status, education status, refugee status, employment; ( b ) business level outcomes such as business partners, business age, monthly profits, number of employees; and ( c ) access to other programs and loans. The MIS database will be updated as program applicants make use of specific components.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves collecting and managing information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "066_Jordan-Youth-Technology-and-Jobs-Project", + "page": 89, + "text": "The increase in income associated with various schooling levels ( e. g. high school completion, four-year university completion ) relative to no or lower levels of education are estimated through regression analysis ( in the form of Mincerian equations ) of individual-level survey data. Once earnings differentials denoted \u2206 \ud835\udc4a, are calculated, they are assumed to be realized each year from when the students enter the labor market until they are in retirement age, roughly 65 years old ( Jimenes and Patrinos 2007 ). The education level of each individual largely determines the timing of labor market entry, with some entering right after completing high school, at age 18, those attending university 4 years later, and so forth. The analysis here is an adjusted version of the conventional approach, using the differential is relative to no computer literacy courses61 rather than a different level of education. This presents a challenge since it is not possible to estimate the differential from the individual survey data available and there does not exist much evidence on it. Of the scarce literature, Bishop and Mane ( 2004 ) indicate that the increase in earnings is about 12 percent ( or more ) for students in the US who, in high school, entered a technical program with computer courses during 61 Although there is an existing course in computer literacy and skills, the curriculum is considered obsolete.", + "ner_text": [ + [ + 258, + 286, + "named" + ], + [ + 401, + 409, + "individual-level survey data <> reference population" + ], + [ + 490, + 510, + "individual-level survey data <> author" + ], + [ + 511, + 515, + "individual-level survey data <> publication year" + ], + [ + 1111, + 1126, + "individual-level survey data <> author" + ], + [ + 1211, + 1219, + "individual-level survey data <> reference population" + ] + ], + "validated": true, + "empirical_context": "g. high school completion, four-year university completion ) relative to no or lower levels of education are estimated through regression analysis ( in the form of Mincerian equations ) of individual-level survey data. Once earnings differentials denoted \u2206 \ud835\udc4a, are calculated, they are assumed to be realized each year from when the students enter the labor market until they are in retirement age, roughly 65 years old ( Jimenes and Patrinos 2007 ).", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as 'survey data' used for regression analysis in the research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'individual-level survey data' which suggests a structured collection of data from surveys.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as 'survey data' used for regression analysis in the research.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 25, + "text": "In addition, DACs will: ( a ) review and approve CSO proposals below the Pula 30, 000 thresholds ( approximately US $ 5, 000 ); ( b ) disburse the grants to the NGOs; and ( c ) monitor implementation progress. Where needed, programmatic, procurement, and financial management support will be sourced from NACA. C. Monitoring and evaluation of outcomedresults 65. The Project \u2019 s monitoring and evaluation framework derives fully from the national HIV / AIDS monitoring and evaluation framework, the Botswana HIV / AIDS Response Information Management System ( BHRIMS ). The BNAPS Project will be supporting the further strengthening o f this system, with a focus on monitoring and evaluation at decentralized levels o f the response ( Annex 3 ). D. Sustainability 66. The Government, with support o f IBRD and other development partners, has mobilized different sectors, mass organizations and communities to Institutional sustainability. 16", + "ner_text": [ + [ + 499, + 557, + "named" + ], + [ + 305, + 309, + "Botswana HIV / AIDS Response Information Management System <> publisher" + ], + [ + 499, + 507, + "Botswana HIV / AIDS Response Information Management System <> data geography" + ] + ], + "validated": true, + "empirical_context": "Monitoring and evaluation of outcomedresults 65. The Project \u2019 s monitoring and evaluation framework derives fully from the national HIV / AIDS monitoring and evaluation framework, the Botswana HIV / AIDS Response Information Management System ( BHRIMS ). The BNAPS Project will be supporting the further strengthening o f this system, with a focus on monitoring and evaluation at decentralized levels o f the response ( Annex 3 ).", + "type": "system", + "explanation": "It is indeed a dataset as it is described as a system that supports monitoring and evaluation, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information Management System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "It is indeed a dataset as it is described as a system that supports monitoring and evaluation, indicating it functions as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "182_multi0page", + "page": 20, + "text": "and technical review will be carried out prior to the project mid-term review and completion; ii ) independent financial audit will be undertaken annually; and iii ) World Bank and donor supervision and mid-term review missions. The following tools will be used for the performance monitoring: i ) Management Information System ( MIS ); ii ) semi-annual progress reports; iii ) special reports; and norms and standards. The coordinators of sub-components will submit semi-annual reports in a pre-agreed format to the PCU. The PCU will prepare summary report and will submit them to the GASS and MOLSA, World Bank and co-financiers. b ) Project Impact Monitoring will be carried out through qualitative and quantitative impact assessments studies to be carried out at the district and regional level and beneficiary impact assessments. In addition, the project will finance a regular living standards measurement survey ( LSMS ) which will monitor the broader impact of the Government reform program. c ) Monitoring of Community-based Services: each community-based sub-project will have a monitoring indicators and evaluation plan incorporated in project design. The monitoring will be carried out by the social services officers of GASS regional and central offices based on the norms and standards and in conformity with monitoring indicators outlined in individual contracts.", + "ner_text": [ + [ + 298, + 327, + "named" + ] + ], + "validated": false, + "empirical_context": "and technical review will be carried out prior to the project mid-term review and completion; ii ) independent financial audit will be undertaken annually; and iii ) World Bank and donor supervision and mid-term review missions. The following tools will be used for the performance monitoring: i ) Management Information System ( MIS ); ii ) semi-annual progress reports; iii ) special reports; and norms and standards. The coordinators of sub-components will submit semi-annual reports in a pre-agreed format to the PCU.", + "type": "system", + "explanation": "However, it is mentioned as a system used for performance monitoring, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system used for performance monitoring, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 52, + "text": "The integration of digitally verifiable credentials by DPI ecosystem participants is verified as follows: ( 1 ) digital representations of a collection of data attributes can be securely and reliably verified by relying parties using trusted digital technologies, enabling data to be shared with a relying party from an authoritative data source while keeping the individual data subject at the center of the data sharing transaction. Examples of digitally verifiable credentials include electronic passports, national identity cards, digital education diplomas, and other credentials that can be issued to individuals and be digitally verified by relying parties; and ( 2 ) the credentials are portable, resistant to tampering, enable streamlined verification processes in online and offline scenarios, and have adequate measures in place to ensure that they can be trusted by verifiers. The availability of these trust measures is verified as follows: ( a ) there is a common, agreed-upon set of rules ( a trust framework ) describing the standards and operational requirements that must be followed by issuers and verifiers of credentials ( participating entities ); ( b ) the standards and procedures reflected in the trust framework follow internationally-recognized standards to allow for interoperability between credential issuers as well as", + "ner_text": [ + [ + 510, + 533, + "named" + ] + ], + "validated": false, + "empirical_context": "The integration of digitally verifiable credentials by DPI ecosystem participants is verified as follows: ( 1 ) digital representations of a collection of data attributes can be securely and reliably verified by relying parties using trusted digital technologies, enabling data to be shared with a relying party from an authoritative data source while keeping the individual data subject at the center of the data sharing transaction. Examples of digitally verifiable credentials include electronic passports, national identity cards, digital education diplomas, and other credentials that can be issued to individuals and be digitally verified by relying parties; and ( 2 ) the credentials are portable, resistant to tampering, enable streamlined verification processes in online and offline scenarios, and have adequate measures in place to ensure that they can be trusted by verifiers. The availability of these trust measures is verified as follows: ( a ) there is a common, agreed-upon set of rules ( a trust framework ) describing the standards and operational requirements that must be followed by issuers and verifiers of credentials ( participating entities ); ( b ) the standards and procedures reflected in the trust framework follow internationally-recognized standards to allow for interoperability between credential issuers as well as", + "type": "document", + "explanation": "'National identity cards' are mentioned as examples of credentials, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'national identity cards' are datasets because they contain personal data attributes.", + "contextual_reason_agent": "'National identity cards' are mentioned as examples of credentials, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "076_Rwanda-Socio-Economic-Inclusion-of-Refugees-and-Host-Communities-in-Rwanda-Project", + "page": 23, + "text": "Socioeconomic Investments Road rehabilitation / upgrading will support the access of refugees and host communities to economic opportunities. It will also benefit local businesses and traders, who will have easier access to the refugee population. Markets will support trade for refugees and host communities. Component 2. Economic Opportunity Subcomponent 2 ( a ). Access to finance Access to finance will support MSMEs and cooperatives run by host communities and refugees and larger businesses, which will employ both. Will also benefit local micro-finance institutions and SACCOs by 18 National Institute of Statistics of Rwanda ( 2012 ). Rwanda Fourth Population and Housing Census, Thematic Report: Population size, structure and distribution. See: http: / / www. statistics. gov. rw / publication / rphc4-atlas; and UNHCR refugee data as of February 28, 2019 see above n. 4.", + "ner_text": [ + [ + 643, + 686, + "named" + ], + [ + 635, + 639, + "Rwanda Fourth Population and Housing Census <> publication year" + ], + [ + 643, + 649, + "Rwanda Fourth Population and Housing Census <> data geography" + ], + [ + 705, + 748, + "Rwanda Fourth Population and Housing Census <> data description" + ] + ], + "validated": true, + "empirical_context": "Will also benefit local micro-finance institutions and SACCOs by 18 National Institute of Statistics of Rwanda ( 2012 ). Rwanda Fourth Population and Housing Census, Thematic Report: Population size, structure and distribution. See: http: / / www.", + "type": "census", + "explanation": "This is indeed a dataset as it is a census report that provides empirical data on population size, structure, and distribution.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it references a specific census report that typically contains structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is a census report that provides empirical data on population size, structure, and distribution.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 28, + "text": "The KNEC \u2019 s school specific analysis for the learning assessments and examinations conducted in 2020 and 2021, and for the school reentry learning assessments conducted in January 2021 after the prolonged school closure, will be used to set the baseline and targets in the SIP for improving learning outcomes. \u2022 Learners school attendance: target schools will conduct regular parent / community meetings and relevant mobilization activities to ensure regular school attendance and specifically monitor retention of girls in grades 6 to 8 and allow reentry for teenage mothers in primary in line with the National reentry guidelines. Target schools are expected to closely track student \u2019 s attendance by gender and grade and submit attendance data on NEMIS. Subcounty-based education teams from the MoE, will be expected to visit target schools once a month to monitor school attendance, identify learners at risk of dropping out and agree on remedial actions with the school management. \u2022 School management and accountability: target schools will be supported to comply with MoE \u2019 s requirements for accountability30 in management of the student capitation grants ( as well as the proposed school grant ) and facilitate teachers to participate in the monthly SBTS cluster meetings. School heads, their deputies, and the senior teacher, will be expected to complete the school instructional leadership module which the TSC plans to develop and deliver through a third party. Support to the 50 30 Include inter alia, availability of approved costed annual work plan by the school committee / boards of management; maintenance of updated cash books; evidence of appropriate store ledgers; adherence to procurement procedures; and school level public disclosure of relevant information IPF", + "ner_text": [ + [ + 124, + 159, + "named" + ], + [ + 4, + 8, + "school reentry learning assessments <> publisher" + ], + [ + 97, + 101, + "school reentry learning assessments <> reference year" + ], + [ + 106, + 110, + "school reentry learning assessments <> publication year" + ], + [ + 173, + 185, + "school reentry learning assessments <> reference year" + ], + [ + 516, + 538, + "school reentry learning assessments <> reference population" + ], + [ + 561, + 587, + "school reentry learning assessments <> reference population" + ] + ], + "validated": true, + "empirical_context": "The KNEC \u2019 s school specific analysis for the learning assessments and examinations conducted in 2020 and 2021, and for the school reentry learning assessments conducted in January 2021 after the prolonged school closure, will be used to set the baseline and targets in the SIP for improving learning outcomes. \u2022 Learners school attendance: target schools will conduct regular parent / community meetings and relevant mobilization activities to ensure regular school attendance and specifically monitor retention of girls in grades 6 to 8 and allow reentry for teenage mothers in primary in line with the National reentry guidelines.", + "type": "assessment", + "explanation": "This is indeed a dataset as it refers to structured assessments used to inform baseline and targets for improving learning outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific assessments conducted to gather data on learning outcomes.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured assessments used to inform baseline and targets for improving learning outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "167_27761", + "page": 20, + "text": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards. In addition, the auditor would carry out field spot check audits to ascertain compliance with contractual requirements. Compliance with conditional cash transfers would be monitored by an independent external consultant ( paragraph C. 3 ). 4. Social Opportunities, constraints, impacts, and risks arising. out of the socio-cultural and political context. The impact of closure and incursions in the West Bank and Gaza has been extensively documented. On the rise are poverty, unemployment, school drop out rates; on the decline or deteriorating are household incomes, living conditions, school attendance rates, at-large nutritional status and in particular of children 0-5 years of age. 13 The so called \u201c newly poor \u201d, those who are just above the poverty line before the outbreak of the second Intifada, in the third quarter of 2000, have been assisted by various interventions. To date, it has been much more difficult to find ways to assist the very poor, or first decile, whose coping mechanisms are essentially exhausted. The proposed project i s designed to support children ( 0-18 years old ) in the first decile. Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "ner_text": [ + [ + 1987, + 2021, + "named" + ], + [ + 741, + 750, + "Living Standards quarterly surveys <> data geography" + ], + [ + 1049, + 1059, + "Living Standards quarterly surveys <> reference population" + ], + [ + 2066, + 2070, + "Living Standards quarterly surveys <> publication year" + ], + [ + 2099, + 2117, + "Living Standards quarterly surveys <> author" + ], + [ + 2119, + 2123, + "Living Standards quarterly surveys <> publication year" + ], + [ + 2143, + 2161, + "Living Standards quarterly surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned alongside other surveys that provide data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'quarterly surveys' which typically indicate structured data collection.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned alongside other surveys that provide data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 50, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 39 Indicator Name Improved community water points ( constructed or rehabilitated ) under the project Definition / Description Number of improved community water points constructed or rehabilitated under the project. A community water point is defined as a public outlet for the provision of water supply to a number of households. Improved community water points refer to standpipes, protected dug well, borehole, or protected spring. It does not include, inter alia, unprotected wells or unprotected springs.. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC tracking tools, Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / NWSC Indicator Name Precent of water and sanitations committees with women in key position in project areas Definition / Description Women holding at least one key position in water user committees for influencing or making decisions on WSS related matters. The key positions are: Chairperson, Vice Chairperson, Secretary and Treasurer. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC tracking tools, Refugee Database and UPMiS.", + "ner_text": [ + [ + 709, + 714, + "named" + ] + ], + "validated": true, + "empirical_context": ". Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC tracking tools, Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "UPMiS is confirmed as a dataset since it is explicitly mentioned as a primary source of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is listed among primary data sources.", + "contextual_reason_agent": "UPMiS is confirmed as a dataset since it is explicitly mentioned as a primary source of data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 42, + "text": "The budget monitoring system will be at the transaction, system, and reporting levels. The budget control in the Integrated Financial Management Information System ( IFMIS ) will be applied based on the Government budget code. The accounting system to be used would enable budget controls and monitoring, budget tracking, and periodic reporting. Expenditures will also be compared to the budgets regularly, explanations will be sought for significant variances, and remedial actions will be taken as appropriate. IFRs would include a variance report along with explanations of material variances. Management will take midway corrective measures based on the reports and explanations. 3. Accounting and staffing arrangement for the project. The GoE \u2019 s accounting policies ( modified cash basis ) and procedures will apply to the project. Separate accounts for the project will be maintained at the PMO. NIDP will develop a project specific FMM, which follows the government procedures and addresses the peculiarities of the project. Preparation of the FMM will be completed within three months of effectiveness. The chart of accounts of the PMO will be updated to accommodate the project. The project is expected to use an accounting system that captures project records at the component, subcomponent, and activity levels. In addition, to comply with government reporting requirements, the project will have to maintain records through IFMIS.", + "ner_text": [ + [ + 113, + 163, + "named" + ] + ], + "validated": false, + "empirical_context": "The budget monitoring system will be at the transaction, system, and reporting levels. The budget control in the Integrated Financial Management Information System ( IFMIS ) will be applied based on the Government budget code. The accounting system to be used would enable budget controls and monitoring, budget tracking, and periodic reporting.", + "type": "system", + "explanation": "However, it is described as a system for budget control and monitoring, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Information System', which often relates to data management.", + "contextual_reason_agent": "However, it is described as a system for budget control and monitoring, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "074_Djibouti-Integrated-Cash-Transfer-and-Human-Capital-Project", + "page": 24, + "text": "The project steering committee is expected to approve the project \u2019 s annual work plans and budgets and ensure coordination with technical ministries and other donors. B. Results Monitoring and Evaluation Arrangements 58. The results monitoring framework assesses progress towards the PDO through key indicators, focusing on providing targeted cash transfers to poor households and supporting access to community-level interventions that improve human capital. In addition, intermediate indicators will be used to monitor the progress of each component over the life of the project. The SEAS will collect data for the activities they will implement. SEAS will be responsible for aggregating results data and preparing periodic reports on results as specified in the Financing Agreement ( FA ). Monitoring will occur at each stage of project implementation to identify arising problems and issues and to promptly consider and adopt corrective measures. 59. The project will conduct a mid-term review and several evaluations to gauge progress towards the PDO, to assess the impact of the project on the beneficiaries, the quality of the works carried out, as well as overall project efficiency. For component 1, these evaluations will include a process evaluation and a targeting assessment to evaluate the accuracy of safety net targeting procedures. For component 3, evaluations will include technical audits of infrastructures built ( at mid-term and end of project ) and audits of adherence to environmental and social safeguards ( at mid-term and end of project ). Beneficiary satisfaction surveys will also be conducted.", + "ner_text": [ + [ + 1568, + 1600, + "named" + ] + ], + "validated": true, + "empirical_context": "For component 3, evaluations will include technical audits of infrastructures built ( at mid-term and end of project ) and audits of adherence to environmental and social safeguards ( at mid-term and end of project ). Beneficiary satisfaction surveys will also be conducted.", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to structured surveys that collect data on beneficiary satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary satisfaction surveys' implies a structured collection of responses from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it refers to structured surveys that collect data on beneficiary satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 64, + "text": "On an annual basis the health service quality assessment will include direct observation of health service process quality measures at hospitals and health centers. ( iv ) Bi-annual patient feedback using exit surveys. ( v ) Bi-annual visits to a sample of BHTs to measure service outputs and quality. ( b ) Periodic TPM data collection: ( i ) Biennial household coverage surveys as baseline / endline surveys in the project \u2019 s three-year timeframe. 35 ( ii ) Citizen engagement survey collected at the household level, with the coverage survey every other year. 2. Data Analysis and Visualization Platform. The platform will emphasize development of an integrated, institutionalized, and sustainable system. The platform will include analysis of health service delivery in refugee and host community areas. The platform will include the following: ( a ) Interactive data visualization platform presenting Results Framework and core indicators. The platform will use data from DHIS2 and the TPM and will include BHI data. It will be updated at least on a quarterly basis. ( b ) Presentation of data in data visualization platform in the form of static and interactive maps, charts and graphs, tailored to project monitoring needs and partner priorities. ( c ) Incorporation of HSF data within the data visualization platform through a link or page within the platform. ( d ) Overlay of disease and health service delivery data with meteorologic data to better understand seasonal patterns in service delivery and infectious disease data. 35 Given the planned project length of three years, this is a baseline and an endline survey. Potential timeframe changes would include interim surveys, which are planned to be light surveys focusing on key indicators.", + "ner_text": [ + [ + 978, + 983, + "named" + ], + [ + 344, + 379, + "DHIS2 <> data description" + ], + [ + 461, + 486, + "DHIS2 <> data type" + ], + [ + 856, + 895, + "DHIS2 <> data type" + ] + ], + "validated": true, + "empirical_context": "The platform will include the following: ( a ) Interactive data visualization platform presenting Results Framework and core indicators. The platform will use data from DHIS2 and the TPM and will include BHI data. It will be updated at least on a quarterly basis.", + "type": "system", + "explanation": "DHIS2 is confirmed as a data source in the context, indicating it is used to provide data for the platform.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned as a source of data for the platform.", + "contextual_reason_agent": "DHIS2 is confirmed as a data source in the context, indicating it is used to provide data for the platform.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 29, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 27 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection three months after civic engagement training. Percentage of beneficiaries taking a more active role in their communities - disabled Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Description: Total number of individuals trained in civic engagement topics who report a higher level of community activity compared to the level reported before receiving training and support.", + "ner_text": [ + [ + 582, + 597, + "named" + ], + [ + 45, + 73, + "Baseline Survey <> reference population" + ], + [ + 77, + 87, + "Baseline Survey <> data geography" + ], + [ + 1091, + 1109, + "Baseline Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training.", + "type": "survey", + "explanation": "The Baseline Survey is explicitly mentioned as a tool for data collection in the context of the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey used for data collection.", + "contextual_reason_agent": "The Baseline Survey is explicitly mentioned as a tool for data collection in the context of the project.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 47, + "text": "For Results area 2, TA will support capacity building activities, such as the creation of a multisectoral team at the woreda level, staffing of key specialists to support SPG implementation; provision of training for relevant staff; M & E including data collection and entry for DHIS2, UNISE, and EMIS; incentive mechanisms for the service delivery system; development of innovations for effective service delivery at federal and regional levels by focusing in selected woredas for demonstrations; and impact evaluation to assess the effectiveness of SPG interventions. 87. The capacity building interventions supported under the IPF component are described in more detail in Annex 8. The HCO includes a PAP to drive intermediate outputs linked to the achievement of the results outlined under each results area, and the IPF component ( subcomponent 2. 3 ) provides TA for the completion of activities in the PAP ( Annex 6 ). Key capacity building activities are summarized in section II. D above, and a more detailed description is provided in Annex 8. D. Capacity Building", + "ner_text": [ + [ + 286, + 291, + "named" + ] + ], + "validated": false, + "empirical_context": "For Results area 2, TA will support capacity building activities, such as the creation of a multisectoral team at the woreda level, staffing of key specialists to support SPG implementation; provision of training for relevant staff; M & E including data collection and entry for DHIS2, UNISE, and EMIS; incentive mechanisms for the service delivery system; development of innovations for effective service delivery at federal and regional levels by focusing in selected woredas for demonstrations; and impact evaluation to assess the effectiveness of SPG interventions. 87.", + "type": "program", + "explanation": "UNISE is mentioned as part of a broader capacity building initiative, not specifically as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed UNISE is a dataset because it is mentioned alongside data collection activities.", + "contextual_reason_agent": "UNISE is mentioned as part of a broader capacity building initiative, not specifically as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 3, + "validated": 1, + "not_validated": 2 + } + }, + { + "filename": "127_PAD10180PAD0P14972400PUBLIC00Box391431B", + "page": 31, + "text": "21 Annex 1: Results Framework and Monitoring REPUBLIC OF LEBANON Municipal Services Emergency Project ( P149724 ) Results Framework Project Development Objectives The project development objective is to address urgent community priorities in selected municipal services5, targeting areas most affected by the influx of Syrian refugees. Project Development Objective Indicators Cumulative Target Values Frequency Data Source / Methodology Responsibilit y for Data Collection Indicator Name Core Unit of Measure Baselin e 2014 2015 2016 2017 End Target Direct project beneficiaries, of which women6 \uf0fd Number, %, 0 60, 000 ( 50 % ) 150, 000 ( 50 % ) 300, 000 ( 50 % ) 300, 000 ( 50 % ) Annual Surveys and service delivery records of CDR. CDR Conflict affected people to whom benefits have been delivered within the first year of project effectiveness, of which: ( i ) women; ( ii ) host population; ( iii ) refugees7 \uf0fd Number 0 60, 000 ( 30, 000 ) ( 40, 000 ) ( 20, 000 ) NA NA 60, 000 ( 30, 000 ) ( 40, 000 ) ( 20, 000 ) Annual Surveys and service delivery records of CDR CDR 5 Municipal services include solid waste management, water, wastewater and sanitation, roads ( and related services ), recreational facilities and community activities. 6 Direct beneficiaries include the Lebanese host", + "ner_text": [ + [ + 683, + 697, + "named" + ] + ], + "validated": false, + "empirical_context": "21 Annex 1: Results Framework and Monitoring REPUBLIC OF LEBANON Municipal Services Emergency Project ( P149724 ) Results Framework Project Development Objectives The project development objective is to address urgent community priorities in selected municipal services5, targeting areas most affected by the influx of Syrian refugees. Project Development Objective Indicators Cumulative Target Values Frequency Data Source / Methodology Responsibilit y for Data Collection Indicator Name Core Unit of Measure Baselin e 2014 2015 2016 2017 End Target Direct project beneficiaries, of which women6 \uf0fd Number, %, 0 60, 000 ( 50 % ) 150, 000 ( 50 % ) 300, 000 ( 50 % ) 300, 000 ( 50 % ) Annual Surveys and service delivery records of CDR. CDR Conflict affected people to whom benefits have been delivered within the first year of project effectiveness, of which: ( i ) women; ( ii ) host population; ( iii ) refugees7 \uf0fd Number 0 60, 000 ( 30, 000 ) ( 40, 000 ) ( 20, 000 ) NA NA 60, 000 ( 30, 000 ) ( 40, 000 ) ( 20, 000 ) Annual Surveys and service delivery records of CDR CDR 5 Municipal services include solid waste management, water, wastewater and sanitation, roads ( and related services ), recreational facilities and community activities.", + "type": "survey", + "explanation": "'Annual Surveys' is mentioned as a method for data collection but does not function as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Annual Surveys' refers to a dataset because it implies a systematic collection of data over time.", + "contextual_reason_agent": "'Annual Surveys' is mentioned as a method for data collection but does not function as a structured collection of data itself.", + "contextual_signal": "mentioned only as a method for data collection, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 19, + "text": "The proposed project will also support the Social Registry Directorate to scale-up its communication and outreach, while strengthening data protection. Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9. In addition, knowledge about the objectives, modalities and strengths of the Social Registry is limited, within the government and among partners. These gaps will be addressed by intensifying the Social Registry \u2019 s communication and outreach efforts. User agreements will specify authorized uses of the data and protection of personal data. 30. The project will also encourage the use of the Social Registry as a tool for program coordination. The objective of this activity is to increase the SCAPP Directorate, Taazour and sectorial ministries \u2019 ownership of the Social Registry, by fostering its use for integrated service provision to poor and vulnerable households, and for better planning of investments according to needs. 31. This sub-component will finance the costs associated with: ( a ) the pilot Social Registry card and its evaluation, potential scale-up to 200, 000 households; ( b ) the development of the outreach strategy and related tools and communication campaign; and ( c ) workshops, analytical and coordination capacity building.", + "ner_text": [ + [ + 190, + 205, + "named" + ], + [ + 259, + 269, + "Social Registry <> publisher" + ], + [ + 915, + 945, + "Social Registry <> reference population" + ], + [ + 1345, + 1363, + "Social Registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "The proposed project will also support the Social Registry Directorate to scale-up its communication and outreach, while strengthening data protection. Few line ministries currently use the Social Registry ( most users are linked to projects supported by the World Bank ) 9. In addition, knowledge about the objectives, modalities and strengths of the Social Registry is limited, within the government and among partners.", + "type": "registry", + "explanation": "In the context, the Social Registry is mentioned as being used by line ministries and linked to projects, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry, which typically implies a structured collection of data.", + "contextual_reason_agent": "In the context, the Social Registry is mentioned as being used by line ministries and linked to projects, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for line ministries", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 28, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 26 of 34 Indicator Name Corporate Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Name: Percentage of beneficiaries taking a more active role in their communities Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0. 00 50. 00 Twice, once before civic engagement training and again at least three months after civic engagement training completion. For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection.", + "ner_text": [ + [ + 694, + 713, + "named" + ], + [ + 77, + 87, + "pre-training survey <> data geography" + ], + [ + 265, + 339, + "pre-training survey <> data description" + ] + ], + "validated": true, + "empirical_context": "For participants in Component 1 and 2 of the project, the Baseline Survey and Post-Training Completion Survey will be used for data collection. For individuals trained only as part of Component 3, a separate pre-training survey will be conducted as well as a follow-up survey conducted at least three months after civic engagement training. M & E Specialist Percentage of beneficiaries taking a more active role in their communities - female Percentag e 0.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey used for data collection in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey used for data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey used for data collection in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "154_431120PAD0P1021e0only10R20081014311", + "page": 15, + "text": "Epidemiologic data illustrate a clear pattern o f gender disparity, with women generally exhibiting higher HIV prevalence rates than men, particularly in the 15-39 year age cohort. Within these age groups, HIV prevalence remains at least 17 percentage points higher for women than for men. Conversely, there are more men who are HIV positive at older ages. This overall epidemiologic pattern i s strongly suggestive o f substantial intergenerational HIV transmission. 21. Demographic impact: The epidemic already has exerted a substantial negative impact on fundamental human development indicators, including life expectancy at birth, infant mortality, and child mortality. For instance, by 2004, the Central Statistics Office estimates that life expectancy had decreased to 56 years. It i s estimated that life expectancy would have increased to 70 years by 2000, in the absence o f the HIV / AIDS epidemic. The impact of AIDS - related morbidity and mortality have resulted in Botswana \u2019 s decline in the UNDP Human Development Index international rankings ( a measure heavily impacted by life expectancy ), from 71 in 1996 to 124 in 2005. 22. HIV / AIDS has distorted the population structure o f Botswana.", + "ner_text": [ + [ + 0, + 18, + "named" + ] + ], + "validated": false, + "empirical_context": "Epidemiologic data illustrate a clear pattern o f gender disparity, with women generally exhibiting higher HIV prevalence rates than men, particularly in the 15-39 year age cohort. Within these age groups, HIV prevalence remains at least 17 percentage points higher for women than for men.", + "type": "data", + "explanation": "However, 'epidemiologic data' is used here as a general term for information rather than a specific structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'epidemiologic data' refers to a dataset due to its mention in a statistical context.", + "contextual_reason_agent": "However, 'epidemiologic data' is used here as a general term for information rather than a specific structured collection of data.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "032_IBArchive-e8d67f4f-bc76-49af-9b6c-6099c748075b", + "page": 14, + "text": "South Sudan ranks in the bottom third of countries for the HDI \u2019 s life-course gender gap10 and women \u2019 s empowerment. 11 Local governance and access to services in South Sudan possess gendered dimensions, such that women and girls are affected disproportionately compared to men and boys. A society steeped in patriarchy underpins male authority and decision-making in local community leadership, customary law, restorative justice, the police and security forces, and the household. One survey shows the civic and political participation of men at 84 percent, compared to women at 15 percent. 12 The percentage of women in leadership roles was highest in Western Bahr el Ghazal state ( 30. 3 percent ) and lowest in Warrap State ( 4. 9 percent ). 13 In addition, there are limited income generating opportunities for women. When women do generate income, 7 World Bank 2021. 8 UNDP ( United Nations Development Programme ). 2020. Human Development Report. 9 World Bank 2021. 10 HDI \u2019 s life-course gender gap compiles 12 indicators that analyze gender gaps in choices and opportunities across the life-span including education, labor and work, political representation, time use, and social protection. HDI \u2019 s women \u2019 s empowerment dashboard compiles 13 woman-specific empowerment indicators in three categories: reproductive health and family planning, violence against women and girls, and socioeconomic empowerment. 11 UNDP. 2018. Human Development Indices and Indicators: 2018 Statistical Update - South Sudan. 12 Kenwill International Limited. 2015. Fortifying Equality and Economic Diversification ( FEED ): Improved Livelihoods in South Sudan. Gender Assessment Report, World Vision. 13 Kenwill International Limited 2015.", + "ner_text": [ + [ + 489, + 495, + "named" + ], + [ + 0, + 11, + "survey <> data geography" + ], + [ + 216, + 221, + "survey <> reference population" + ], + [ + 574, + 579, + "survey <> reference population" + ], + [ + 657, + 685, + "survey <> data geography" + ], + [ + 718, + 730, + "survey <> data geography" + ], + [ + 859, + 869, + "survey <> publisher" + ], + [ + 959, + 969, + "survey <> publisher" + ], + [ + 1424, + 1428, + "survey <> publisher" + ], + [ + 1430, + 1434, + "survey <> publication year" + ], + [ + 1478, + 1482, + "survey <> publication year" + ], + [ + 1504, + 1515, + "survey <> data geography" + ], + [ + 1551, + 1555, + "survey <> publication year" + ], + [ + 1679, + 1691, + "survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "A society steeped in patriarchy underpins male authority and decision-making in local community leadership, customary law, restorative justice, the police and security forces, and the household. One survey shows the civic and political participation of men at 84 percent, compared to women at 15 percent. 12 The percentage of women in leadership roles was highest in Western Bahr el Ghazal state ( 30.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on civic and political participation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'survey' typically refers to a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on civic and political participation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 66, + "text": "The project will address climate resilience by: - Investing in community resilience, which entails targeting of investments in climate vulnerable communities, integrating a climate filter in participatory needs assessments and local planning and screening and prioritizing of investments that are climate smart and adapted, as well as activities that promote climate resilience; and rehabilitation and maintenance of infrastructure will integrate climate - resilient design practices; - Building capacity for community resilience, through raising community awareness on climate change risks and providing technical assistance and capacity building of key stakeholders on climate dimensions; - Building the knowledge base on climate change, which involves data collection, analysis and enhanced knowledge on climate change risks and trends in Casamance, supporting research studies on climate change towards policy action, climate diagnostics ( using the CDD app ) for Community Facilitators to promote climate-informed local development planning, and engaging communities in collecting climate data ( using the CDD app ) and monitoring climate risks with the CDD application", + "ner_text": [ + [ + 954, + 961, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will address climate resilience by: - Investing in community resilience, which entails targeting of investments in climate vulnerable communities, integrating a climate filter in participatory needs assessments and local planning and screening and prioritizing of investments that are climate smart and adapted, as well as activities that promote climate resilience; and rehabilitation and maintenance of infrastructure will integrate climate - resilient design practices; - Building capacity for community resilience, through raising community awareness on climate change risks and providing technical assistance and capacity building of key stakeholders on climate dimensions; - Building the knowledge base on climate change, which involves data collection, analysis and enhanced knowledge on climate change risks and trends in Casamance, supporting research studies on climate change towards policy action, climate diagnostics ( using the CDD app ) for Community Facilitators to promote climate-informed local development planning, and engaging communities in collecting climate data ( using the CDD app ) and monitoring climate risks with the CDD application", + "type": "tool", + "explanation": "However, the CDD app is described as a tool for facilitating data collection and monitoring, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed the CDD app is a dataset because it is mentioned in the context of data collection and analysis.", + "contextual_reason_agent": "However, the CDD app is described as a tool for facilitating data collection and monitoring, not as a structured collection of data itself.", + "contextual_signal": "mentioned only as a tool for data collection, not as a data source", + "tags": [] + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 61, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 57 of 74 and have received cash transfers, at least for one payment cycle. on a quarterly basis Payment data will be regularly updated in the SNSOP MIS Percentage of Labor Intensive Public Works subprojects that mitigate climate - induced shocks Percentage of LIPW subprojects that are designed to mitigate the impacts of climate-induced shocks in beneficiary communities, as identified under the Project Operations Manual. This indicator will be measured at least on a quarterly basis through missions and ISRs LIPW monitoring forms The implementing partner will periodically monitor LIPW as part of quality control and monitoring and evaluation activities carried out by dedicated field staff including field-based engineers Implementing partner Number of LIPW work days created Number of total LIPW work days generated by SNSOP This indicator will be measured at minimum on a quarterly basis including through missions and ISR ' s SNSOP MIS which hosts information on LIPW work days used to generate payment schedules. The number of LIPW work days will be documented at LIPW work sites and collected by field-based staff.", + "ner_text": [ + [ + 1028, + 1037, + "named" + ] + ], + "validated": false, + "empirical_context": "on a quarterly basis Payment data will be regularly updated in the SNSOP MIS Percentage of Labor Intensive Public Works subprojects that mitigate climate - induced shocks Percentage of LIPW subprojects that are designed to mitigate the impacts of climate-induced shocks in beneficiary communities, as identified under the Project Operations Manual. This indicator will be measured at least on a quarterly basis through missions and ISRs LIPW monitoring forms The implementing partner will periodically monitor LIPW as part of quality control and monitoring and evaluation activities carried out by dedicated field staff including field-based engineers Implementing partner Number of LIPW work days created Number of total LIPW work days generated by SNSOP This indicator will be measured at minimum on a quarterly basis including through missions and ISR ' s SNSOP MIS which hosts information on LIPW work days used to generate payment schedules. The number of LIPW work days will be documented at LIPW work sites and collected by field-based staff.", + "type": "system", + "explanation": "However, the context indicates that SNSOP MIS is described as a system that hosts information rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'MIS' which often stands for Management Information System, suggesting data handling.", + "contextual_reason_agent": "However, the context indicates that SNSOP MIS is described as a system that hosts information rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 44, + "text": "31 Beneficiaries that feel project investments reflected their needs ( percentage, disaggregated by gender ) The percentage of beneficiary population that feel that the project activities have reflected their needs as expressed through a baseline beneficiary assessment MTR, end of project Beneficiary satisfaction surveys NGOs, TLUs, PCU, CTDs Of which women The percentage of beneficiary women that feel that the project activities have reflected their needs as expressed through a baseline beneficiary assessment MTR, end of project Beneficiary satisfaction surveys NGOs, TLUs, PCU, CTDs. Intermediate Results Indicators Indicator Name Description ( indicator definition etc. ) Frequency Data Source / Methodology Responsibility for Data Collection Cities for which the urban database is updated regularly The number of cities for which data on the key indicators in MINHDU \u2019 s urban database are provided every two years Starting Y3, annual M & E reports MINHDU, PCU Sector land use plans developed and validated under the project The number of sector land use plans that have been developed and validated under the project in accordance with the principles of integrated urban planning, urban resilience, and citizen participation Starting Y3, Annual M & E reports CU, MINHDU, TLUs, PCU Asset management system for road and drainage assets operational in Douala and Yaound\u00e9 Assess the progress in developing and operating the new maintenance systems in Douala and Yaound\u00e9 Annual M & E reports, technical audits", + "ner_text": [ + [ + 290, + 322, + "named" + ], + [ + 238, + 269, + "Beneficiary satisfaction surveys <> data description" + ], + [ + 378, + 395, + "Beneficiary satisfaction surveys <> reference population" + ], + [ + 1548, + 1566, + "Beneficiary satisfaction surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "31 Beneficiaries that feel project investments reflected their needs ( percentage, disaggregated by gender ) The percentage of beneficiary population that feel that the project activities have reflected their needs as expressed through a baseline beneficiary assessment MTR, end of project Beneficiary satisfaction surveys NGOs, TLUs, PCU, CTDs Of which women The percentage of beneficiary women that feel that the project activities have reflected their needs as expressed through a baseline beneficiary assessment MTR, end of project Beneficiary satisfaction surveys NGOs, TLUs, PCU, CTDs. Intermediate Results Indicators Indicator Name Description ( indicator definition etc. ) Frequency Data Source / Methodology Responsibility for Data Collection Cities for which the urban database is updated regularly The number of cities for which data on the key indicators in MINHDU \u2019 s urban database are provided every two years Starting Y3, annual M & E reports MINHDU, PCU Sector land use plans developed and validated under the project The number of sector land use plans that have been developed and validated under the project in accordance with the principles of integrated urban planning, urban resilience, and citizen participation Starting Y3, Annual M & E reports CU, MINHDU, TLUs, PCU Asset management system for road and drainage assets operational in Douala and Yaound\u00e9 Assess the progress in developing and operating the new maintenance systems in Douala and Yaound\u00e9 Annual M & E reports, technical audits", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a source of data for assessing beneficiary needs and satisfaction.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to surveys that collect data on beneficiary satisfaction.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a source of data for assessing beneficiary needs and satisfaction.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 93, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 86 of 174 This is not covered by \" People provided with new or improved electricity service \" indicator. of which, refugee beneficiaries Number of refugees ( specifically ) living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data.. Estimated based on collected data about refugees and project - level data. NIGELEC / ANPER / ANERS OL. Public facilities electrified under the project The indicator captures the number of public institutions ( health facilities, schools, administrative buildings, market places, religious sites, etc. ) provided with an electricity connection under the project. The connection can come from any type of solution ( grid, mini-grid, standalone systems ). Semi - annually. NIGELEC / ANP ER / ANERSOL project databases. Determined based on the reporting of the contractors responsible for connections and Project databases. NIGELEC / ANPER / ANERS OL. People provided with clean and efficient cooking solutions under the project The indicator captures the number of people directly benefiting from clean cooking solutions that were acquired through the project including refugee and host population. Semi - annually.", + "ner_text": [ + [ + 996, + 1003, + "named" + ] + ], + "validated": false, + "empirical_context": "Semi - annually. NIGELEC / ANP ER / ANERSOL project databases. Determined based on the reporting of the contractors responsible for connections and Project databases.", + "type": "project", + "explanation": "'ANERSOL' is mentioned as a project, not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ANERSOL' is a dataset because it is mentioned alongside 'project databases'.", + "contextual_reason_agent": "'ANERSOL' is mentioned as a project, not as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "172_multi0page", + "page": 42, + "text": "Data collection activities are carried out M A properly staffed and equipped EMIS is in a timely manner and are utilized for established in the MEST. planning and management purposes. The sector performance indicators are defined and included in annual data collection activities on a gradual basis over time. The planning department of the MEST is restructured on the basis of key planning operations and staff qualifications. Overall Risk Rating 5 Risk Rating - H ( High Risk ), S ( Substantial Risk ), M ( Modest Risk ), N ( Negligible or Low Risk ) 3. Possible Controversial Aspects: 3. 1 The Project supports the provision of services by private SPs such as, Catholic, Protestant and Islamic religious groups, NGOs and CBOs. Even though there is a long tradition in Sierra Leone by these SPs, and currently 85 % of government-assisted schools are administered by the missions, it can be argued that the provision of education services be made solely by the State. It is clear that the proposed approach has an advantage for the MEST so that it serves as a regulatory and policy making body, which is more consistent with modern trends, and more efficient in meeting the education needs of the population. - 37 -", + "ner_text": [ + [ + 77, + 81, + "named" + ] + ], + "validated": false, + "empirical_context": "Data collection activities are carried out M A properly staffed and equipped EMIS is in a timely manner and are utilized for established in the MEST. planning and management purposes.", + "type": "system", + "explanation": "'EMIS' is mentioned as a system but not as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'EMIS' is a dataset because it is an acronym that could imply a structured system for data management.", + "contextual_reason_agent": "'EMIS' is mentioned as a system but not as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 17, + "text": "The proposed project will allow for the inclusion of poor and vulnerable households in the refugee and host communities of the Hodh Chargui region within the SSN system, including the Social Registry, Tekavoul, and Elmaouna programs. Component 1: Updating and enhancing the Social Registry ( US $ 7. 3 million with US $ 4. 0 million from national IDA18, US $ 0. 9 million from IDA18 RSW, US $ 0. 9 million from ASP MDTF and US $ 1. 5 million from government ) 22. This component will be implemented by the Taazour General Delegation. It supports a full update of the Social Registry and inclusion of eligible refugee households ( Sub-component 1. 1 ) and its usage for operational and strategic purposes ( Sub-component 1. 2 ). Payment Platform POOR HOUSEHOLDS Social Transfer Program \u2022 Extreme Poor households \u2022 5-year cycle \u2022 CCT \u2022 Social promotion Social Registry \u2022 Identification \u2022 Data collection \u2022 Support to targeted social programs Shock - responsive Safety Net Program \u2022 Vulnerablehouseholds \u2022 4 months \u2022 Unconditional CT \u2022 Using SP tools Early Warning Systems DATA DATA Beneficiary list Beneficiary List", + "ner_text": [ + [ + 274, + 289, + "named" + ], + [ + 127, + 146, + "Social Registry <> data geography" + ], + [ + 506, + 532, + "Social Registry <> author" + ], + [ + 600, + 627, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "The proposed project will allow for the inclusion of poor and vulnerable households in the refugee and host communities of the Hodh Chargui region within the SSN system, including the Social Registry, Tekavoul, and Elmaouna programs. Component 1: Updating and enhancing the Social Registry ( US $ 7. 3 million with US $ 4.", + "type": "registry", + "explanation": "In the context, 'Social Registry' is mentioned as part of the SSN system, indicating it functions as a data source for the project.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Social Registry' suggests a structured collection of data related to households.", + "contextual_reason_agent": "In the context, 'Social Registry' is mentioned as part of the SSN system, indicating it functions as a data source for the project.", + "contextual_signal": "mentioned as part of the SSN system", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 48, + "text": "Methodology for Data Collection Indicator values will be collected from the administrative data of MODEE and public - and private-sector relying parties on the use of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improving trusted, people-centric data sharing Description Availability of trusted, people-centric data sharing [ Yes / No ]. Frequency Annually Data source ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports. Methodology for Data Collection Indicator values will be collected from ( a ) DPI implementation reports released by MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation, and testing reports, and ( c ) Official reports submitted by third - party assessment bodies recruited to carry out the Privacy Impact Assessments. All indicator values will be cross - checked by the IVA. Responsibility for Data Collection MODEE. Enhanced management of medical records Description Promoting digital transformation in health servcie delivery by scaling up the coverage of an existing EMR platform to all MOH facility sites, prioritizing facility sites in districts with a high concentration of Syrian refugees. Frequency Biannualy. Data source Administrative data from the EHS. Methodology for Data Collection Data collected from the EHS to measure how many MOH facility sites had installed and operationalized the EMR platform. Responsibility for Data Collection MOH and EHS.", + "ner_text": [ + [ + 76, + 95, + "named" + ], + [ + 99, + 104, + "administrative data <> publisher" + ], + [ + 330, + 335, + "administrative data <> publisher" + ], + [ + 542, + 547, + "administrative data <> publisher" + ], + [ + 600, + 605, + "administrative data <> publisher" + ], + [ + 813, + 818, + "administrative data <> publisher" + ], + [ + 871, + 876, + "administrative data <> publisher" + ], + [ + 1145, + 1150, + "administrative data <> publisher" + ], + [ + 1465, + 1484, + "administrative data <> data type" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Indicator values will be collected from the administrative data of MODEE and public - and private-sector relying parties on the use of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE.", + "type": "administrative data", + "explanation": "In this context, 'administrative data' is explicitly mentioned as a source for collecting indicator values, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'administrative data' typically refers to structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, 'administrative data' is explicitly mentioned as a source for collecting indicator values, confirming its role as a dataset.", + "contextual_signal": "mentioned as a source for collecting indicator values", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 11, + "validated": 10, + "not_validated": 1 + } + }, + { + "filename": "085_Lebanon-Creating-Economic-Opportunities-in-Support-of-the-Lebanon-National-Jobs-Program-for-Results-Project", + "page": 99, + "text": "- Capital Investment Program ( CIP ) WBG is currently engaged with the GOL on energy reforms which is considered a high priority agenda Macroeconomic and fiscal environment * Unequal economic development across Lebanon, and inefficient and unproductive public financial management system, and macro-fiscal framework with large vulnerabilities. \u2022 Program targets lagging regions \u2022 Cross cutting MFD approach to alleviate fiscal burden. CIP Reform Agenda Business Climate * Weak business environment ( including access to finance ) and lack of diversity in economy mainly focused on real estate and tourism, relatively high import and export costs, bad infrastructure and cumbersome procedures.. \u2022 Supporting growth of firms along AGR and ICT value chains through matching grants * \u2022 Improve lending to SMEs and strengthen start-ups * \u2022 Trade facilitation environment improved through operationalizing AEO program * \u2022 Promoting competition in broadband through harmonized licensing regimes ( addressing existing monopoly ) * \u2022 Attracting new investments to the Tripoli Special Economic Zone under best practice model * Existing IFC engagements in starting business, advisory to develop commercial bank lending ( e. g. to women ) Education * N / A The GOL is working on defining a TVET strategy with support from ILO and UNICEF Foundation: Data availability and access to information Lack of strong statistical base and lack of access to information ( including gender-disaggregated ) \u2022 Gender-focused labor market surveys \u2022 SME observatory \u2022 Impact evaluation", + "ner_text": [ + [ + 1484, + 1519, + "named" + ], + [ + 211, + 218, + "Gender-focused labor market surveys <> data geography" + ], + [ + 1219, + 1224, + "Gender-focused labor market surveys <> reference population" + ], + [ + 1310, + 1313, + "Gender-focused labor market surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "g. to women ) Education * N / A The GOL is working on defining a TVET strategy with support from ILO and UNICEF Foundation: Data availability and access to information Lack of strong statistical base and lack of access to information ( including gender-disaggregated ) \u2022 Gender-focused labor market surveys \u2022 SME observatory \u2022 Impact evaluation", + "type": "survey", + "explanation": "This is indeed a dataset as it refers to surveys that collect gender-disaggregated labor market data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'surveys', which often refers to structured data collections.", + "contextual_reason_agent": "This is indeed a dataset as it refers to surveys that collect gender-disaggregated labor market data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "004_BOSIB-87c444de-4797-4bf9-b654-4932a7fb0112", + "page": 27, + "text": "The World Bank Integrated Community Resilience Project ( P506969 ) PROJECT APPRAISAL DOCUMENT Page 18 62. M & E teams will also be embedded in each of the Project Implementation Unit ( PIU ), MASS and ADDS. These teams, in coordination with the community facilitators mobilized at local level, will be responsible for collecting data based on the results framework outlined in the M & E plan. They will transmit data on a regular basis, contributing to the periodic reports. Data to track key performance indicators will be collected from various sources: ( a ) project-specific surveys and questionnaires; ( b ) service providers; ( c ) local governments; ( d ) consultant reports; and ( e ) construction progress reports from supervising engineers / engineering firm. 63. A comprehensive midterm review of the project \u2019 s implementation and results will be conducted by the government and the World Bank in 2027, during which the target values will be assessed and any necessary adjustments to the project design will be made if needed. The project will provide targeted support to strengthen M & E capacity within MASS and ADDS. Where feasible, the project will finance consultants to assist MASS and ADDS in developing a detailed M & E and reporting system plan.", + "ner_text": [ + [ + 663, + 681, + "named" + ] + ], + "validated": false, + "empirical_context": "They will transmit data on a regular basis, contributing to the periodic reports. Data to track key performance indicators will be collected from various sources: ( a ) project-specific surveys and questionnaires; ( b ) service providers; ( c ) local governments; ( d ) consultant reports; and ( e ) construction progress reports from supervising engineers / engineering firm. 63.", + "type": "document", + "explanation": "However, 'consultant reports' are mentioned as sources of information rather than as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'reports' can imply structured information.", + "contextual_reason_agent": "However, 'consultant reports' are mentioned as sources of information rather than as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 13, + "text": "These measures would allow the MEP to optimize its management and fulfil its mandate to ensure that resources are used effectively to support the learning and development of students throughout the country. 17 Inclusion: Gender, Refugees and Migrants, and Indigenous Peoples 11. Gender parity between boys and girls to access STEAM related jobs is a challenge on which Costa Rica has made progress, but more policy action is needed. PISA 2022 mean scores for Costa Rica in Reading are above LAC \u2019 s average ( 415 vs. 400 ), as well as mean scores in Mathematics ( 385 vs. 374 ) and Sciences ( 411 vs. 400 ). Figure 1a shows that there is a statistically significant gender gap in Mathematics but not in Reading and Science as observed in PISA data. Looking at graduates of pre-university technical education ( mainly technical stream secondary education ) one finds an interesting pattern comparing three specializations ( Figure 1b ). Gender disparities are high in Electronics and Automation, less in Software Development and were recently eliminated in Network and Database Design. The final example of Network and Database Design in Figure 1b shows that gender disparity can be overcome in a short period of time.", + "ner_text": [ + [ + 738, + 742, + "named" + ], + [ + 369, + 379, + "PISA <> data geography" + ], + [ + 438, + 442, + "PISA <> publication year" + ], + [ + 459, + 469, + "PISA <> data geography" + ], + [ + 738, + 747, + "PISA <> data type" + ], + [ + 1233, + 1251, + "PISA <> usage context" + ] + ], + "validated": true, + "empirical_context": "400 ). Figure 1a shows that there is a statistically significant gender gap in Mathematics but not in Reading and Science as observed in PISA data. Looking at graduates of pre-university technical education ( mainly technical stream secondary education ) one finds an interesting pattern comparing three specializations ( Figure 1b ).", + "type": "dataset", + "explanation": "PISA is indeed a dataset as it is referenced in the context of analyzing educational performance data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA is known for providing educational assessment data.", + "contextual_reason_agent": "PISA is indeed a dataset as it is referenced in the context of analyzing educational performance data.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 51, + "text": "The World Bank Burundi Integrated Community Development Project ( P169315 ) Page 46 of 86 Beneficiaries that feel project investments reflected their needs This indicator will measure the extent to which sub - project selection reflects beneficiary preferences in a consistent manner. Annual Beneficiary surveys Reports Facilitating Partners and PIU Female - Beneficiaries that feel project investments reflected their needs Refugees - Beneficiaries that feel project investments reflected their needs Microenterprises supported This indicator will measure the number of micro - enterprises that receives investment grants under Component 2. 1. Annual Project M & E system / MIS / GIS Regular reports Facilitating Partners and PIU Women Led \u2013 Microenterprises Supported Refugee Led \u2013 Microenterprises Supported Households engaged in production or transformation of high-nutritious food This indicator will track the number of households engaged in the production of high-nutritious food with the support of Component 2, through private sector grants for, among other things, community gardens, fish ponds, hatcheries and nurseries.", + "ner_text": [ + [ + 285, + 311, + "named" + ], + [ + 4, + 14, + "Annual Beneficiary surveys <> publisher" + ], + [ + 15, + 22, + "Annual Beneficiary surveys <> data geography" + ], + [ + 350, + 372, + "Annual Beneficiary surveys <> reference population" + ], + [ + 1147, + 1165, + "Annual Beneficiary surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "The World Bank Burundi Integrated Community Development Project ( P169315 ) Page 46 of 86 Beneficiaries that feel project investments reflected their needs This indicator will measure the extent to which sub - project selection reflects beneficiary preferences in a consistent manner. Annual Beneficiary surveys Reports Facilitating Partners and PIU Female - Beneficiaries that feel project investments reflected their needs Refugees - Beneficiaries that feel project investments reflected their needs Microenterprises supported This indicator will measure the number of micro - enterprises that receives investment grants under Component 2. 1.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used to measure beneficiary preferences and is explicitly mentioned in the context of project evaluation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'Annual Beneficiary surveys' which implies a structured collection of data from beneficiaries.", + "contextual_reason_agent": "This is indeed a dataset as it is used to measure beneficiary preferences and is explicitly mentioned in the context of project evaluation.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 55, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 51 of 74 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Beneficiaries of social safety net programs This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider will document payment data and share with the implementing partner Implementing Partner Beneficiaries of social safety net programs - Female This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider and implementing agency will document payment data Implementing Partner Beneficiary households of social safety net programs - Refugees The number of total beneficiaries HHs that are This indicator will be SNSOP MIS which hosts The implementing partner will collect Implementing Partner", + "ner_text": [ + [ + 437, + 446, + "named" + ], + [ + 4, + 14, + "SNSOP MIS <> publisher" + ], + [ + 306, + 349, + "SNSOP MIS <> reference population" + ], + [ + 459, + 500, + "SNSOP MIS <> data description" + ], + [ + 1158, + 1166, + "SNSOP MIS <> reference population" + ] + ], + "validated": true, + "empirical_context": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 51 of 74 IO Table SPACE UL Table SPACE Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Beneficiaries of social safety net programs This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration. The payment service provider will document payment data and share with the implementing partner Implementing Partner Beneficiaries of social safety net programs - Female This indicator will be measured at least on a quarterly basis during missions and ISRs SNSOP MIS which hosts beneficiary registration and payment data The implementing partner will collect beneficiary data during targeting and registration.", + "type": "system", + "explanation": "It is indeed a dataset as it is explicitly mentioned as hosting beneficiary registration and payment data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a system that hosts data related to beneficiaries.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly mentioned as hosting beneficiary registration and payment data used for empirical analysis.", + "contextual_signal": "mentioned as a data source that hosts beneficiary registration and payment data", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 18, + "validated": 10, + "not_validated": 8 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 13, + "text": "Cameroon has ranked consistently in the lower quintile of world governance indicators for the last two decades ( Figures 5-1 and 5-2, Annex 5 ). Governance, Government effectiveness, and corruption are consistently acknowledged as key issues by Cameroonians, 3 the private sector, 4 and the GoC. 5 3 Transparency International ( 2013 ), Global Corruption Barometer, Afrobarometer ( 2011 \u2013 2013 and 2014 \u2013 2015 ). 4 World Bank ( 2009 ) Cameroon Enterprise Surveys, INS ( 2009 ) firm census, GoC ( 2011 ) Business Climate Survey; WEF ( World Economic Forum ). 2015. Global Competitiveness Report 2014 \u2013 2015. 5 DSCE 2010 \u2013 2020; President Paul Biya \u2019 s New Year Speeches in December 2003, 2005, 2006, 2007, 2014, 2015 or official communication to Cabinet in December 2004, September 2006, September 2007, March 2008, July 2009, and October 2015.", + "ner_text": [ + [ + 503, + 526, + "named" + ], + [ + 0, + 8, + "Business Climate Survey <> data geography" + ], + [ + 245, + 257, + "Business Climate Survey <> reference population" + ], + [ + 415, + 425, + "Business Climate Survey <> publisher" + ], + [ + 428, + 432, + "Business Climate Survey <> publication year" + ], + [ + 470, + 474, + "Business Climate Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "5 3 Transparency International ( 2013 ), Global Corruption Barometer, Afrobarometer ( 2011 \u2013 2013 and 2014 \u2013 2015 ). 4 World Bank ( 2009 ) Cameroon Enterprise Surveys, INS ( 2009 ) firm census, GoC ( 2011 ) Business Climate Survey; WEF ( World Economic Forum ). 2015.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned alongside other data sources used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is listed among other surveys and datasets.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned alongside other data sources used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 293, + 299, + "named" + ] + ], + "validated": false, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection systems.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 175, + "text": "Drought is noted as a significant risk for Niger, causing impacts to public health, livestock, and pastoral livelihoods in recent years. More than a million people have been affected in the five droughts since 2005, with a 2009 event affecting nearly 8 million people. Probabilistic data on drought risk are not available at the writing of this document. However, figure 7. 3 shows the geographic occurrence drought, which occurs in most of the southern and western parts of the country. While not specifically quantified in the data presented here, drought is an exacerbating climate condition for fire risk. Ongoing vegetation management schemes may reduce impacts to transmission, distribution, and other power assets. 89 Data from Broeckx et al. 2018.", + "ner_text": [ + [ + 269, + 287, + "named" + ] + ], + "validated": false, + "empirical_context": "More than a million people have been affected in the five droughts since 2005, with a 2009 event affecting nearly 8 million people. Probabilistic data on drought risk are not available at the writing of this document. However, figure 7.", + "type": "data", + "explanation": "'Probabilistic data' is mentioned as not being available, indicating it is not a dataset or data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'probabilistic data' refers to a structured collection of data related to drought risk.", + "contextual_reason_agent": "'Probabilistic data' is mentioned as not being available, indicating it is not a dataset or data source in this context.", + "contextual_signal": "mentioned as not available, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 59, + "text": "The household-level survey would provide gender-disaggregated data on labor market outcomes ( e. g. labor force participation, employment by sector / region / age, etc. ). The employer survey would be implemented in key economic sectors ( e. g. manufacturing, tourism, agriculture, etc. ) to gather information on women \u2019 s employment in those sectors, at the firm level. Such gender-disaggregated data are missing today and are critical for the newly established ministry for relevant and adequate evidence-based policy making on gender. o Gender database with gender-disaggregated data ( DLR 9. 2 ). A database is proposed to be set up at the OMSWA to compile existing and the newly-collected gender-disaggregated data ( as per DLR 9. 1 ). The first step will be to coordinate with different ministries ( working with the Gender Units, for example ) and donor agencies to compile existing administrative data related to women ( e. g. access to education, jobs, health, etc. ). o Childcare provision action plan and launch of a pilot project ( DLR 9. 3 ). High-quality childcare accessibility and affordability are widely accepted as necessary areas needing improvement to enhance women \u2019 s participation in the labor market in Lebanon. This activity will therefore develop a detailed feasibility study, with a clear action plan and required laws / regulations to be passed, and", + "ner_text": [ + [ + 4, + 26, + "named" + ], + [ + 100, + 125, + "household-level survey <> data description" + ], + [ + 127, + 162, + "household-level survey <> data description" + ], + [ + 314, + 319, + "household-level survey <> reference population" + ], + [ + 645, + 650, + "household-level survey <> publisher" + ], + [ + 1229, + 1236, + "household-level survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The household-level survey would provide gender-disaggregated data on labor market outcomes ( e. g.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that provides data on labor market outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that provides data on labor market outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "185_multi-page", + "page": 182, + "text": "Baseline ( KDHS, 1999 ): 90 percent * Economic situation will improve during Target ( KDHS, 2003 ): 100 percent project life. * Intensified prograrn of HIV / AIDS prevention, care and support will be * By 2003, the median age of first sex * Kenya Demographic and Health socially and culturally acceptable at among young women ( 15-24 years ) will Survey ( KDHS ) data community level. increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "ner_text": [ + [ + 438, + 442, + "named" + ], + [ + 17, + 21, + "KDHS <> reference year" + ], + [ + 92, + 96, + "KDHS <> publication year" + ], + [ + 241, + 246, + "KDHS <> data geography" + ], + [ + 412, + 416, + "KDHS <> reference year" + ] + ], + "validated": true, + "empirical_context": "increase. Baseline ( KDHS, 1999 ): 12 years Target ( KDHS, 2003 ): 14 years * Reports from the Ministry of * By 2004, HIV / AIDS curricula will be Education and other Ministries integrated and examinable in an increasing number of primary and secondary schools, and various post-graduate institutions. Baseline: less than I percent Target: 75 percent", + "type": "survey", + "explanation": "KDHS is indeed a dataset as it is used to establish baseline and target metrics for educational outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because KDHS is referenced with baseline and target values, suggesting it provides empirical data.", + "contextual_reason_agent": "KDHS is indeed a dataset as it is used to establish baseline and target metrics for educational outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "045_Turkey-Agricultural-Employment-Support-for-Refugees-and-Turkish-Citizens-through-Enhanced-Market-Linkages-Project", + "page": 51, + "text": "Quarterly Administrativ e data ACC Proportion of trainees satisfied with the training received Measures the proportion of trainees who report being Quarterly Survey data Beneficiary survey ACC PIU", + "ner_text": [ + [ + 148, + 164, + "named" + ], + [ + 35, + 94, + "Quarterly Survey <> data description" + ], + [ + 212, + 230, + "Quarterly Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Quarterly Administrativ e data ACC Proportion of trainees satisfied with the training received Measures the proportion of trainees who report being Quarterly Survey data Beneficiary survey ACC PIU", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as 'Quarterly Survey data' indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is labeled as a 'Quarterly Survey' which suggests a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as 'Quarterly Survey data' indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 29, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 24 of 85 63. Technical assistance. The activity will finance international technical assistance required for the technical assessment and needs analysis of the current data collection system that will provide recommendations for the most appropriate technical solutions to the problems identified, including data security and disaster recovery mechanisms. Other key enhancements include: a. The introduction of unique student identifiers with student-level descriptors so that students can be tracked throughout their schooling. b. The creation of an open data portal that will make real time education management information system ( EMIS ) data available to relevant stakeholders ( students, parents of students, teachers, school leaders, MENFOP personnel, and so on ). c. The development of a human resource management sub-portal. This will include all information on teachers and staff, including numbers, deployment to schools, and salaries and benefits. It would also include individualized professional development data with such details as professional development courses, training, certification for teachers, allowing inspectors and PAs to follow up on an individual basis with all staff. 64. Training. Relevant MENFOP staff will be trained on the use of these enhanced data systems and on use of the available data for decision making purposes.", + "ner_text": [ + [ + 628, + 644, + "named" + ] + ], + "validated": false, + "empirical_context": "b. The creation of an open data portal that will make real time education management information system ( EMIS ) data available to relevant stakeholders ( students, parents of students, teachers, school leaders, MENFOP personnel, and so on ). c.", + "type": "portal", + "explanation": "However, the term 'open data portal' refers to a platform for accessing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions 'data' in the context of an open data portal.", + "contextual_reason_agent": "However, the term 'open data portal' refers to a platform for accessing data rather than a structured collection of data itself.", + "contextual_signal": "mentioned only as a portal, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 9, + "validated": 1, + "not_validated": 8 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 20, + "text": "Funds will be used for supporting: ( i ) the design, development, and maintenance of appropriate software for Fayda and back-end operations, including upgrading and scaling up existing Fayda systems; ( ii ) developing a mobile ID application incorporating a consented data-sharing function and a digital wallet; ( iii ) supporting system integration and interoperability with other sectoral systems; iv ) procuring software, licenses, and subscriptions for automated biometric identification systems; ( v ) software development kits for registration and authentication processes; ( vi ) ID card personalization and lifecycle management; ( vii ) the short message service; ( viii ) public key infrastructure for encrypting and digitally-signing data; and ( ix ) back-office systems such as enterprise resource planning tools, business intelligence and data analysis tools for monitoring and evaluation ( M & E ), and x ) various collaboration tools. 39. Subcomponent 2. 2 \u2013 Supporting development of data infrastructure ( US $ 15 million IDA, US $ 3 million WHR ).", + "ner_text": [ + [ + 789, + 823, + "named" + ] + ], + "validated": false, + "empirical_context": "Funds will be used for supporting: ( i ) the design, development, and maintenance of appropriate software for Fayda and back-end operations, including upgrading and scaling up existing Fayda systems; ( ii ) developing a mobile ID application incorporating a consented data-sharing function and a digital wallet; ( iii ) supporting system integration and interoperability with other sectoral systems; iv ) procuring software, licenses, and subscriptions for automated biometric identification systems; ( v ) software development kits for registration and authentication processes; ( vi ) ID card personalization and lifecycle management; ( vii ) the short message service; ( viii ) public key infrastructure for encrypting and digitally-signing data; and ( ix ) back-office systems such as enterprise resource planning tools, business intelligence and data analysis tools for monitoring and evaluation ( M & E ), and x ) various collaboration tools. 39.", + "type": "tool", + "explanation": "However, it is not a dataset as it refers to software tools rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'enterprise resource planning tools' can involve data management functionalities.", + "contextual_reason_agent": "However, it is not a dataset as it refers to software tools rather than a structured collection of data.", + "contextual_signal": "mentioned only as a tool, not as a data source", + "tags": [] + }, + { + "filename": "116_PAD1510-PAD-P152821-IDA-R2016-0078-1-Box394886B-OUO-9", + "page": 44, + "text": "34 Number of people participating in community-based decision - making / participatory planning exercises ( Number ) 0. 00 30, 000 50, 000 70, 000 90, 000 100, 000 100, 000 Number of people who have participated in social cohesion activities under the project ( Number ) 0. 00 30, 000 50, 000 70, 000 90, 000 100, 000 100, 000 Share of former refugees who report having good relations with host communities ( Percentage ) To be determined 40 % 40 % 50 % 50 % 60 % 60. 00 End target will be adjusted based on the results of the baseline survey. Number of plans in districts or wards mainstreamed with climate resilience ( Number ) 0. 00 0 0 1 2 3 3. 00 Number of individuals participating in regional knowledge and learning exchanges ( Number ) 0. 00 0 10 25 35 50 50. 00 Number of individuals participating in capacity building trainings ( Number ) 0. 00 80 130 160 180 200 200. 00 Indicator Description Project Development Objective Indicators Indicator Name Description ( indicator definition etc. ) Frequency Data Source / Methodology Responsibility for Data", + "ner_text": [ + [ + 527, + 542, + "named" + ], + [ + 336, + 351, + "baseline survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "00 30, 000 50, 000 70, 000 90, 000 100, 000 100, 000 Share of former refugees who report having good relations with host communities ( Percentage ) To be determined 40 % 40 % 50 % 50 % 60 % 60. 00 End target will be adjusted based on the results of the baseline survey. Number of plans in districts or wards mainstreamed with climate resilience ( Number ) 0.", + "type": "survey", + "explanation": "In the context, it is mentioned that the end target will be adjusted based on the results of the baseline survey, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "In the context, it is mentioned that the end target will be adjusted based on the results of the baseline survey, indicating it is used as a data source.", + "contextual_signal": "follows 'adjusted based on the results of'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "110_Lebaon-RACE2-PforR-Board-Package-PAD-WB-9-5-16-09072016", + "page": 53, + "text": "DLI # 6. 2 Disaggregated data by school type, geographical area, grade, Lebanese / non-Lebanese, gender. Data should be available in a digital central database in March. CERD publishes the disaggregated enrollment data for all schools on its website in August of each year. Yes CERD website ( http: / / www. crd p. org / en ) Third Party Review of enrollment and grade completion numbers and of the implementation of CERD data verification protocol. DLI # 7. 1 Decision to form the Curricula Higher Committee is issued by the Minister. The Curricula Higher Committee includes national stakeholders and representatives from the Education sector ( e. g. CERD, MEHE, Educational Inspectorate, and Faculty of Education among others ). The committee meets and approves the curriculum development work plan, which will be approved by the Minister. Standard Operating Procedures ( SOPs ) are prepared by CERD and approved by the Curricula Higher Committee. The work plan and SOPs are published on the CERD website. No CERD website ( http: / / www. crd p. org / en ) Third Party Checking the CERD website for the curriculum development plan and SOPs. DLI # 7. 2 Curriculum design document should include student learning outcomes, and scope & sequence, which should be developed for the subjects in each grade. Curriculum design documents need to be approved by the Curricula Higher Committee. Financing for the curriculum will include all cycles. No Signed Document at CERD Third Party Checking that design documents were approved by the Curricula Higher Committee.", + "ner_text": [ + [ + 189, + 218, + "named" + ], + [ + 170, + 174, + "disaggregated enrollment data <> publisher" + ], + [ + 278, + 282, + "disaggregated enrollment data <> publisher" + ], + [ + 417, + 421, + "disaggregated enrollment data <> publisher" + ], + [ + 652, + 656, + "disaggregated enrollment data <> publisher" + ], + [ + 897, + 901, + "disaggregated enrollment data <> publisher" + ], + [ + 994, + 998, + "disaggregated enrollment data <> publisher" + ], + [ + 1011, + 1015, + "disaggregated enrollment data <> publisher" + ], + [ + 1084, + 1088, + "disaggregated enrollment data <> publisher" + ], + [ + 1462, + 1466, + "disaggregated enrollment data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Data should be available in a digital central database in March. CERD publishes the disaggregated enrollment data for all schools on its website in August of each year. Yes CERD website ( http: / / www.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific enrollment data that is collected and made available for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific data that is published and disaggregated for analysis.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific enrollment data that is collected and made available for empirical analysis.", + "contextual_signal": "published data on a website", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 62, + "text": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 57 of 87 SHS delployed, out of which Quarterly Sales reports of SHS suppliers, reports of verification agents, progress reports of PIU Data provided by suppliers of SHS and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy In areas housing refugees and host communities Quarterly Sales reports of SHS suppliers, reports of verification agents, progress reports of PIU Data provided by suppliers of SHS and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy Female-headed households Quaterly Reports of verification agents, progress reports of PIU Data provided by PIU and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy Community forest resources under integrated and participative management Quaterly Reports of verification agents, progress reports of Data provided by PIU and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy", + "ner_text": [ + [ + 105, + 145, + "named" + ], + [ + 348, + 377, + "Quarterly Sales reports of SHS suppliers <> reference population" + ], + [ + 1073, + 1091, + "Quarterly Sales reports of SHS suppliers <> usage context" + ] + ], + "validated": true, + "empirical_context": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 57 of 87 SHS delployed, out of which Quarterly Sales reports of SHS suppliers, reports of verification agents, progress reports of PIU Data provided by suppliers of SHS and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy In areas housing refugees and host communities Quarterly Sales reports of SHS suppliers, reports of verification agents, progress reports of PIU Data provided by suppliers of SHS and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy Female-headed households Quaterly Reports of verification agents, progress reports of PIU Data provided by PIU and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy Community forest resources under integrated and participative management Quaterly Reports of verification agents, progress reports of Data provided by PIU and verified by an independent verification agent PIU of the Ministry of Petroleum and Energy", + "type": "report", + "explanation": "This is a dataset as it provides structured data on sales from SHS suppliers used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to structured reports that contain sales data.", + "contextual_reason_agent": "This is a dataset as it provides structured data on sales from SHS suppliers used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 29, + "text": "The project will support: ( a ) collection of gender-disaggregated data and gender statistics in the project \u2019 s results framework wherever relevant and possible; ( b ) improving access to and utilization of RMNCAH - N services by addressing both demand - and supply-side challenges, taking into consideration the full and equal participation of women and men; and ( c ) strengthening capacity of the MOH, including in the design, implementation and coordination of multi-sectoral issues such as stunting, climate resilience and FGM. II. PROJECT DESCRIPTION A. Project Development Objective PDO Statement To improve the utilization of quality reproductive, maternal, neonatal, child, adolescent health and nutrition ( RMNCAH-N ) services, with priority given to underserved areas, refugees and host communities PDO Level Indicators The PDO indicators will be: i ) Percent of children fully immunized before their first birthday Among girls Among refugees", + "ner_text": [ + [ + 46, + 71, + "named" + ], + [ + 76, + 93, + "gender-disaggregated data <> data type" + ] + ], + "validated": true, + "empirical_context": "The project will support: ( a ) collection of gender-disaggregated data and gender statistics in the project \u2019 s results framework wherever relevant and possible; ( b ) improving access to and utilization of RMNCAH - N services by addressing both demand - and supply-side challenges, taking into consideration the full and equal participation of women and men; and ( c ) strengthening capacity of the MOH, including in the design, implementation and coordination of multi-sectoral issues such as stunting, climate resilience and FGM. II.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to specific data collected and utilized for analysis within the project's results framework.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'gender-disaggregated data' implies a structured collection of data categorized by gender.", + "contextual_reason_agent": "This is indeed a dataset as it refers to specific data collected and utilized for analysis within the project's results framework.", + "contextual_signal": "mentioned as part of the project's results framework", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "159_38147core", + "page": 15, + "text": "PPU would have several means to monitor results. 0 0 0 The PPU would prepare and submit to IDA quarterly reports detailing the technical, operational, financial, and administrative progress. A CSIA would monitor project impact and social outcomes using participatory rural appraisal methods. The CSIA would offer an opportunity to cross verify the PPU reports. Mid-term and prior-to-closing environmental audits would verify the PHP \u2019 s compliance with agreed environmental guidelines and safeguards. An independent technical audit would verify compliance with engineering standards for housing, water and environment mitigation. 4 Sustainability 50. The demand driven nature o f the PHP would ensure its long term sustainability. It would help transform refugee camps into sustainable habitats and better integrate IDPs and non-IDPs. Regularized land title, strengthened social capital, better management o f water resources and improved IDP-non IDP relations would reinforce the PHP \u2019 s long term sustainability. The homeowner-driven approach, the transformation o f refugee camps to improved habitat, community development and better environment 21 Details of the monitoring indicators are provided in Annex 3. 10", + "ner_text": [ + [ + 504, + 531, + "named" + ] + ], + "validated": false, + "empirical_context": "Mid-term and prior-to-closing environmental audits would verify the PHP \u2019 s compliance with agreed environmental guidelines and safeguards. An independent technical audit would verify compliance with engineering standards for housing, water and environment mitigation. 4 Sustainability 50.", + "type": "audit", + "explanation": "However, it is not a dataset as it refers to a process of verification rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'audit' can imply a systematic review of data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a process of verification rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 119, + "text": "The World Bank Primary Education Equity in Learning Program ( P176867 ) Page 114 of 117. ANNEX 6. PROGRAM ACTION PLAN Action Description Source DLI # Responsibility Timing Completion Measurement Undertake data analysis in the NEMIS platform and publish annual educational statistical booklet. Technical MoE Recurrent Yearly Annual educational statistical booklet published for core education data, including refugee children and learner with special needs. Conduct policy dialogue for inclusion / mainstrea ming of refugees in the NEMIS. Technical MoE Other Continuous Data for refugee children included in the NEMIS Training: implementation of the consolidated three year capacity building plan for staff development at MoE and the Program \u2019 s implementing entities ( IEs ). Technical MoE Recurrent Yearly Targets in the plan implemented Technical expertise to be engaged in the areas of CBC and formative assessments Technical MoE Recurrent Yearly two international Partner institutions procured to provide technical support to KICD and KNEC in the implementation and monitoring of the roll out of CBC and assessments. Utilization of the online based item portal for competency based assessment Technical KNEC Recurrent Continuous Teachers post assessment items in the portal and also use the assessment items in the portal. Timely release of funds Exchequer to the Implementing Entities Fiduciary Systems NT, MoE and TSC Recurrent Yearly Timely release of funds to the Implementing Entities ( IEs ) PPRA to undertake compliance assessment.", + "ner_text": [ + [ + 226, + 231, + "named" + ] + ], + "validated": false, + "empirical_context": "ANNEX 6. PROGRAM ACTION PLAN Action Description Source DLI # Responsibility Timing Completion Measurement Undertake data analysis in the NEMIS platform and publish annual educational statistical booklet. Technical MoE Recurrent Yearly Annual educational statistical booklet published for core education data, including refugee children and learner with special needs.", + "type": "program", + "explanation": "However, NEMIS is described as a platform, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of data analysis and educational statistics.", + "contextual_reason_agent": "However, NEMIS is described as a platform, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "033_IBArchive-38b35d89-2c5c-4636-ab7e-0ba662b95a5f", + "page": 45, + "text": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 41 of 68 significant emphasis on ensuring that youth, and specifically those belonging to the most disadvantaged groups \u2013 women, refugees, and persons with disabilities, will be able to participate in this program and hence data gathered will be disaggregated by sub-group. Data gathered at the training provider level will include information on leadership and management, school resources, teacher and student management, infrastructure information, program initiation and completion, beneficiary surveys, and TVET satisfaction survey. Project implementation will be monitored through supervision missions and others conducted jointly by the Government and the World Bank. A set of technical, infrastructure and process evaluations and audits will be supported regularly to inform the project of adaptation measures being undertaken by the project. Lessons learned from relevant assessments will be used for course correction during project implementation. C. Sustainability 87. Project sustainability in this context is difficult to assess given that the overwhelming share of development expenditures comes from development partners. Most of government financing is used to cover recurrent expenditure in the post-basic education and training sector.", + "ner_text": [ + [ + 595, + 619, + "named" + ], + [ + 4, + 14, + "TVET satisfaction survey <> publisher" + ], + [ + 15, + 23, + "TVET satisfaction survey <> data geography" + ], + [ + 130, + 135, + "TVET satisfaction survey <> reference population" + ], + [ + 205, + 210, + "TVET satisfaction survey <> reference population" + ], + [ + 212, + 220, + "TVET satisfaction survey <> reference population" + ], + [ + 226, + 251, + "TVET satisfaction survey <> reference population" + ], + [ + 415, + 568, + "TVET satisfaction survey <> data description" + ], + [ + 746, + 756, + "TVET satisfaction survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Djibouti Skills Development for Employment Project ( P175483 ) Page 41 of 68 significant emphasis on ensuring that youth, and specifically those belonging to the most disadvantaged groups \u2013 women, refugees, and persons with disabilities, will be able to participate in this program and hence data gathered will be disaggregated by sub-group. Data gathered at the training provider level will include information on leadership and management, school resources, teacher and student management, infrastructure information, program initiation and completion, beneficiary surveys, and TVET satisfaction survey. Project implementation will be monitored through supervision missions and others conducted jointly by the Government and the World Bank.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that gathers data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific survey that collects data on satisfaction.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that gathers data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 17, + "text": "According to the RSRI survey, just 24 percent of refugees reported doing paid work in the previous seven days ( though this increases to 45 percent for refugees living in Kigali ) and only eight percent run a business or are engaged in farming. Low income is reflected in poor food security, with almost 60 percent of refugee households reporting that they typically eat only one meal per day. The main reasons provided for the low employment levels were lack of skills ( 44 percent ) and lack of information about the local labor market ( 34 percent ). Other reasons cited include the need for investments in roads and connectivity to strengthen market access for agricultural producers and traders. A 2024 African Development Bank ( AfDB ) study on forced displacement in the region concluded that priority needs for self-reliance are better road connectivity, access to water and energy, improved educational and 6 The GoR \u2019 s strategy is consistent with lessons from global experience in the 2023 World Development Report that a sustainable approach to managing forced displacement requires: ( a ) policies that provide freedom of movement and the right to work; ( b ) inclusion of refugees into national service delivery systems to shift away from expensive parallel systems; and ( c ) support for self-reliance through access to jobs.", + "ner_text": [ + [ + 17, + 28, + "named" + ], + [ + 49, + 57, + "RSRI survey <> reference population" + ], + [ + 152, + 160, + "RSRI survey <> reference population" + ], + [ + 171, + 177, + "RSRI survey <> data geography" + ], + [ + 318, + 336, + "RSRI survey <> reference population" + ], + [ + 703, + 707, + "RSRI survey <> publication year" + ], + [ + 996, + 1000, + "RSRI survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "According to the RSRI survey, just 24 percent of refugees reported doing paid work in the previous seven days ( though this increases to 45 percent for refugees living in Kigali ) and only eight percent run a business or are engaged in farming. Low income is reflected in poor food security, with almost 60 percent of refugee households reporting that they typically eat only one meal per day.", + "type": "survey", + "explanation": "The RSRI survey is indeed a dataset as it provides structured data on refugees' work and food security, used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on refugees' employment and food security.", + "contextual_reason_agent": "The RSRI survey is indeed a dataset as it provides structured data on refugees' work and food security, used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 46, + "text": "The World Bank Municipal Services Improvement Project in Refugee Affected Areas ( P169996 ) Page 42 of 94 refugees ) managed sanitation services in the selected municipalities that have been provided through this Project. progress reports, based on updated beneficiary \u2019 s records from respective SKIs. People benefitting from safely managed solid waste services in the selected municipalities as a result of the project This indicator measures the cumulative number of people benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs.", + "ner_text": [ + [ + 1130, + 1138, + "named" + ], + [ + 4, + 14, + "PMU Data <> publisher" + ], + [ + 693, + 713, + "PMU Data <> data type" + ] + ], + "validated": true, + "empirical_context": "Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which female ) This indicator measures the percentage of female benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs.", + "type": "data", + "explanation": "In this context, 'PMU Data' refers to a structured collection of data compiled from beneficiary records, indicating it is indeed used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'PMU Data' is a dataset because it is mentioned in the context of compiling and recording information.", + "contextual_reason_agent": "In this context, 'PMU Data' refers to a structured collection of data compiled from beneficiary records, indicating it is indeed used as a data source.", + "contextual_signal": "mentioned as a data source for compiling reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 57, + "text": "Given the large demand of the national statistical system, the choice is made to focus either on the areas where the World Bank clearly has a comparative advantage among the donors or areas not supported by the other donors, namely, household survey, population census, national account, and archiving and dissemination. At the core of all this are two aspects: data collection and capacity building. The philosophy underlying data collection is to improve the design of the surveys to take into account the most recent methodological approaches. As for capacity building, the preference is given either to train staff locally or to use the learning-by - doing approach. Three subcomponents are distinguished as follows: Subcomponent 4. 1. Improving poverty-related data 39. Objective. The objective of this subcomponent is to improve the poverty related data production and analysis. 40. Current status. INS has implemented living conditions surveys in 1996, 2001, 2007, and 2014. The last three surveys have used very close methodologies and poverty indicators are comparable over", + "ner_text": [ + [ + 251, + 268, + "named" + ], + [ + 905, + 908, + "population census <> author" + ], + [ + 925, + 950, + "population census <> data type" + ], + [ + 966, + 970, + "population census <> reference year" + ], + [ + 976, + 980, + "population census <> publication year" + ] + ], + "validated": true, + "empirical_context": "Given the large demand of the national statistical system, the choice is made to focus either on the areas where the World Bank clearly has a comparative advantage among the donors or areas not supported by the other donors, namely, household survey, population census, national account, and archiving and dissemination. At the core of all this are two aspects: data collection and capacity building.", + "type": "census", + "explanation": "In this context, 'population census' is explicitly mentioned as part of the national statistical system, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'population census' is a recognized term for a structured collection of demographic data.", + "contextual_reason_agent": "In this context, 'population census' is explicitly mentioned as part of the national statistical system, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + }, + "term_stats": { + "total": 6, + "validated": 5, + "not_validated": 1 + } + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 28, + "text": "18 women \u2019 s rights in the workplace ( targeting both employers and employees, including refugees ); ( ii ) share best practices amongst private sector firms in retaining and attracting women; ( iii ) raise awareness on the economic impact of stronger participation of women in the labor market; ( iv ) address gender norms; and ( v ) address concerns and raise awareness on the economic impact of sexual harassment at the workplace25. b. Gender-focused household and employer surveys. Supported by local and international experts and in partnership with relevant stakeholders, eight household and employer levels surveys will be conducted to shed light on the challenges faced by women in the Lebanese labor market. Such gender-disaggregated data are missing today and are critical for the newly established Ministry for relevant and adequate evidence-based policy making on gender. c. Gender database with gender-disaggregated data. A database will be set up at the OMSWA to compile existing and collect new ( see previous bullet point ) gender-disaggregated data. The data will be publicly available. d. Childcare provision action plan and launch of a pilot project. High-quality childcare accessibility and affordability are widely accepted as necessary areas needing improvement to enhance women \u2019 s participation in the labor market in Lebanon.", + "ner_text": [ + [ + 722, + 747, + "named" + ] + ], + "validated": false, + "empirical_context": "Supported by local and international experts and in partnership with relevant stakeholders, eight household and employer levels surveys will be conducted to shed light on the challenges faced by women in the Lebanese labor market. Such gender-disaggregated data are missing today and are critical for the newly established Ministry for relevant and adequate evidence-based policy making on gender. c.", + "type": "data", + "explanation": "However, it is not a dataset itself but rather a description of the type of data that is needed for analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'gender-disaggregated data' implies a structured collection of information categorized by gender.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a description of the type of data that is needed for analysis.", + "contextual_signal": "mentioned only as a type of data needed, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 19, + "text": "The World Bank Building Skills for Human Capital Development in South Sudan ( P178654 ) Page 15 of 66 23. Functional mechanisms to support evidence-based decision-making and management of the sector are needed. MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators. Quality, reliable data is fundamental to inform education decisions and policies and enhance transparency. Accordingly, strengthening the EMIS and the annual school survey process to provide quality education information, in a timely manner, at every administrative level is essential to support the functions of the education ministries. 24. The education governing ministries lack the policy tools and implementation capacity needed for education sector management. The General Education system is characterized by inequitable allocation of teachers across the states, unclear deployment drivers, and outdated salary structure.", + "ner_text": [ + [ + 566, + 617, + "named" + ] + ], + "validated": false, + "empirical_context": "MoGEI has an Education Management Information System ( EMIS ), but its implementation has not been tackled comprehensively to date and the existing data essential for sector evaluation and development has significant gaps. The EMIS requires dissemination to all states and harmonization with other databases in the MoGEI, such as the examination results, South Sudan School Attendance and Monitoring System, Human Resources Management Information System, and Financial Management Information System. The existing data collection instruments also need revision to incorporate the required information for monitoring the implementation of key indicators.", + "type": "system", + "explanation": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'System' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system and not explicitly as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "182_multi0page", + "page": 42, + "text": "Government maintains its services to vulnerable institutional and policy 3. DFID reports commitment to population groups developed, changes introduced 4. Public opinion surveys decentralization and budget including those related to 3. Financing models 5. Consultant reports reform residential service developed and implemented 6. Project records / reports 3. Government maintains its institutions; 4. IEP developed and 7. BIA studies commitment to preventive implemented periodically 8. MOLSA / GASS and community-based b ) capacity of the MOLSA 5. BIA carried out and reports / records services for formulation of results used to further 4. Qualified people available community-based social improve the programs to design and implement IEP services policy strengthened; 6. # of donors participating 5. Qualified people available in financing to design and implement c ) capacity of the MOLSA community-based social BIAs to monitor policy developed; services 6. Donors maintain their interested in funding social d ) models for financing of services in Albania community-based social services developed; - 39 -", + "ner_text": [ + [ + 154, + 176, + "named" + ], + [ + 1053, + 1060, + "Public opinion surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "DFID reports commitment to population groups developed, changes introduced 4. Public opinion surveys decentralization and budget including those related to 3. Financing models 5.", + "type": "survey", + "explanation": "In this context, 'public opinion surveys' are explicitly mentioned, indicating they are used as a source of data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'public opinion surveys' typically involve structured data collection from respondents.", + "contextual_reason_agent": "In this context, 'public opinion surveys' are explicitly mentioned, indicating they are used as a source of data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 93, + "text": "However, as part of the development of the ERfKE II program, the MoE with the support of the Bank, commissioned a series of eight very detailed preparation studies that, along with other analytic work at the Bank, provide considerable insights into the potential value-added and returns from the various components of the ERfKE program components. The analysis concentrated on findings from three preparation studies and one piece of Bank analytic work in particular: Education Finance ( Georgina Rawle, 2008 ); School Planning ( Bruno Parolin, 2008 ); Teacher Utilization ( Rawlinson and Allak, 2008 ); and \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d ( World Bank, HDNED, 2008 ). 12 Investing in Non-personnel Recurrent Expenditures likely to Enhance Quality 17. Component 3 is the second largest of the five ERfKE II program components ( about US $ 50 million ) as well as a component with aspects clearly related to the improvement of education quality in a manner supported by both the international literature on investing in education quality and the assessment, albeit suggestive, by Rawle ( 2008 ) for Jordan. As Rawle ( 2008: 42 ) discusses, Jordan \u2019 s share of recurrent educational expenditure dedicated to personnel and salaries, while falling, is still high ( compared, for example, to the OECD average of 20 percent ). Recurrent expenditure overall is also low compared to infrastructure investment. Table 3 shows that across all education programs and levels personnel expenses ( mostly salaries ) account for about 14 percent of total recurrent spending, less than half of which is devoted explicitly to quality related activities. Component 3 focuses on Teaching and 12 To a lesser extent, we used the preparation studies on Decentralization, Pre-Service Teacher Training, Vocational Education Reform; and Early Childhood Education.", + "ner_text": [ + [ + 144, + 163, + "named" + ] + ], + "validated": false, + "empirical_context": "However, as part of the development of the ERfKE II program, the MoE with the support of the Bank, commissioned a series of eight very detailed preparation studies that, along with other analytic work at the Bank, provide considerable insights into the potential value-added and returns from the various components of the ERfKE program components. The analysis concentrated on findings from three preparation studies and one piece of Bank analytic work in particular: Education Finance ( Georgina Rawle, 2008 ); School Planning ( Bruno Parolin, 2008 ); Teacher Utilization ( Rawlinson and Allak, 2008 ); and \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d ( World Bank, HDNED, 2008 ).", + "type": "study", + "explanation": "However, 'preparation studies' are described as analytic work rather than structured collections of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'preparation studies' are datasets because they involve detailed analysis and findings.", + "contextual_reason_agent": "However, 'preparation studies' are described as analytic work rather than structured collections of data.", + "contextual_signal": "mentioned only as studies, not as data sources", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 48, + "text": "Methodology for Data Collection Indicator values will be collected from the administrative data of MODEE and public - and private-sector relying parties on the use of transactional digital services that incorporate trusted, people-centric DPI, and cross-checked by the IVA through spot surveys. Responsibility for Data Collection MODEE. Improving trusted, people-centric data sharing Description Availability of trusted, people-centric data sharing [ Yes / No ]. Frequency Annually Data source ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports. Methodology for Data Collection Indicator values will be collected from ( a ) DPI implementation reports released by MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation, and testing reports, and ( c ) Official reports submitted by third - party assessment bodies recruited to carry out the Privacy Impact Assessments. All indicator values will be cross - checked by the IVA. Responsibility for Data Collection MODEE. Enhanced management of medical records Description Promoting digital transformation in health servcie delivery by scaling up the coverage of an existing EMR platform to all MOH facility sites, prioritizing facility sites in districts with a high concentration of Syrian refugees. Frequency Biannualy. Data source Administrative data from the EHS. Methodology for Data Collection Data collected from the EHS to measure how many MOH facility sites had installed and operationalized the EMR platform. Responsibility for Data Collection MOH and EHS.", + "ner_text": [ + [ + 664, + 694, + "named" + ] + ], + "validated": false, + "empirical_context": "Improving trusted, people-centric data sharing Description Availability of trusted, people-centric data sharing [ Yes / No ]. Frequency Annually Data source ( a ) Annual reports on DPI implementation from MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation and testing reports, and ( c ) Third-party assessment reports. Methodology for Data Collection Indicator values will be collected from ( a ) DPI implementation reports released by MODEE, sectoral ministries, and the private sector, ( b ) MODEE \u2019 s software documentation, and testing reports, and ( c ) Official reports submitted by third - party assessment bodies recruited to carry out the Privacy Impact Assessments.", + "type": "document", + "explanation": "However, it is not functioning as a data source in this context, as it is described as a type of document rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured information.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it is described as a type of document rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 66, + "text": "Some NITA-U staff are already familiar with STEP, which is being used under RCIP-5. 3. Use of National Procurement System. National procurement procedures shall only apply if the requirements as required by the paragraph 5. 3 Procurement Regulations70 are met. In March 2017 ( updated February 2018 ) the PPDA issued a guideline titled \u201c reservations to promote local content in public procurement \u201d 70 ( a ) open advertising of the procurement opportunity at the national level; ( b ) the procurement is open to eligible firms from any country; ( c ) the request for bids / request for proposals document shall require that Bidders / Proposers submitting Bids / Proposals present a signed acceptance at the time of bidding, to be incorporated in any resulting contracts, confirming application of, and compliance with, the World Bank \u2019 s Anti-Corruption Guidelines, including without limitation the World Bank \u2019 s right to sanction and the World Bank \u2019 s inspection and audit rights; ( d ) Procurement Documents include provisions, as agreed with the World Bank, intended to adequately mitigate against environmental, social ( including sexual exploitation and abuse ( SEA ) and gender-based violence ( GBV ), health and safety ( \u201c ESHS \u201d ) risks and impacts; ( e ) contracts with an appropriate allocation of responsibilities, risks, and liabilities; ( f ) publication of contract award information; ( g ) rights for the World Bank to review procurement documentation and activities; ( h ) an effective complaints mechanism; and ( i ) maintenance of records of the Procurement Process.", + "ner_text": [ + [ + 44, + 48, + "named" + ] + ], + "validated": false, + "empirical_context": "Some NITA-U staff are already familiar with STEP, which is being used under RCIP-5. 3.", + "type": "program", + "explanation": "However, STEP is referred to as a program and not as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it is mentioned in a context related to data usage.", + "contextual_reason_agent": "However, STEP is referred to as a program and not as a data source or structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 44, + "text": "An Environmental and Social Systems Assessment ( ESSA ) was conducted to review the adequacy and capacity of the environmental and social ( E & S ) systems at both national and county levels and recommend material measures to avoid, minimize and mitigate adverse E & S effects associated with PEELP. The assessment also identified measures required to strengthen the performance of the existing E & S system as well as buttress the capacity of the MoE to implement these measures. The essential finding of the assessment is that there are adequate Environmental and Social Management Systems ( ESMSs ) in Kenya to address environmental, health and safety, as well as social effects related to Program activities. The systems are primarily consistent with the six core principles of the World Bank \u2019 s guidance on Program-for-Results financing Environmental and Social Systems Assessment ( to effectively manage program risks and promote sustainable development. 80. The assessment however, identified gaps that need to be addressed for more effective management of E & S effects. These include: ( i ) Sporadic compliance with E & S provisions ( application of ESMS as provided by EMCA, 1999, OSH Act, 2007 for school infrastructure activities ) due to limited awareness, resourcing, and 55 UNHCR & World Bank, ( 2021 ), Understanding the Socio-economic Conditions of Refugees in Kenya: Volume C: Results from the 2020-21 Urban Socioeconomic Survey.", + "ner_text": [ + [ + 1413, + 1447, + "named" + ] + ], + "validated": true, + "empirical_context": "The assessment however, identified gaps that need to be addressed for more effective management of E & S effects. These include: ( i ) Sporadic compliance with E & S provisions ( application of ESMS as provided by EMCA, 1999, OSH Act, 2007 for school infrastructure activities ) due to limited awareness, resourcing, and 55 UNHCR & World Bank, ( 2021 ), Understanding the Socio-economic Conditions of Refugees in Kenya: Volume C: Results from the 2020-21 Urban Socioeconomic Survey.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey providing results used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it references a specific survey that likely contains structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey providing results used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 68, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 63 of 85 communities. DLR # 4. 2: MENFOP calculates this annually from its statistical tables. The baseline repetition rate considered for the project is 24. 4 percent. The project end target is 14. 4 percent in year 5. DLR # 4. 3: MENFOP will calculate the improvement in gender parity index for gross enrollment in lower secondary education annually from its statistical tables. The index will be disaggregated by urban and rural. Please note that DLR # 4. 2 is the GPE variable part indicator for efficiency and DLR # 4. 3 is the GPE variable part indicator for equity. Data source / Agency DLR # 4. 1: MENFOP; DLR # 4. 2: MENFOP / Statistical Tables; DLR # 4. 3: MENFOP / Statistical Tables Verification Entity DLR # 4. 1: WB. DLR # 4. 2 and 4. 3: IVA Procedure DLR # 4. 1: The World Bank will verify that sub-regional plans are based on locally gathered evidence and address local constraints to enrollment and retention, especially for vulnerable populations. Disbursement formula: US $ 300, 000 disbursed upon approval of plans. DLR # 4. 2: The independent verification agent will review figures of reported repetition rates.", + "ner_text": [ + [ + 152, + 170, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR # 4. 2: MENFOP calculates this annually from its statistical tables. The baseline repetition rate considered for the project is 24.", + "type": "table", + "explanation": "'Statistical tables' are mentioned as a source of information but not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'statistical tables' are datasets because they contain organized data.", + "contextual_reason_agent": "'Statistical tables' are mentioned as a source of information but not as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a source of information, not as a data source", + "tags": [] + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 23, + "text": "Washington, DC: World Bank. https: / / openknowledge. worldbank. org / handle / 10986 / 34712. 14 The survey results were published in June 2020 using data collected in 2018. The MTF defines access to electricity through seven attributes: capacity, availability, reliability, quality, affordability, formality, and health and safety. Tier 1 defines at least 4 hours availability per day including at least 1 hour per evening, with the capacity sufficient to power task lighting and phone charging / radio. Tier 2 electricity is available at least 4 hours per day, including at least 2 hours per evening, and capacity is sufficient to power low-load appliances. In Tier 3, Tier 4, and Tier 5, electricity is available at least 8 hours, 16 hours, and 23 hours, respectively, with higher standards for other attributes. 15 Off-grid solutions are not included. 16 According to the Sustainable Energy for All SDG7 tracker ( 2018 ), access rate in Niger was 20 percent in 2017; however, national data are used in the text.", + "ner_text": [ + [ + 877, + 916, + "named" + ], + [ + 919, + 923, + "Sustainable Energy for All SDG7 tracker <> reference year" + ], + [ + 942, + 947, + "Sustainable Energy for All SDG7 tracker <> data geography" + ], + [ + 966, + 970, + "Sustainable Energy for All SDG7 tracker <> reference year" + ], + [ + 981, + 994, + "Sustainable Energy for All SDG7 tracker <> data type" + ] + ], + "validated": true, + "empirical_context": "15 Off-grid solutions are not included. 16 According to the Sustainable Energy for All SDG7 tracker ( 2018 ), access rate in Niger was 20 percent in 2017; however, national data are used in the text.", + "type": "tracker", + "explanation": "The context indicates that the tracker provides national data, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a tracker, which often implies a collection of data over time.", + "contextual_reason_agent": "The context indicates that the tracker provides national data, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 47, + "text": "Results for gender related component are captured in: ( i ) PDO indicator, \u2018 Improved retention of poor and vulnerable girls in upper primary ( grades 7 to 8 ) which also measures reduction in dropouts in upper primary and ( ii ) intermediate results indicator: \u2018 Number of poor and vulnerable girls receiving scholarship, school kits and mentoring support services \u2019 ( DLI 4 ) measures the number of girls receiving the above-mentioned support disaggregated by refugee and host communities with a total allocation of US $ 22 million for the DLI. The PAP and the IPF Component, includes TA to support a \u2018 Strengthened supply chain for sanitary towels and accountability \u2019. These indicators and PAP action are described in detail in the results framework. Analytical work on gender concerns and priorities in Kenya, including girl \u2019 s education, will be supported as part of the proposed Kenya gender diagnostic analytical work in FY23 and FY 24 ( Kenya Gender Diagnostic ). ii. Gender-Based Violence ( GBV ): The PAP includes an action to scale up the GBV interventions being piloted under SEQIP to all primary schools, including camp-based refugee schools. Under SEQIP, a baseline survey was carried out with a sample from 110 subcounties which provided data on factors impacting retention and school dropout.", + "ner_text": [ + [ + 1173, + 1188, + "named" + ], + [ + 808, + 813, + "baseline survey <> data geography" + ], + [ + 887, + 892, + "baseline survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Gender-Based Violence ( GBV ): The PAP includes an action to scale up the GBV interventions being piloted under SEQIP to all primary schools, including camp-based refugee schools. Under SEQIP, a baseline survey was carried out with a sample from 110 subcounties which provided data on factors impacting retention and school dropout.", + "type": "survey", + "explanation": "In this context, it is indeed a dataset as it provided data on factors impacting retention and school dropout.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data collected for analysis.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it provided data on factors impacting retention and school dropout.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 40, + "text": "FSNAU \u2019 s livelihoods baseline and household-to-livestock ratios as well as expert - based judgements on expected livestock activity enabled the quantification of livestock and associated losses in Somali livestock industry. 82. SWALIM \u2019 s data on strategic water resources in Somalia and remote assessment results helped better understanding the drought ' s impact on water resources and associated effects on population and livestock. The AWD / Cholera outbreak data from WHO has allowed for better targeting of intervention strategies as well as understanding of where drought, malnutrition and disease concerns are likely to jointly manifest themselves. 83. Project implementation will also follow an inclusive and consultative process with the relevant partners and stakeholders, including government, through an informal coordination mechanism. This will help ensure close strategic harmonization and operational coordination across the inter-related interventions implemented by FAO, ICRC and multiple other humanitarian and development partners. The Project will also provide support to FAO to further strengthen its coordination and harmonization of the Food Cluster activities and partners in Somalia. V. KEY RISKS A. Overall Risk Rating and Explanation of Key Risks 84. The overall risk for achieving the PDO is substantial. The rating for each category is listed in Table 5, with explanations underneath for the substantial and high risks.", + "ner_text": [ + [ + 441, + 468, + "named" + ], + [ + 277, + 284, + "AWD / Cholera outbreak data <> data geography" + ], + [ + 474, + 477, + "AWD / Cholera outbreak data <> publisher" + ], + [ + 1203, + 1210, + "AWD / Cholera outbreak data <> data geography" + ] + ], + "validated": true, + "empirical_context": "SWALIM \u2019 s data on strategic water resources in Somalia and remote assessment results helped better understanding the drought ' s impact on water resources and associated effects on population and livestock. The AWD / Cholera outbreak data from WHO has allowed for better targeting of intervention strategies as well as understanding of where drought, malnutrition and disease concerns are likely to jointly manifest themselves. 83.", + "type": "data", + "explanation": "This is indeed a dataset as it is used to inform intervention strategies and understand health concerns in the context of drought.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific outbreak data that can be analyzed empirically.", + "contextual_reason_agent": "This is indeed a dataset as it is used to inform intervention strategies and understand health concerns in the context of drought.", + "contextual_signal": "mentioned as a data source for targeting intervention strategies", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "This subcomponent will support improved planning, management, and evaluation of education reforms in the following areas: ( a ) improvement of the existing EMIS and making better use of data to support management decisions at all levels, ( b ) national and international student assessments, and ( c ) TA to support implementation of Components 1 and 2 of the project. 41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "ner_text": [ + [ + 695, + 699, + "named" + ] + ], + "validated": false, + "empirical_context": "41. Improvement of the existing EMIS includes the following activities: ( a ) technical improvements to EMIS to enable integrated use of NBS and MoF data; ( b ) strengthening a system for the generation of aggregate data and reports based on primary data and integrate data on refugee children from Ukraine; ( c ) training to EMIS experts on data collection and management, ( d ) strengthening the MoER \u2019 s analytical capacity in using data; and ( e ) strengthening of EMIS data protection systems as well as data quality assurance.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data management and generation.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 56, + "text": "The World Bank Afghanistan: Eshteghal Zaiee - Karmondena ( EZ-Kar ) ( P166127 ) Page 49 of 85 value may be set by the mid \u2010 term review by when there will be more information from implementation. Of which through Priority Projects in Kabul Number of businesses that are provided with access to ICT services through Priority Projects in Kabul Municipality. Target value for this indicator is currently set at \u2018 0 \u2019, since this Project takes on a demand \u2010 driven approach and the types and numbers of subprojects to be implemented under the Project cannot / should not be predetermined, as it will not be accurate. The target value may be set by the mid \u2010 term review by when there will be more information from implementation. Semi \u2010 annually Project MIS ( KMDP MIS ) Nahia level administrative data, quarterly progress reports, evaluation KM PIU Of which through Priority Projects in the four cities of Herat, Kandahar, Khost, and Jalalabad Number of businesses that are provided with access to ICT services through Priority Projects in Herat, Kandahar, Khost, and Jalalabad.", + "ner_text": [ + [ + 742, + 753, + "named" + ] + ], + "validated": false, + "empirical_context": "The target value may be set by the mid \u2010 term review by when there will be more information from implementation. Semi \u2010 annually Project MIS ( KMDP MIS ) Nahia level administrative data, quarterly progress reports, evaluation KM PIU Of which through Priority Projects in the four cities of Herat, Kandahar, Khost, and Jalalabad Number of businesses that are provided with access to ICT services through Priority Projects in Herat, Kandahar, Khost, and Jalalabad.", + "type": "system", + "explanation": "'Project MIS' is mentioned as a system but not as a data source, indicating it does not function as a dataset in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Project MIS' is a dataset because it includes 'MIS' which often refers to management information systems that handle data.", + "contextual_reason_agent": "'Project MIS' is mentioned as a system but not as a data source, indicating it does not function as a dataset in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "054_Sudan-Basic-Education-Emergency-Support-Project", + "page": 14, + "text": "The country has recently completed its second round of NLA, and preliminary data indicates some gains in reading scores, with the reduction of non-readers from 40 percent in 2014 to 38 percent in 20172. Reading comprehension has improved from 36 percent in 2014 to 52 percent in 2017. The share of students able to perform single digit subtraction and addition increased significantly from 40 and 46 percent in 2014 to 43 percent and 52 percent in 2017, respectively. The preliminary results show that schools, where gains were made in raising reading levels in Grade 3 between the first and second NLA \u2019 s, also did better overall on Grade 6 tests. The analysis serves as an important source of data for policy dialogue. The data provides for details, which facilitate the understanding of the learning among states and within states. 2 The difference in scores is statistically significant at 0. 01 confidence level. 464 312 246 177 Female 197 524 341 255 164 Male 185 600 400 200 0 200 400 600 Basic 1 Basic 2 Basic 3 Basic 4 Basic 5 Basic 6 Basic 7 Basic 8 Secondary 1 Secondary 2 Secondary 3 A. Enrollment pyramid, thousand ( 2017 ) 4 % 17 % 18 % 22 % 27 % 40 % Jordan Iraq Morocco Egypt Yemen Sudan B. Percent of Grade 3 pupils who could not read a single word of a short text in Arabic ( 2014 or the latest available )", + "ner_text": [ + [ + 64, + 80, + "named" + ] + ], + "validated": false, + "empirical_context": "The country has recently completed its second round of NLA, and preliminary data indicates some gains in reading scores, with the reduction of non-readers from 40 percent in 2014 to 38 percent in 20172. Reading comprehension has improved from 36 percent in 2014 to 52 percent in 2017.", + "type": "data", + "explanation": "'Preliminary data' is not a structured collection of data but rather a description of the information that has been gathered.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'preliminary data' refers to a dataset because it implies a collection of information regarding reading scores.", + "contextual_reason_agent": "'Preliminary data' is not a structured collection of data but rather a description of the information that has been gathered.", + "contextual_signal": "mentioned only as data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 58, + "text": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty. The project will also provide TA to INS to train and assist targeted ministries in integrating national statistics in the definition of sector strategies, programs, and public investment projects. 37 This activity budgeted for US $ 1 million will complement the resources ( US $ 4 million ) provided by another World Bank - financed project in the health sector. In total, the World Bank contribution in the financing of the population will amount to US $ 5 million.", + "ner_text": [ + [ + 212, + 236, + "named" + ], + [ + 240, + 244, + "fourth population census <> reference year" + ], + [ + 851, + 861, + "fourth population census <> publisher" + ], + [ + 917, + 927, + "fourth population census <> publisher" + ] + ], + "validated": true, + "empirical_context": "Proposed activities: Four main activities will be undertaken under this subcomponent: ( a ) improvement of the methodology and support to data collection of ECAM 5 planned for 2018 or 2019; ( b ) analysis of the fourth population census in 2017; 37 ( c ) poverty mapping owing to the ongoing complementary ECAM 4; and ( d ) the design of a system of labor statistics by improving LFS and enterprises and informal sector surveys. The project will finance trainings on poverty mapping and workshops to produce regional monography of poverty.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a population census that provides data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific census that collects population data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a population census that provides data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 53, + "text": "54 g ) Generating progress and monitoring reports. 48. Under this project, two types of monitoring are envisaged: ( i ) monitoring the project performance with regard to day-to-day progress of project activities ( including targets and intermediate results ) as per the implementation plan; and ( ii ) evaluating the project with regard to achievement of the overall development objective. Monitoring 49. Monitoring will be a continuous function carried out by MoPH / PMU with support from the MoPH HIS team. Specifically, it will comprise of two aspects as follows: a ) Establishing a monitoring system ( as part of health information system ) which will include: ( i ) annual work plans, targets, outputs, indicators, and outcomes for each component; ( ii ) baseline data, if available, for each outcome indicator; and ( iii ) user friendly data entry format and built in methodology that will automatically update the targets, outputs, and signal the achievement gap to alert the implementing agencies. The focus will be on systematic data collection on specified indicators and related deliverables to provide management and the main stakeholders the extent of progress and achievement of results and progress in the use of allocated funds. The data will be collected and reconciled with the PHCCs databases with specific focus on beneficiary enrollment and packaged delivered.", + "ner_text": [ + [ + 586, + 603, + "named" + ] + ], + "validated": false, + "empirical_context": "Monitoring will be a continuous function carried out by MoPH / PMU with support from the MoPH HIS team. Specifically, it will comprise of two aspects as follows: a ) Establishing a monitoring system ( as part of health information system ) which will include: ( i ) annual work plans, targets, outputs, indicators, and outcomes for each component; ( ii ) baseline data, if available, for each outcome indicator; and ( iii ) user friendly data entry format and built in methodology that will automatically update the targets, outputs, and signal the achievement gap to alert the implementing agencies. The focus will be on systematic data collection on specified indicators and related deliverables to provide management and the main stakeholders the extent of progress and achievement of results and progress in the use of allocated funds.", + "type": "system", + "explanation": "However, it is described as a monitoring system, which does not function as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes elements related to data collection and management.", + "contextual_reason_agent": "However, it is described as a monitoring system, which does not function as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 105, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 98 of 174 trainings in network operation, managing commercial network operation, management and commercial skills. reports. Feedback from citizens incorporated into program design and published in communication materials Feedback from citizens incorporated into program design and published in communication materials. Semi - annually. NIGELEC project database. Documentation demonstrating how feedback from citizens have been incorporated in communication materials. NIGELEC. Project related grievances registered under the project grievance redress mechanism ( GRM ) and addressed Project related grievances are registered under the project grievance redress mechanism ( GRM ) and addressed. Semi - annually. NIGELEC, ANPER and ANERSOL project database. Extracted from data received through the GRM and consultations with citizens. NIGELEC. ME IO Table SPACE.", + "ner_text": [ + [ + 808, + 813, + "named" + ] + ], + "validated": false, + "empirical_context": "Semi - annually. NIGELEC, ANPER and ANERSOL project database. Extracted from data received through the GRM and consultations with citizens.", + "type": "organization", + "explanation": "'ANPER' is not a dataset but rather an organization involved in the project.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'ANPER' is a dataset because it is mentioned alongside 'project database'.", + "contextual_reason_agent": "'ANPER' is not a dataset but rather an organization involved in the project.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "078_Afghanistan-Eshteghal-Zaiee-Karmondena-EZ-Kar-Project", + "page": 27, + "text": "Available survey data indicate that once returned to Afghanistan, most refugees return to their province of origin to be in proximity to family and friends or, if they return elsewhere, they do so for safety and economic reasons. Afghan returnee households are large and although most families have at least one person working for pay, they have low job stability and low wages. According to survey data, most returnees work as daily wage laborers in non \u2010 agriculture and they generally experience a decrease in the employment rate, wage, and job stability after returning to Afghanistan. Data indicate that there were as many men as women among Afghans living in Pakistan in 2011 but more women ( 54 percent ) than men returned to Afghanistan between 2015 and 2017. 31 D. Results Chain 44. The project supports, through a programmatic, multi \u2010 sector, multi \u2010 implementation agency approach, the short, medium, and long \u2010 term measures required to increase economic integration of Afghan returnees, IDPs, and host communities in the cities supported by the project. The project will implement a range of policy and operational activities identified through multiple consultations and surveys with the GoIRA, potential beneficiaries, and stakeholders ( e. g. UN agencies, civil society, refugees ).", + "ner_text": [ + [ + 392, + 403, + "named" + ], + [ + 53, + 64, + "survey data <> data geography" + ], + [ + 71, + 79, + "survey data <> reference population" + ], + [ + 230, + 256, + "survey data <> reference population" + ], + [ + 577, + 588, + "survey data <> data geography" + ], + [ + 665, + 673, + "survey data <> data geography" + ], + [ + 677, + 681, + "survey data <> reference year" + ], + [ + 733, + 744, + "survey data <> data geography" + ], + [ + 983, + 999, + "survey data <> reference population" + ] + ], + "validated": true, + "empirical_context": "Afghan returnee households are large and although most families have at least one person working for pay, they have low job stability and low wages. According to survey data, most returnees work as daily wage laborers in non \u2010 agriculture and they generally experience a decrease in the employment rate, wage, and job stability after returning to Afghanistan. Data indicate that there were as many men as women among Afghans living in Pakistan in 2011 but more women ( 54 percent ) than men returned to Afghanistan between 2015 and 2017.", + "type": "survey", + "explanation": "In this context, 'survey data' is explicitly mentioned as a source of information used to analyze employment and wage conditions of Afghan returnees.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'survey data' is a dataset because it refers to collected information from a survey.", + "contextual_reason_agent": "In this context, 'survey data' is explicitly mentioned as a source of information used to analyze employment and wage conditions of Afghan returnees.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "117_Somali-Urban-Investment-Planning-Project", + "page": 27, + "text": "This will include the preparation, consultation on, and disclosure of the required E & S due diligence instruments, for which two main types of activities will be carried out: ( a ) a baseline survey of environmental and social information, data and issues that will help to identify E & S constraints, but also areas of potential enhancement of project outcomes, and provide E & S information, criteria and constraining factors for the processes for design and environmental / social assessments planned for SUDP or other downstream planning activities; ( b ) the development of an Environmental and Social Management Framework ( ESMF ), which would constitute a generic tool for managing social and environmental risks related to urban investments, and planning follow-up investigations, assessments and analyses ( for e. g. roads, bridges, water supply and waste management activities ) regardless of funding source, in the Somali territories for use by entities such as local governments as well as water utilities or waste management authorities. To broaden the scope of its potential applicability, the ESMF will be based on international good practice, rather than tied specifically to the World Bank \u2019 s safeguard requirements. The intention is to produce an instrument for use by Somali institutions, which is in line with international good practice norms, which can be applied to urban development projects in a variety of different funding contexts. This would be useful in itself for Somali urban institutions, which will be the primary purpose, but can also be swiftly adapted for use for Bank funded investments under projects such as the SUDP. 77. The Scope of Work outlining the above environmental and social studies has been produced during SUIPP preparation and disclosed in InfoShop.", + "ner_text": [ + [ + 184, + 199, + "named" + ], + [ + 184, + 256, + "baseline survey <> data description" + ], + [ + 312, + 362, + "baseline survey <> data description" + ], + [ + 927, + 945, + "baseline survey <> data geography" + ], + [ + 1497, + 1522, + "baseline survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "This will include the preparation, consultation on, and disclosure of the required E & S due diligence instruments, for which two main types of activities will be carried out: ( a ) a baseline survey of environmental and social information, data and issues that will help to identify E & S constraints, but also areas of potential enhancement of project outcomes, and provide E & S information, criteria and constraining factors for the processes for design and environmental / social assessments planned for SUDP or other downstream planning activities; ( b ) the development of an Environmental and Social Management Framework ( ESMF ), which would constitute a generic tool for managing social and environmental risks related to urban investments, and planning follow-up investigations, assessments and analyses ( for e. g.", + "type": "survey", + "explanation": "This is a dataset as it refers to a survey that collects specific data to identify constraints and enhance project outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'baseline survey' implies a structured collection of data related to environmental and social information.", + "contextual_reason_agent": "This is a dataset as it refers to a survey that collects specific data to identify constraints and enhance project outcomes.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "008_BOSIB-60d57288-4e09-4519-ae6c-ffdc0037e0b1", + "page": 16, + "text": "In early May 2022 and between November 2022 and January 2023, heavy rains disrupted economic activities. Moreover, sea-level rise could pose a problem, especially if the warming of the Southern Atlantic Ocean leads to more hurricanes in the future. 21. Demographic pressure is threatening the Region \u2019 s biodiversity and endangering assets that are vital not only for tourism but also for the Region \u2019 s resilience and function as a carbon sink. Commonly known as Costa Verde & Mar, 22 Data derived from RAIS ( 2019 ). 23 Heavy rains in January and February 2022 left almost a hundred fatalities and rendered thousands of people homeless in several Brazilian states, including Rio de Janeiro, Bahia, S\u00e3o Paulo, Paran\u00e1, Minas Gerais, Tocantins, and Par\u00e1. In just two years, 2021 \u2013 22, Brazil was hit by seven named tropical storms, whereas there were only 15 between 2010 and 2019.", + "ner_text": [ + [ + 504, + 508, + "named" + ], + [ + 464, + 481, + "RAIS <> data geography" + ], + [ + 511, + 515, + "RAIS <> publication year" + ], + [ + 677, + 691, + "RAIS <> data geography" + ], + [ + 693, + 698, + "RAIS <> data geography" + ], + [ + 700, + 709, + "RAIS <> data geography" + ], + [ + 711, + 717, + "RAIS <> data geography" + ], + [ + 719, + 731, + "RAIS <> data geography" + ], + [ + 733, + 742, + "RAIS <> data geography" + ], + [ + 748, + 752, + "RAIS <> data geography" + ], + [ + 784, + 790, + "RAIS <> data geography" + ], + [ + 875, + 879, + "RAIS <> publication year" + ] + ], + "validated": true, + "empirical_context": "Demographic pressure is threatening the Region \u2019 s biodiversity and endangering assets that are vital not only for tourism but also for the Region \u2019 s resilience and function as a carbon sink. Commonly known as Costa Verde & Mar, 22 Data derived from RAIS ( 2019 ). 23 Heavy rains in January and February 2022 left almost a hundred fatalities and rendered thousands of people homeless in several Brazilian states, including Rio de Janeiro, Bahia, S\u00e3o Paulo, Paran\u00e1, Minas Gerais, Tocantins, and Par\u00e1.", + "type": "registry", + "explanation": "RAIS is explicitly mentioned as a source of data, indicating it functions as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because RAIS is referenced as a source of derived data.", + "contextual_reason_agent": "RAIS is explicitly mentioned as a source of data, indicating it functions as a structured collection of data.", + "contextual_signal": "mentioned as a source of derived data", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "107_PAD-Citizens-Charter-Afghanistan-P160567-Oct-7-Board-version-10072016", + "page": 15, + "text": "Furthermore, the country \u2019 s demographic trends make poverty reduction challenging. Afghanistan faces high population growth and a youth bulge, with 400, 000 entrants into the labor force each year. The proportion of the population aged 15 or below is 51. 3 percent, making Afghanistan one of the youngest countries in Asia with extremely high dependency ratios. These demographic pressures are in the future likely to be exacerbated by significant numbers of returning refugees and internally displaced persons ( IDPs ). Lastly, vulnerability to weather-related shocks and natural disasters is high in Afghanistan especially among poorer households. 1 Central Statistics Organization of Afghanistan ( CSO ). 2016. Afghanistan Living Conditions Survey 2013-2014. Kabul: CSO; World Bank. 2016. Afghanistan Systematic Country Diagnostic. Washington, DC: World Bank 2 UNAMA. 2016. Civil Servants Districts Accessibility, April to June 2016.", + "ner_text": [ + [ + 715, + 751, + "named" + ], + [ + 84, + 95, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 274, + 285, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 603, + 614, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 709, + 713, + "Afghanistan Living Conditions Survey <> publication year" + ], + [ + 715, + 726, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 752, + 761, + "Afghanistan Living Conditions Survey <> reference year" + ], + [ + 793, + 804, + "Afghanistan Living Conditions Survey <> data geography" + ], + [ + 932, + 936, + "Afghanistan Living Conditions Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "2016. Afghanistan Living Conditions Survey 2013-2014. Kabul: CSO; World Bank.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly referred to as a survey that collects data on living conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly referred to as a survey that collects data on living conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 20, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 13 of 174 4. Gender-based violence ( GBV ) is relatively prevalent, and economic opportunities and access to education are very limited for women and girls. Conflict, militarization, and insecurity in some areas of the country ( notably the Diffa, Tahoua, and Tillab\u00e9ry Regions ) have exacerbated pre-existing risks of GBV in multiple ways: the collapse of social safety nets and protective relationships, the growing challenges associated to accessing life-saving services leaving survivors isolated and unable to seek care, the weakened rule of law and state presence failing to provide protection, the widening of levels and severity of gender inequality, and different manifestations of GBV, from intimate partner violence ( IPV ) to sexual exploitation of women and girls. In addition, the United Nations Children \u2019 s Fund ( UNICEF ) estimates that Niger has the highest prevalence rate of child marriage in the world, with 77 percent of the girls married before the age of 18 ( Demographic and Health Survey [ DHS ] 2012 ) 4 and 28 percent before the age of 15. The median age of marriage for girls ( 15. 7 years ) is around nine years earlier than that for boys ( 24. 6 years ).", + "ner_text": [ + [ + 1072, + 1101, + "named" + ], + [ + 15, + 20, + "Demographic and Health Survey <> data geography" + ], + [ + 228, + 243, + "Demographic and Health Survey <> reference population" + ], + [ + 329, + 334, + "Demographic and Health Survey <> data geography" + ], + [ + 918, + 924, + "Demographic and Health Survey <> publisher" + ], + [ + 942, + 947, + "Demographic and Health Survey <> data geography" + ], + [ + 1035, + 1040, + "Demographic and Health Survey <> reference population" + ], + [ + 1104, + 1107, + "Demographic and Health Survey <> acronym" + ], + [ + 1110, + 1114, + "Demographic and Health Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Conflict, militarization, and insecurity in some areas of the country ( notably the Diffa, Tahoua, and Tillab\u00e9ry Regions ) have exacerbated pre-existing risks of GBV in multiple ways: the collapse of social safety nets and protective relationships, the growing challenges associated to accessing life-saving services leaving survivors isolated and unable to seek care, the weakened rule of law and state presence failing to provide protection, the widening of levels and severity of gender inequality, and different manifestations of GBV, from intimate partner violence ( IPV ) to sexual exploitation of women and girls. In addition, the United Nations Children \u2019 s Fund ( UNICEF ) estimates that Niger has the highest prevalence rate of child marriage in the world, with 77 percent of the girls married before the age of 18 ( Demographic and Health Survey [ DHS ] 2012 ) 4 and 28 percent before the age of 15. The median age of marriage for girls ( 15.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is cited with specific prevalence rates and is used to support empirical claims about child marriage in Niger.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is a recognized survey that provides statistical data on demographic and health indicators.", + "contextual_reason_agent": "The context confirms it is a dataset as it is cited with specific prevalence rates and is used to support empirical claims about child marriage in Niger.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 46, + "text": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2. The MOH will share HCO-related data with the regions and woredas to provide any disaggregated data needed to monitor HCO results indicators and DLIs. Likewise, the central MOE is responsible for the management of EMIS and NLA which collects varying aspects of data at the school level. MOE will validate and share disaggregated data with regions and woredas to monitor HCO results indicators and DLIs. Other M & E data sources will be coordinated with various implementation units and service providers. The SPG impact evaluation survey will be 71 Such as Minimal Acceptable Diet for children 6 \u2013 23 months. Minimal Acceptable Diet is calculated according to breasfeeding status, number of food groups consumed, and number of times fed. https: / / inddex. nutrition. tufts. edu / data4diets / indicator / minimum-acceptable-diet-mad.", + "ner_text": [ + [ + 408, + 415, + "named" + ], + [ + 51, + 62, + "DHIS2 ) <> data geography" + ], + [ + 532, + 554, + "DHIS2 ) <> reference population" + ], + [ + 703, + 719, + "DHIS2 ) <> data type" + ], + [ + 764, + 782, + "DHIS2 ) <> data type" + ], + [ + 1240, + 1263, + "DHIS2 ) <> data description" + ] + ], + "validated": true, + "empirical_context": "In addition, a survey will be conducted in sampled SPG woredas as part of the impact evaluation to measure composite indicators not collected through routine information systems. 71 The MOE and MOH have established data collection systems through the Education Management Information System ( EMIS ), NLA, Unified Nutrition Information System for Ethiopia ( UNISE ), and District Health Information System ( DHIS2 ), and the IPF component will strengthen these systems to meet M & E requirements. DHIS2 regularly collects data from service delivery units, and the central MOH is responsible for this data management system, including expansion of the UNISE, which is linked to DHIS2.", + "type": "data management system", + "explanation": "DHIS2 is indeed a data management system that regularly collects data, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "DHIS2 is indeed a data management system that regularly collects data, confirming its role as a data source.", + "contextual_signal": "system but mentioned as a data source", + "tags": [] + }, + { + "filename": "006_BOSIB-2c22668f-f4ba-42f5-a0d1-7949b7b8fe34", + "page": 38, + "text": "Policy by addressing inconsistencies in existing laws that prevent refugees and former refugees from accessing basic services and regularizing their immigration status. Frequency Biannual Data source Project progress report, cabinet memos Methodology for Data Collection Data collected through minutes of the interministerial committee ( MORHCSA ) meetings, Parliamentary committee reports Responsibility for Data Collection MoHAIS Stakeholder consultations convened and priority measures identified Description Number of high-level consultations held with relevant government ministries and civil society organizations to deliberate on measures identified for legislative and regulatory reform, implementation performance, roles and responsibilities of", + "ner_text": [ + [ + 294, + 356, + "named" + ] + ], + "validated": false, + "empirical_context": "Policy by addressing inconsistencies in existing laws that prevent refugees and former refugees from accessing basic services and regularizing their immigration status. Frequency Biannual Data source Project progress report, cabinet memos Methodology for Data Collection Data collected through minutes of the interministerial committee ( MORHCSA ) meetings, Parliamentary committee reports Responsibility for Data Collection MoHAIS Stakeholder consultations convened and priority measures identified Description Number of high-level consultations held with relevant government ministries and civil society organizations to deliberate on measures identified for legislative and regulatory reform, implementation performance, roles and responsibilities of", + "type": "document", + "explanation": "However, it is not a dataset as it is described as minutes of meetings, which are documents rather than structured data collections.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a collection of meeting records.", + "contextual_reason_agent": "However, it is not a dataset as it is described as minutes of meetings, which are documents rather than structured data collections.", + "contextual_signal": "mentioned only as a document, not as a data source", + "tags": [] + }, + { + "filename": "129_PAD1030-ENGLISH-P149242-PUBLIC-FINAL-LEB-ENPTP-English", + "page": 13, + "text": "3 C. Sectoral and Institutional Context 9. Even prior to the onset of the Syrian conflict and the inflow of large numbers of Syrian refugees, poverty in Lebanon was significant and regional disparities in living conditions were acute. It is estimated that nearly 27 percent of the Lebanese population, or 1. 2 million people, are poor, living on less than US $ 4 per day, and seven percent, or 300, 000 people, are extremely poor, living on less than US $ 2. 40 per day ( UNDP, 2008 ). 3 Poverty is significantly higher in some regions, with the highest concentration of poor people found in the North governorate ( 52. 5 percent ), followed by the South governorate ( 42 percent ) and the Beka \u2019 a ( 29 percent ). 10. The Syrian conflict is projected to increase the poverty headcount of those below the upper poverty line by 170, 000 people by end 2014. Simulations using household expenditures data show that, between 2012 and 2014, poverty in Lebanon was projected to continue its downward path in the absence of the Syrian conflict. In its presence, however, about 120, 000 Lebanese are estimated to have been pushed into poverty in 2013, which is approximately three percent of the Lebanese ( pre-conflict ) population.", + "ner_text": [ + [ + 874, + 901, + "named" + ], + [ + 153, + 160, + "household expenditures data <> data geography" + ], + [ + 281, + 300, + "household expenditures data <> reference population" + ], + [ + 472, + 476, + "household expenditures data <> publisher" + ], + [ + 478, + 482, + "household expenditures data <> publication year" + ], + [ + 596, + 613, + "household expenditures data <> data geography" + ], + [ + 850, + 854, + "household expenditures data <> publication year" + ], + [ + 921, + 925, + "household expenditures data <> reference year" + ], + [ + 930, + 934, + "household expenditures data <> reference year" + ], + [ + 1138, + 1142, + "household expenditures data <> reference year" + ], + [ + 1188, + 1196, + "household expenditures data <> reference population" + ], + [ + 1241, + 1259, + "household expenditures data <> usage context" + ] + ], + "validated": true, + "empirical_context": "The Syrian conflict is projected to increase the poverty headcount of those below the upper poverty line by 170, 000 people by end 2014. Simulations using household expenditures data show that, between 2012 and 2014, poverty in Lebanon was projected to continue its downward path in the absence of the Syrian conflict. In its presence, however, about 120, 000 Lebanese are estimated to have been pushed into poverty in 2013, which is approximately three percent of the Lebanese ( pre-conflict ) population.", + "type": "data", + "explanation": "This is indeed a dataset as it provides structured data on household expenditures used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data used in simulations.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on household expenditures used for empirical analysis.", + "contextual_signal": "used in simulations to analyze poverty trends", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 48, + "text": "Frequency Quarterly Data source MINEMA, BRD, RTDA & BDF grievance redress mechanisms. Methodology for Data Collection Monitoring of grievances through the GRMs. Responsibility for Data Collection MINEMA, BRD, RTDA & BDF. MINEMA consolidates data for regular reporting. Beneficiaries that feel project investments reflect their needs ( Percentage ) Description Quantitative indicator counting proportion of beneficiaries indicating that project investments reflect their needs. Indictor is a composite of beneficiaries responding to a Likert scale or similar instrument. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey. Responsibility for Data Collection MINEMA. Increase in social cohesion score between refugees and host community members ( Percentage ) Description Composite first-order indicator composed of data from responses to questions gauging changes in horizontal and vertical social chesion in beneficiary communities. Frequency Annual. Data source Annual survey. Methodology for Data Collection Survey Responsibility for Data Collection MINEMA.", + "ner_text": [ + [ + 32, + 38, + "named" + ] + ], + "validated": false, + "empirical_context": "Frequency Quarterly Data source MINEMA, BRD, RTDA & BDF grievance redress mechanisms. Methodology for Data Collection Monitoring of grievances through the GRMs.", + "type": "organization", + "explanation": "However, MINEMA is identified as an organization and not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MINEMA is a dataset because it is mentioned alongside data sources.", + "contextual_reason_agent": "However, MINEMA is identified as an organization and not a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 14, + "text": "Unit. 20 In addition to the DHIS, there exist many parallel health management information systems ( HMIS ) ( for example, one for each vertical program, PPHI, and so on ). Services delivered through the private sector are also not reported. 21 Less than 4 percent of the population seeks care at the public primary level HFs and 28 percent seeks care at higher \u2010 level public HFs, while more than 60 percent seeks care at private HFs ( PSLSM Survey 2014 \u2013 15. Pakistan Bureau of Statics, 2016 ). Zaidi, S. A., M. Bigdeli, and E. V. Langlois, et al. 2019. \u201c Health Systems Changes after Decentralization: Progress, Challenges and Dynamics in Pakistan. \u201d BMJ Glob Health 4. 22 In Pakistan, primary schools cover grades 1 through 5 and secondary schools cover grades 6 to 10 with middle schools for grades 6 to 8 and high schools for grades 9 and 10. Higher \u2010 secondary schools cover grades 11 and 12.", + "ner_text": [ + [ + 60, + 97, + "named" + ] + ], + "validated": false, + "empirical_context": "Unit. 20 In addition to the DHIS, there exist many parallel health management information systems ( HMIS ) ( for example, one for each vertical program, PPHI, and so on ). Services delivered through the private sector are also not reported.", + "type": "system", + "explanation": "However, it is not a dataset as it is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'information systems' which often relates to data management.", + "contextual_reason_agent": "However, it is not a dataset as it is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "023_Ethiopia-Second-Phase-Development-Response-to-Displacement-Impacts-Project-in-the-Horn-of-Africa-Project", + "page": 13, + "text": "This will increase demand for water, raising the potential for conflict and 3 According to Ethiopia \u2019 s 2007 Census. 4 World Bank Poverty and Equity Brief for Ethiopia, October 2021. 5 As of May 11, 2022, Ethiopia had registered 470, 760 COVID cases and 7, 510 fatalities: https: / / covid19. who. int / region / afro / country / et 6 World Bank analysis suggests that the poverty headcount in the 23. 5th percentile ( the national poverty rate ) increased by 11. 2 percent and for the bottom 40th percentile by 7. 7 percent between 2018 / 19 and October 2020. Inequality is estimated to have increased, with the Gini coefficient rising to 42 in October / November 2020. See Christina Wieser et al ( 2021 ) \u201c Poverty projections and profiling based on Ethiopia \u2019 s High Frequency Phone Surveys of households using a SWIFT-COVID-19 package \u201d World Bank: Washington DC. 7 2021 Humanitarian Response Plan. https: / / www. wfp. org / countries / ethiopia", + "ner_text": [ + [ + 765, + 793, + "named" + ], + [ + 91, + 99, + "High Frequency Phone Surveys <> data geography" + ], + [ + 119, + 129, + "High Frequency Phone Surveys <> publisher" + ], + [ + 159, + 167, + "High Frequency Phone Surveys <> data geography" + ], + [ + 177, + 181, + "High Frequency Phone Surveys <> publication year" + ], + [ + 205, + 213, + "High Frequency Phone Surveys <> data geography" + ], + [ + 335, + 345, + "High Frequency Phone Surveys <> publisher" + ], + [ + 533, + 542, + "High Frequency Phone Surveys <> reference year" + ], + [ + 555, + 559, + "High Frequency Phone Surveys <> publication year" + ], + [ + 675, + 697, + "High Frequency Phone Surveys <> author" + ], + [ + 752, + 760, + "High Frequency Phone Surveys <> data geography" + ], + [ + 797, + 807, + "High Frequency Phone Surveys <> reference population" + ], + [ + 841, + 851, + "High Frequency Phone Surveys <> publisher" + ], + [ + 870, + 874, + "High Frequency Phone Surveys <> publication year" + ] + ], + "validated": true, + "empirical_context": "Inequality is estimated to have increased, with the Gini coefficient rising to 42 in October / November 2020. See Christina Wieser et al ( 2021 ) \u201c Poverty projections and profiling based on Ethiopia \u2019 s High Frequency Phone Surveys of households using a SWIFT-COVID-19 package \u201d World Bank: Washington DC. 7 2021 Humanitarian Response Plan.", + "type": "survey", + "explanation": "This is indeed a dataset as it is used for empirical analysis in the context of poverty projections.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of survey that collects data on households.", + "contextual_reason_agent": "This is indeed a dataset as it is used for empirical analysis in the context of poverty projections.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 70, + "text": "14. A Short term financial management consultant, experienced in system design, will be recruited to prepare a comprehensive financial management system ( accounting, disbursement, procurement, financial and procurement reporting ), integrating the project monitoring system and the activities of the Component Coordinators ( i. e., refugee sites, IDPs and cross-border mobile populations and health harmonization ). 16. Information systems: The existing computerized accounting \u2018 SUN \u2019 system currently being used to report on IGAD finances will be used to report on project finances. This software will be customized to produce IDA reporting requirements. The system is a user friendly and currently operating three accounting modules: ledger, corporate allocation or budget, and asset register. All finance staff including those in the two offices Nairobi and Ethiopia have received adequate training to operate the system. There is adequate internal IT support within IGAD provided by the System Administrator. Additional support is provided by the SUN office in Nairobi on annual basis for upgrade of the system and training of the users. IGAD plans are to expand the integrated system to the two offices Nairobi and Ethiopia by end of April 2007.", + "ner_text": [ + [ + 481, + 484, + "named" + ] + ], + "validated": false, + "empirical_context": "16. Information systems: The existing computerized accounting \u2018 SUN \u2019 system currently being used to report on IGAD finances will be used to report on project finances. This software will be customized to produce IDA reporting requirements.", + "type": "system", + "explanation": "'SUN' is a software system mentioned for reporting purposes, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'SUN' is a dataset because it is associated with reporting on finances.", + "contextual_reason_agent": "'SUN' is a software system mentioned for reporting purposes, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 41, + "text": "The ongoing revamping of NEMIS will enable the platform to also capture data from these Directorates. MoE will be responsible for monitoring overall results for the Operation, including the IPF Component and commitments in the PAP. 66. The existing National PCU at MoE will be directly responsible for tracking all the Program results through program participating entities. The PCU will obtain information on various indicators and DLIs from the PCTs at the IEs. MoE \u2019 s CPPMU are responsible for overall coordination and monitoring of the NESSP and therefore are expected to cross check data collected by the PCU. The IPF Component includes resources to support better data management. 67. In addition to monitoring by the PCU, results framework indicators which are also DLIs will be subject to third party verification by an independent verifier. As part of program review, at least two joint implementation support missions will be carried out each year to track progress on program implementation, achievement of the PDO indicators and progress on agreed DLIs. Also, there will be a midterm review of the Program to inform any areas needing adjustments.", + "ner_text": [ + [ + 25, + 30, + "named" + ] + ], + "validated": false, + "empirical_context": "The ongoing revamping of NEMIS will enable the platform to also capture data from these Directorates. MoE will be responsible for monitoring overall results for the Operation, including the IPF Component and commitments in the PAP.", + "type": "system", + "explanation": "NEMIS is described as a platform, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is mentioned in the context of capturing data.", + "contextual_reason_agent": "NEMIS is described as a platform, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 33, + "text": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ). 52. While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to monitor and evaluate data effectively on refugee-related education challenges. Learning assessments will include modules to analyze the learning outcomes of refugees and efforts will be made toward including disaggregated data on refugees in statistical yearbooks. Priority Area 4: Supporting the educational needs of public schools in host communities affected by refugees. 53. Refugee-related project activities, estimated at US $ 36 million, fall under Results Areas 1, 3, and 7.", + "ner_text": [ + [ + 924, + 928, + "named" + ] + ], + "validated": false, + "empirical_context": "52. While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to monitor and evaluate data effectively on refugee-related education challenges. Learning assessments will include modules to analyze the learning outcomes of refugees and efforts will be made toward including disaggregated data on refugees in statistical yearbooks.", + "type": "system", + "explanation": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to monitoring and evaluating data.", + "contextual_reason_agent": "However, EMIS is described as a system and not explicitly mentioned as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "187_multi-page", + "page": 30, + "text": "Decisionmaking processes and the decisions they Rationalize the management of the policy result in are in general transparent and predictable. formulation processes within the CoM. Build stable professional capacity within the CoM to provide systematic analyses of policy issues and tradeoffs raised by legislative and policy proposals. Provide technical assistance to the CoM to review and implement recommendations of the functional review of the CoM completed in March 1999. Strengthen public expenditure management functions ( budget formulation, budget execution, treasury and cash management, debt management, accounting ). Initiate Budget Framework memorandum process. Oversight mechanisms ( i. e., checks and balances ) Strengthen public expenditure management exist to guard against arbitrariness and to ensure functions ( budget formulation, budget execution, accountability in the use of public resources, but treasury and cash management, debt management, these oversight mechanisms do not eliminate the accounting ). flexibility and delegation that is needed to respond Undertake survey baseline and one follow-up survey quickly to changing circumstances. of public officials. Undertake Public Expenditure Tracking Surveys. Create independent Civil Service Commission and support the development of its capacities. Strengthen Department of Public Administration. The various design elements aimed at ensuring responsiveness to the citizenry should partially address the need to reinforce commitment throughout the reform process.", + "ner_text": [ + [ + 1200, + 1235, + "named" + ], + [ + 466, + 476, + "Public Expenditure Tracking Surveys <> publication year" + ], + [ + 1172, + 1188, + "Public Expenditure Tracking Surveys <> reference population" + ] + ], + "validated": true, + "empirical_context": "of public officials. Undertake Public Expenditure Tracking Surveys. Create independent Civil Service Commission and support the development of its capacities.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves structured data collection through surveys focused on tracking public expenditure.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of survey that collects data on public expenditure.", + "contextual_reason_agent": "This is indeed a dataset as it involves structured data collection through surveys focused on tracking public expenditure.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 8, + "text": "3 \u2022 Quality: In international assessments ( TIMSS & PISA ), Jordan has performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages. National assessments ( both of \u201c knowledge economy skills \u201d and more general subject-area competence ) have revealed that the majority of students still perform below desired achievement levels. In addition up to 30 percent of students drop out before completion of 12th Grade. \u2022 Teacher Policy: Studies conducted as part of ERfKE preparation revealed that while the majority of teachers are in possession of the required formal qualifications, and the current student teacher ratios do not suggest a significant shortage of teachers, there are significant challenges regarding teacher recruitment, utilization, professional development and morale. There is still a relatively low level of actual use of the new methods and approaches in the classroom, and the new learning materials are often used in a conventional teaching approach. Teacher morale remains low. Teachers in Jordan are not recruited by the MoE but are assigned by the Public Service Bureau on the basis of examination scores.", + "ner_text": [ + [ + 52, + 56, + "named" + ] + ], + "validated": false, + "empirical_context": "3 \u2022 Quality: In international assessments ( TIMSS & PISA ), Jordan has performed well in comparison with other countries in the Region, but well below many countries at comparable income and education expenditure levels, and at or below the international averages. National assessments ( both of \u201c knowledge economy skills \u201d and more general subject-area competence ) have revealed that the majority of students still perform below desired achievement levels.", + "type": "assessment", + "explanation": "'PISA' is mentioned as an assessment rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'PISA' is a dataset because it is associated with international assessments that provide data on student performance.", + "contextual_reason_agent": "'PISA' is mentioned as an assessment rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "182_multi0page", + "page": 6, + "text": "Improved capacity of General Administration of Social Services ( GASS ) staff, local governments, NGOs, community-based organizations ( CBOs ), and other organizations to deliver, monitor and manage services ( monitoring system, regular data reports, publications, standards and guidelines etc. ). 6. Increased public awareness of social inclusion issues of poor and vulnerable population groups, and increased numbers of people seeking referrals to social services. 7. Government analysis and development of social policy at all stages ( design, monitoring and evaluation ) is improved, measured through qualitative assessments of policy performance by key stakeholders ( e. g., Parliament, municipalities, NGO ' s, extemal development agencies ). 8. Introduction of a more reliable, timely and relevant household survey instrument for poverty monitoring and policy evaluation based on the 2001 population census and forthcoming LSMS and panel surveys. Concrete targets for each output to be achieved by Borrower will be agreed in detail at the beginning of each year through annual work prograrn. B. Strategic Context 1. Sector-related Country Assistance Strategy ( CAS ) goal supported by the project: ( see Annex 1 ) Document number: IDA / R2000-20 Date of latest CAS discussion: 03 / 21 / 00 Poverty alleviation and human development were identified as two strategic priorities of the 1998 CAS for Albania.", + "ner_text": [ + [ + 930, + 934, + "named" + ], + [ + 805, + 832, + "LSMS <> data type" + ], + [ + 891, + 913, + "LSMS <> reference year" + ], + [ + 939, + 952, + "LSMS <> data type" + ], + [ + 1403, + 1410, + "LSMS <> data geography" + ] + ], + "validated": true, + "empirical_context": "8. Introduction of a more reliable, timely and relevant household survey instrument for poverty monitoring and policy evaluation based on the 2001 population census and forthcoming LSMS and panel surveys. Concrete targets for each output to be achieved by Borrower will be agreed in detail at the beginning of each year through annual work prograrn.", + "type": "survey", + "explanation": "LSMS is indeed a dataset as it refers to the Living Standards Measurement Study, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed LSMS is a dataset because it is mentioned in the context of household surveys and poverty monitoring.", + "contextual_reason_agent": "LSMS is indeed a dataset as it refers to the Living Standards Measurement Study, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "062_Turkey-Municipal-Services-Improvement-Project", + "page": 46, + "text": "Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "ner_text": [ + [ + 368, + 376, + "named" + ], + [ + 426, + 446, + "PMU Data <> data type" + ] + ], + "validated": true, + "empirical_context": "Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which host community ) This indicator measures the percentage of host community benefitted from safely managed solid waste services in the selected municipalities that have been provided through this Project. Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU progress reports, based on updated beneficiary \u2019 s records from respective SKIs. Municipal PIUs and ILBANK PMU People benefitting from safely managed solid waste services in the selected municipalities ( out of which This indicator measures the percentage of refugees benefitted from safely Semi - annually Reports from PMU Data to be compiled by municipal PIUs and recorded in PMU Municipal PIUs and ILBANK PMU", + "type": "data", + "explanation": "This is indeed a dataset as it refers to data compiled by municipal PIUs and recorded in PMU progress reports.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'PMU Data' suggests a collection of information compiled for reporting purposes.", + "contextual_reason_agent": "This is indeed a dataset as it refers to data compiled by municipal PIUs and recorded in PMU progress reports.", + "contextual_signal": "mentioned as data to be compiled and recorded in progress reports", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 10, + "validated": 9, + "not_validated": 1 + } + }, + { + "filename": "115_PAD1678-PAD-P152822-IDA-R2016-0090-1-Box394886B-OUO-9", + "page": 125, + "text": "111 user departments and PDU the community procurement manual to clarify IDA procedures and to disseminate to implementing agencies Delays in procurement processing Put in place a procurement tracking system to monitor the progress by the PDU of processing different contracts Six months after Effectiveness OPM Inadequate buy in by targeted stakeholders Identify the stakeholders at different levels ( policy, strategy or implementation ), and establish a forum, format and other ways to bring the project information to other stakeholders Six months after Effectiveness 106. IGAD is an intergovernmental organization and serves as the Secretariat for the coordination of the intergovernmental development activities of member countries. Because of this unique nature of the Secretariat there is no clear legal framework which guides procurement activities under the Secretariat. Moreover, being an intergovernmental organization the authority receives grants from various Development Partners to carry out specific studies and coordination of developmental activities in member countries. Consequently, the procurement unit of the authority has to follow procurement procedures of the various Development Partners, which provide support to the Authority. 107. The procurement unit of the Authority is organized under the Directorate for Finance and Administration. The procurement unit was organized in 2013 and is staffed by one senior procurement officer and an assistant. This team carries out procurement activities for almost all projects executed by IGAD.", + "ner_text": [ + [ + 180, + 207, + "named" + ] + ], + "validated": false, + "empirical_context": "111 user departments and PDU the community procurement manual to clarify IDA procedures and to disseminate to implementing agencies Delays in procurement processing Put in place a procurement tracking system to monitor the progress by the PDU of processing different contracts Six months after Effectiveness OPM Inadequate buy in by targeted stakeholders Identify the stakeholders at different levels ( policy, strategy or implementation ), and establish a forum, format and other ways to bring the project information to other stakeholders Six months after Effectiveness 106. IGAD is an intergovernmental organization and serves as the Secretariat for the coordination of the intergovernmental development activities of member countries.", + "type": "system", + "explanation": "However, the context indicates it is a system for monitoring procurement processes, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data collection.", + "contextual_reason_agent": "However, the context indicates it is a system for monitoring procurement processes, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "068_Burundi-Integrated-Community-Development-Project", + "page": 24, + "text": "All 28 communes in the target provinces \u2013 Cankuzo, Muyinga, Ngozi and Ruyigi \u2013 were ranked using a composite needs index constructed with data on poverty, malnutrition and the impact of forced displacement. 34 Figure 4 demonstrates that the four project provinces rank among the highest in the country against these three variables. 34 Index data sources: ( a ) Poverty: RGPH 2008 and ECVMB 2014 ( Burundi Poverty Assessment 2016 ); ( b ) Malnutrition: ISTEEBU / WFP / UNICEF report 2019; and ( c ) Forced displacement: ( i ) IDPs - IOM-DTM ( May 2019 ); ( ii ) Refugees \u2013 UNHCR ( April 2019 ); and ( iii ) Returnees: UNHCR ( July 2019 ). Population figures are from UNFPA / ISTEEEBU 2018. In line with the PDO, the index was weighted 40 percent for poverty and malnutrition and 20 percent for the impact of forced displacement.", + "ner_text": [ + [ + 99, + 120, + "named" + ] + ], + "validated": false, + "empirical_context": "All 28 communes in the target provinces \u2013 Cankuzo, Muyinga, Ngozi and Ruyigi \u2013 were ranked using a composite needs index constructed with data on poverty, malnutrition and the impact of forced displacement. 34 Figure 4 demonstrates that the four project provinces rank among the highest in the country against these three variables.", + "type": "index", + "explanation": "However, the composite needs index is described as a ranking rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'index' which often relates to data collection.", + "contextual_reason_agent": "However, the composite needs index is described as a ranking rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a ranking, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 7, + "validated": 3, + "not_validated": 4 + } + }, + { + "filename": "158_40156", + "page": 23, + "text": "There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. \u0083 BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees and surrounding populations ( IDPs and returnees if possible ). The BSS will be customized to allow for the inclusion of unique characteristics of mobile populations, pre and post displacement, and conflict situations. UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. \u0083 Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and among the surrounding population. Initially, only those sites supported by this project will be added. Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. 19", + "ner_text": [ + [ + 231, + 256, + "named" + ] + ], + "validated": false, + "empirical_context": "There are six types of surveys and surveillance that need to be carried out, partially with project funding ( additional resources would need to be mobilized by IGAD for all these surveys and surveillance to be carried out ): BSS, sentinel HIV surveillance, health facility survey, rapid assessment at hot spots, mapping assessment, and regional assessment. \u0083 BSS: Customized behavioral surveillance surveys ( BSSs ) will be undertaken amongst refugees and surrounding populations ( IDPs and returnees if possible ).", + "type": "program", + "explanation": "However, it is mentioned as a type of surveillance rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'sentinel HIV surveillance' implies a systematic approach to data collection.", + "contextual_reason_agent": "However, it is mentioned as a type of surveillance rather than a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a type of surveillance, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 53, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 48 of 85 these measurement tools. Yearly reports of teacher classroom practices will be collected by MENFOP from inspectors and pedagogical advisors. To report on this indicator, a random sample of trained primary and preschool teachers will be selected and the percentage of teachers from this sample demonstrating new skills learned through the trainings will be reported. PDO # 4: Learning assessments are revised and administered Evaluation frameworks for the national learning assessments are officially approved in the first year. The pilot national learning assessment will be administered in the first year of the project. Year 2 and year 5 OTIs evaluation frameworks officially approved in the second year of the project. Revised year 2 and year 5 OTIs will be administered starting the third year of the project. Annually MENFOP Reported and implemented by MENFOP General inspection, Examination service, evaluation service within MENFOP ME PDO Table SPACE", + "ner_text": [ + [ + 111, + 156, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 48 of 85 these measurement tools. Yearly reports of teacher classroom practices will be collected by MENFOP from inspectors and pedagogical advisors. To report on this indicator, a random sample of trained primary and preschool teachers will be selected and the percentage of teachers from this sample demonstrating new skills learned through the trainings will be reported.", + "type": "report", + "explanation": "However, it is not a structured collection of data but rather a report summarizing findings from observations.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions 'reports' which could imply a collection of data.", + "contextual_reason_agent": "However, it is not a structured collection of data but rather a report summarizing findings from observations.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 20, + "validated": 4, + "not_validated": 16 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 33, + "text": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ). 52. While not specifically benefitting from financing from the RSW, activities linked to learning assessment and EMIS will include features that will enable the Government to monitor and evaluate data effectively on refugee-related education challenges. Learning assessments will include modules to analyze the learning outcomes of refugees and efforts will be made toward including disaggregated data on refugees in statistical yearbooks. Priority Area 4: Supporting the educational needs of public schools in host communities affected by refugees. 53. Refugee-related project activities, estimated at US $ 36 million, fall under Results Areas 1, 3, and 7.", + "ner_text": [ + [ + 108, + 112, + "named" + ] + ], + "validated": false, + "empirical_context": "Under this results area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP-MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional, and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this results area will be linked to the following DLI: Integrated education management information system functional and operational ( DLI 6 ).", + "type": "system", + "explanation": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 30, + "text": "The Project will undertake further consultations with the relevant stakeholders to ensure that the design of the water infrastructure considers the associated risks. C. Financial Management 66. The MWE has a fully functioning Accounts Department headed by the Assistant Commissioner of Accounts. The MWE has an Internal Audit Unit that includes four internal auditors from the MoFPED. This unit reports to an audit committee at the MoFPED. The Project \u2019 s activities and transactions implemented by the MWE will be approved and authorized by the MWE \u2019 s Permanent Secretary who is the Accounting Officer. The main accounts of the MWE are computerized with Integrated Financial Management Systems ( IFMS ). However, this system is currently only operational for government funds as the project module is not yet fully operational. As a result, project financial reports cannot be generated directly from the IFMS. 67. For the NWSC, the Project \u2019 s transactions will be managed within the existing set-up of the NWSC. The Managing Director, who is the Accounting Officer, will approve and authorize activities and transactions implemented by NWSC. All transactions will be processed in accordance with the NWSC \u2019 s policies and procedures. The NWSC has a fully functional Finance and Accounts Department headed by the Deputy Managing Director for Finance and Corporate Strategy. The Accounting Unit of the NWSC is computerized with Iscala accounting systems.", + "ner_text": [ + [ + 1430, + 1455, + "named" + ] + ], + "validated": false, + "empirical_context": "The NWSC has a fully functional Finance and Accounts Department headed by the Deputy Managing Director for Finance and Corporate Strategy. The Accounting Unit of the NWSC is computerized with Iscala accounting systems.", + "type": "system", + "explanation": "However, it is not a dataset but rather a software system used for accounting purposes.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a system that handles accounting data.", + "contextual_reason_agent": "However, it is not a dataset but rather a software system used for accounting purposes.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "026_Djibouti-Health-System-Strengthening-Project", + "page": 19, + "text": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 15 of 64 Figure 4: Main health problem, last 30 days Children under 5 Children under 1 Source: World Bank visualization based on the 2017-2018 EDAM survey 18. Poor nutrition outcomes for children are pervasive across the country and are often linked to incidence of diarrheal diseases in childhood and increased risk of non-communicable diseases ( NCDs ) in adulthood. Undernutrition accounts for 57 percent of deaths among children under five; it is widespread, with 17 percent underweight and 25 percent stunted with no gender differentials. The stunting rate is higher among rural ( 34 percent ) than urban children ( 19 percent ). Some lagging regions experience higher burden of stunting: 40. 2, 33. 3, and 32. 6 percent in Obock, Dikhil and Tadjourah, respectively. At the same time, deaths due to NCDs such as ischemic heart disease, stroke, cirrhosis, and diabetes have increased significantly between 2009 and 2019. Obesity11 rates are also on the rise \u2013 18. 3 percent for women, 8. 6 percent for men, and about 5 percent for children. The poor adult health outcomes, including a high burden of NCDs, are in part driven by nutrition and health deficiencies accumulated in early childhood12.", + "ner_text": [ + [ + 212, + 223, + "named" + ], + [ + 4, + 14, + "EDAM survey <> publisher" + ], + [ + 15, + 23, + "EDAM survey <> data geography" + ], + [ + 122, + 138, + "EDAM survey <> reference population" + ], + [ + 164, + 174, + "EDAM survey <> publisher" + ], + [ + 202, + 211, + "EDAM survey <> publication year" + ], + [ + 493, + 512, + "EDAM survey <> reference population" + ], + [ + 798, + 803, + "EDAM survey <> data geography" + ], + [ + 805, + 811, + "EDAM survey <> data geography" + ], + [ + 816, + 825, + "EDAM survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The World Bank Djibouti Health System Strengthening ( P178033 ) Page 15 of 64 Figure 4: Main health problem, last 30 days Children under 5 Children under 1 Source: World Bank visualization based on the 2017-2018 EDAM survey 18. Poor nutrition outcomes for children are pervasive across the country and are often linked to incidence of diarrheal diseases in childhood and increased risk of non-communicable diseases ( NCDs ) in adulthood.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it is explicitly mentioned as the source of data for the World Bank visualization.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EDAM survey' is referenced as a source for data visualization.", + "contextual_reason_agent": "The context confirms it is a dataset as it is explicitly mentioned as the source of data for the World Bank visualization.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 20, + "text": "The Government has affirmed both in national legislation and at representations to the Committee on Elimination of Racial Discrimination that refugees are to be treated equally under the law with respect to key socioeconomic rights: property ownership; security; access to the courts; access to labor market ( no nationality discrimination ); freedom of expression and movement; and access to basic services including access to health, education, and housing. In December 2018, 4 The latest DHS was in 2012. Fieldwork for the 2017 DHS took place between October 2017 and February 2018. Internal and independent audits determined that the results obtained from the survey cannot be reconciled and therefore should not be used for planning or programming purposes. Hence, the release of the results was canceled. 5 World Bank. 2020. Risk and Resilience Assessment for the Sahel Region.", + "ner_text": [ + [ + 491, + 494, + "named" + ], + [ + 502, + 506, + "DHS <> reference year" + ], + [ + 526, + 530, + "DHS <> reference year" + ], + [ + 664, + 670, + "DHS <> data type" + ], + [ + 870, + 882, + "DHS <> data geography" + ], + [ + 899, + 917, + "DHS <> usage context" + ] + ], + "validated": true, + "empirical_context": "The Government has affirmed both in national legislation and at representations to the Committee on Elimination of Racial Discrimination that refugees are to be treated equally under the law with respect to key socioeconomic rights: property ownership; security; access to the courts; access to labor market ( no nationality discrimination ); freedom of expression and movement; and access to basic services including access to health, education, and housing. In December 2018, 4 The latest DHS was in 2012. Fieldwork for the 2017 DHS took place between October 2017 and February 2018.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it refers to the 'latest DHS' and specifies fieldwork dates, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because DHS is often associated with demographic and health surveys that collect structured data.", + "contextual_reason_agent": "The context confirms it is a dataset as it refers to the 'latest DHS' and specifies fieldwork dates, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 7, + "validated": 6, + "not_validated": 1 + } + }, + { + "filename": "040_Iraq-COVID-19-Vaccination-Project", + "page": 38, + "text": "To ensure that communities can engage nevertheless, the project will actively engage with citizens to collect feedback on project performance, including through the use of the Iterative Beneficiary Monitoring ( IBM ) survey and social media surveys. Findings from such surveys will be used to improve the communication campaign and citizen engagement. Through the IBM, as well as social media surveys, engagement with community and religious leaders, especially in remote areas, will ensure the inclusion of their ongoing feedback in the rollout and implementation of the COVID-19 vaccination campaign to strengthen targeting accuracy and increase uptake. To ensure citizen engagement, the project will: ( a ) ensure community engagement teams are gender-balanced; ( b ) target messages to areas where vulnerable groups, including refugees and IDPs, reside to inform them about safety measures and benefits; ( c ) tailor messages to the elderly and those with medical risks including their target family members and health care providers; and ( d ) provide information for disabled people in accessible formats, like Braille, large print; text captioning; videos etc. The project will also explore the possibility of including NGO representation in oversight bodies established to oversee transparent and inclusive administration of vaccines. H. Gender 87.", + "ner_text": [ + [ + 228, + 248, + "named" + ], + [ + 418, + 449, + "social media surveys <> reference population" + ], + [ + 465, + 477, + "social media surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "To ensure that communities can engage nevertheless, the project will actively engage with citizens to collect feedback on project performance, including through the use of the Iterative Beneficiary Monitoring ( IBM ) survey and social media surveys. Findings from such surveys will be used to improve the communication campaign and citizen engagement.", + "type": "survey", + "explanation": "It is indeed a dataset as it is explicitly mentioned as a method for collecting feedback, indicating its use in the research.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'social media surveys' implies a structured collection of data gathered from social media interactions.", + "contextual_reason_agent": "It is indeed a dataset as it is explicitly mentioned as a method for collecting feedback, indicating its use in the research.", + "contextual_signal": "follows 'collect feedback on project performance'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 12, + "text": "According to the International Food Policy Research Institute ( IFPRI ), the crisis resulted in a direct 25 percent increase in the poverty rate from 2006-2010, with poverty now affecting 44 percent of the population, or more than 10 million persons. The Government budget was reduced by more than 50 percent due in part to decreasing oil revenues as supplies have dwindled, limiting its capacity to provide basic services to an already impoverished population. Food insecurity and malnutrition levels in the country have surpassed emergency levels. Yemen is among the 10 countries in the world with the highest rates of food insecurity, with the country ranked third for the highest malnutrition in the world: 58 percent of children under 5 are stunted, and more than 1 in 10 children is acutely malnourished. Based on the World Food Program \u2019 s ( WFP ) recent Comprehensive Food Security Survey ( 2009 ), 7. 5 million persons are caught in the chronic poverty trap. The situation is further compounded by climate change, increasing influx of refugees from the Horn of Africa, high population growth, and low literacy. 3. Fiscal sustainability is the foremost economic issue in Yemen, as government spending is driven by a large public sector wage bill and unsustainably high fuel subsidies.", + "ner_text": [ + [ + 862, + 896, + "named" + ], + [ + 550, + 555, + "Comprehensive Food Security Survey <> data geography" + ], + [ + 725, + 741, + "Comprehensive Food Security Survey <> reference population" + ], + [ + 824, + 842, + "Comprehensive Food Security Survey <> publisher" + ], + [ + 849, + 852, + "Comprehensive Food Security Survey <> publisher" + ], + [ + 899, + 903, + "Comprehensive Food Security Survey <> publication year" + ], + [ + 1179, + 1184, + "Comprehensive Food Security Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "Yemen is among the 10 countries in the world with the highest rates of food insecurity, with the country ranked third for the highest malnutrition in the world: 58 percent of children under 5 are stunted, and more than 1 in 10 children is acutely malnourished. Based on the World Food Program \u2019 s ( WFP ) recent Comprehensive Food Security Survey ( 2009 ), 7. 5 million persons are caught in the chronic poverty trap.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that provides empirical data used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides data on food security.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that provides empirical data used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 19, + "text": "Furthermore, a number of stakeholder awareness activities will be developed in the first year and implemented throughout the project period, with the majority of such activities happening in the initial 2 years. This will help build awareness and achieve higher levels of commitment to behavioral change among stakeholders, to help achieve the targets identified. \uf0b7 Indicator ( 3 ) Total Waste Managed: the abovementioned stakeholder awareness activities in addition to the studies mentioned under Indicator ( 1 ), will also help ensure that progressively a larger portion of waste is managed in a sanitary manner. \uf0b7 Indicator ( 4 ) Improvement in Fee Collection Ratio: the development and deployment of the MIS will help keep track of fee collections and highlight areas for improvement which will enable the relevant agency to focus its efforts on such areas. Furthermore, the development and implementation of Guidelines for SWM Tariff and Fee Collection Systems in the initial years will help ensure the most appropriate billing mechanisms are utilized in each governorate. \uf0b7 Indicator ( 5 ) Improvement in Billings to Cost Ratio: once more, the actions taken in initial years will help with the achievement of targets for this indicator.", + "ner_text": [ + [ + 708, + 711, + "named" + ] + ], + "validated": false, + "empirical_context": "\uf0b7 Indicator ( 3 ) Total Waste Managed: the abovementioned stakeholder awareness activities in addition to the studies mentioned under Indicator ( 1 ), will also help ensure that progressively a larger portion of waste is managed in a sanitary manner. \uf0b7 Indicator ( 4 ) Improvement in Fee Collection Ratio: the development and deployment of the MIS will help keep track of fee collections and highlight areas for improvement which will enable the relevant agency to focus its efforts on such areas. Furthermore, the development and implementation of Guidelines for SWM Tariff and Fee Collection Systems in the initial years will help ensure the most appropriate billing mechanisms are utilized in each governorate.", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system that tracks fee collections, but it is not described as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is an acronym that could imply a structured system for managing information.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system that tracks fee collections, but it is not described as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 69, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "ner_text": [ + [ + 590, + 594, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 57 refugee host areas percentage of refugee children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in high refugee host areas Share of new entrants in the first grade of primary school attended pre-primary education in 29 selected SPG woredas The indicator measures children enrolled in the first year of primary school with some exposure to preschool education and calculated by dividing the gross enrollment rate for pre-primary level by the gross intake rate of primary level. Annual EMIS Annual data collection through school census MOE and WOFED Pregnant women & caregivers of children 0-23 months participating in Community Conversations ( CC ) in 29 selected SPG woredas These are currently pregnant women and caregivers of children 0-23 months who attended at least 1 CC session per month Annual UNISE / DHIS2 UNISE / DHIS2 - - Data collected by woreda health offices MOH and MOF Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Quarterly minutes report submitted by WOFED to MOF and verified by ESS Annual Admin WOFED report quarterly meeting minutes with all committee members signed to MOF WOFED Students benefiting from direct interventions to enhance learning Annual Project M & E Questionnaires will be developed to keep track on number of beneficiaries MOE and WOEFD", + "type": "system", + "explanation": "However, in this context, EMIS is mentioned as part of the annual data collection process, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is often associated with educational data collection.", + "contextual_reason_agent": "However, in this context, EMIS is mentioned as part of the annual data collection process, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 63, + "text": "EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in at least five ( 5 ) regions ( c ) Budget for EMIS activities, including evaluation, for subsequent year for ministries in charge of education sector approved DLR 6. 5: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in all ten ( 10 ) regions ( c ) Evaluation of national EMIS completed ( d ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved", + "ner_text": [ + [ + 342, + 346, + "named" + ] + ], + "validated": false, + "empirical_context": "EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in at least five ( 5 ) regions ( c ) Budget for EMIS activities, including evaluation, for subsequent year for ministries in charge of education sector approved DLR 6. 5: ( a ) Integrated statistical yearbook of education and training sector available and publicly disseminated ( b ) EMIS deconcentrated ( MINEDUB / MINESEC ) and operational in all ten ( 10 ) regions ( c ) Evaluation of national EMIS completed ( d ) Budget for EMIS activities for subsequent year for ministries in charge of education sector approved", + "type": "program", + "explanation": "However, EMIS is mentioned as a program and not as a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is related to education statistics.", + "contextual_reason_agent": "However, EMIS is mentioned as a program and not as a structured collection of data or a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 175, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 168 of 174 Landslides 5. Landslides are rated as a \u2018 very low \u2019 risk for Niger. Figure 7. 2 shows the landslide susceptibility for the project region, with the most susceptible regions shown in brown. 89 While site-specific information on soil type, slope, and other factors needs to be evaluated to determine the exact risk in different locations, this susceptibility data can inform regions where more detailed studies and consideration of mitigation options may have higher benefits relative to potential incurred damages from the hazard. Landslides are most likely to occur following precipitation events. Despite the high susceptibility of some areas of Niger for landslide, the very dry climate contributes to an overall \u2018 very low \u2019 risk for the country. Figure 7. 2. Landslide Susceptibility Map for Study Region Source: Broeckx, J., M. Vanmaercke, R. Duchateau, and J. Poesen. 2018. \u201c A Data-Based Landslide Susceptibility Map of Africa. \u201d Earth-Science Reviews 185: 102 \u2013 121. Note: Areas in brown indicate high to very high susceptibility. The most vulnerable regions of existing infrastructure for landslides are in the Center-East grid near Tahoua. Drought 6.", + "ner_text": [ + [ + 442, + 461, + "named" + ], + [ + 15, + 20, + "susceptibility data <> data geography" + ], + [ + 161, + 166, + "susceptibility data <> data geography" + ], + [ + 747, + 752, + "susceptibility data <> data geography" + ], + [ + 930, + 943, + "susceptibility data <> author" + ], + [ + 945, + 957, + "susceptibility data <> author" + ], + [ + 963, + 972, + "susceptibility data <> author" + ], + [ + 974, + 978, + "susceptibility data <> publication year" + ], + [ + 1220, + 1236, + "susceptibility data <> data geography" + ], + [ + 1242, + 1248, + "susceptibility data <> data geography" + ] + ], + "validated": true, + "empirical_context": "2 shows the landslide susceptibility for the project region, with the most susceptible regions shown in brown. 89 While site-specific information on soil type, slope, and other factors needs to be evaluated to determine the exact risk in different locations, this susceptibility data can inform regions where more detailed studies and consideration of mitigation options may have higher benefits relative to potential incurred damages from the hazard. Landslides are most likely to occur following precipitation events.", + "type": "data", + "explanation": "This is indeed a dataset as it provides structured information that can inform further studies and risk assessments.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'susceptibility data' implies a collection of information related to landslide risks.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured information that can inform further studies and risk assessments.", + "contextual_signal": "described as data that informs regions for further studies", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 54, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 48 of 101 ( refugee, host, general ), regions and districts. Female Out-of-school children and adolescents benefiting from direct interventions to support learning ( disaggregated by general population, host communities and refugees ) Beneficiaries include children and adolescents age 7-16 who are out-of - school and / or attending Makarantas schools. Annual Project data Reports from learning interventions. Efforts will be made to report disaggregated data per age, category of population ( general population, host communities, refugees ), regions and districts. PCU Female Primary and lower secondary schools benefiting from a performance grant and meeting the minimum requirements in terms of teaching and learning conditions Tracks whether project funded school grants are used to improve teaching / learning conditions at school level. Minimum requirements defined in Y1 and will include sub - indicators on the availability of essential inputs, differentiated for primary and lower secondary schools. Annual Project data Drawing from SDI survey methodology, data will be collected through visual inspections of classrooms and school premises in each primary and lower secondary schools surveyed.", + "ner_text": [ + [ + 1136, + 1146, + "named" + ], + [ + 153, + 198, + "SDI survey <> reference population" + ], + [ + 446, + 465, + "SDI survey <> data type" + ], + [ + 1313, + 1331, + "SDI survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Minimum requirements defined in Y1 and will include sub - indicators on the availability of essential inputs, differentiated for primary and lower secondary schools. Annual Project data Drawing from SDI survey methodology, data will be collected through visual inspections of classrooms and school premises in each primary and lower secondary schools surveyed.", + "type": "survey", + "explanation": "The context confirms it is a dataset as it describes a methodology for collecting data through surveys.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'SDI survey' implies a structured method of data collection.", + "contextual_reason_agent": "The context confirms it is a dataset as it describes a methodology for collecting data through surveys.", + "contextual_signal": "described as a methodology for data collection", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 17, + "text": "It is not expected that any activity or contract will be excluded from the Program, in accordance with the Bank \u2019 s Policy and Directive on Program-for-Results Financing and the rationale for such exclusion. The Program does not finance the purchase by the GOJ of any large IT systems under high-value contracts. The 2020 government cloud policy promotes the use of cloud services across government entities. MODEE provides them with cloud infrastructure ( through the government \u2019 s private cloud ). Further, IT developments are usually funded through fee for services rather than through 9 Only about 800, 000 of the nearly 11 million ID holders have activated their digital IDs on Sanad as of January 2024, and only 35 percent of those are women. Similarly, of the 90, 000 visitors to the two existing GSCs since their inauguration, fewer than 20 percent have been women ( the exact percentage is not available because MODEE has not been collecting GSC visitor demographics thus far ). 10 MODEE. 2022. Jordan Government Websites Standards. https: / / www. modee. gov. jo / ebv4. 0 / root_storage / en / eb_list_page / government_websites_guidelines_2022_v2. 0_-en. pdf.", + "ner_text": [ + [ + 684, + 689, + "named" + ] + ], + "validated": false, + "empirical_context": "MODEE provides them with cloud infrastructure ( through the government \u2019 s private cloud ). Further, IT developments are usually funded through fee for services rather than through 9 Only about 800, 000 of the nearly 11 million ID holders have activated their digital IDs on Sanad as of January 2024, and only 35 percent of those are women. Similarly, of the 90, 000 visitors to the two existing GSCs since their inauguration, fewer than 20 percent have been women ( the exact percentage is not available because MODEE has not been collecting GSC visitor demographics thus far ).", + "type": "program", + "explanation": "'Sanad' is not a dataset as it is referred to as a system for digital IDs, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Sanad' is a dataset because it is mentioned in the context of digital IDs.", + "contextual_reason_agent": "'Sanad' is not a dataset as it is referred to as a system for digital IDs, not a structured collection of data.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [] + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 16, + "text": "Furthermore, as a national system ( compared to a collection of kebele systems ), it can create a sense of belonging and will have a consistent look and feel, providing equal access to services for all registered persons. Second, the Fayda ID system can foster inclusion and shared prosperity by improving access to services and economic opportunities. As a digital system, Fayda will enable the Government, businesses, and civil society to harness digital technologies to make products and services more inclusive and human centered. 24. The World Bank, through ID4D, has been providing technical assistance to the GoE on ID issues since 2016, and modest financing for upstream activities through Digital Foundations Project ( P171034 ) since 2021. An ID4D Diagnostic was completed in 201724 and updated in 2019. In 2020, a legal assessment was carried out, which contributed to the Principles and Governance Structure of the National Identity Program published by the GoE. The ID4D technical assistance also contributed to the development of the Digital ID Proclamation, now adopted.", + "ner_text": [ + [ + 234, + 249, + "named" + ] + ], + "validated": false, + "empirical_context": "Furthermore, as a national system ( compared to a collection of kebele systems ), it can create a sense of belonging and will have a consistent look and feel, providing equal access to services for all registered persons. Second, the Fayda ID system can foster inclusion and shared prosperity by improving access to services and economic opportunities. As a digital system, Fayda will enable the Government, businesses, and civil society to harness digital technologies to make products and services more inclusive and human centered.", + "type": "system", + "explanation": "However, the context describes the Fayda ID system as a digital system focused on inclusion and access, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'system' which can imply data handling.", + "contextual_reason_agent": "However, the context describes the Fayda ID system as a digital system focused on inclusion and access, not as a structured collection of data.", + "contextual_signal": "mentioned only as a system, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "166_304360LK", + "page": 33, + "text": "This audit will include monitoring on a sample basis to ensure compliance on technical engineering standards. Monitoring arrangements will also include plans for intensive supervision by IDA to ensure prompt solutions to implementation difficulties. At a minimum, a full supervision mission i s needed bi - annually with specialists visiting the targeted areas more frequently if necessary. Bi-annual reports detailing progress in meeting implementation and development objectives ( detailed in the procurement plan and in t h s annex ) will be prepared and submitted to IDA. An evaluation report will also be prepared for the mid-term review and at the close of the program. Responsibility for reporting rests with NEHRU. The results o f the monitoring will be used to analyze and improve program management and to inform subsequent management decisions. The MTR Evaluation report will be used to implement changes in the program design, if necessary. The data required will be collected through qualitative means such as focus group discussions and interviewing o f key informants. Quantitative data collection will occur through beneficiary surveys and through the monitoring o f program outputs by the community audits, thrd party technical audit and district and divisional secretary technical supervision. Forms for monitoring will be designed by NEHRU together with the relevant consultants. Forms will appropriately contain, but will not be restricted to, the following data ( in addition to that required by the FMR ): Benejkiary Selection for Villages, Divisions and Districts 0 DistrictDivisionNillage Percentage damage Village ranlung ( for villages ) 28", + "ner_text": [ + [ + 1132, + 1151, + "named" + ], + [ + 716, + 721, + "beneficiary surveys <> publisher" + ], + [ + 1353, + 1358, + "beneficiary surveys <> publisher" + ] + ], + "validated": true, + "empirical_context": "The data required will be collected through qualitative means such as focus group discussions and interviewing o f key informants. Quantitative data collection will occur through beneficiary surveys and through the monitoring o f program outputs by the community audits, thrd party technical audit and district and divisional secretary technical supervision. Forms for monitoring will be designed by NEHRU together with the relevant consultants.", + "type": "survey", + "explanation": "In the context, 'beneficiary surveys' is explicitly mentioned as a method for quantitative data collection, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'beneficiary surveys' implies a structured collection of data from participants.", + "contextual_reason_agent": "In the context, 'beneficiary surveys' is explicitly mentioned as a method for quantitative data collection, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 63, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 59 of 74 installment of livelihood grant. Number of beneficiaries receiving Economic Opportunities who are female youth Number of beneficiaries receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, of which are youth and female, defined as women between the ages of 18 and 35 years, and have received at least 1 installment of livelihood grant. This indicator will be measured, at a minimum, on a quarterly basis SNSOP MIS Beneficiary data will be collected during registration and updated during project implementation. Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "ner_text": [ + [ + 1188, + 1191, + "named" + ] + ], + "validated": false, + "empirical_context": "Payment data will be regularly updated in the SNSOP MIS The Implementing Partner in charge of Component 2 will be responsible for data collection. Percentage of grievances resolved through the GRM Number of all grievances which are lodged, processed and resolved through the SNSOP GRM divided by all complaints which are lodged, processed and / or resolved, expressed as a percentage This indicator will be reviewed on a monthly basis Monthly GRM Reports GRM data will be reviewed and analyzed on a monthly basis through the MIS GRM module Selected Implementing Partner Percentage of cash transfers to beneficiary households made on time The total number of cash transfers which were completed on time for both LIPW and DIS, per the Project Operations Manual, divided by the total number of cash transfers, expressed This indicator", + "type": "system", + "explanation": "However, 'MIS' refers to a management information system, which is not a structured collection of data itself but rather a system for managing information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with data collection and reporting.", + "contextual_reason_agent": "However, 'MIS' refers to a management information system, which is not a structured collection of data itself but rather a system for managing information.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "061_Turkey-Formal-Employment-Creation-Project", + "page": 48, + "text": "The World Bank Formal Employment Creation Project ( P171766 ) Page 44 of 86 the project Number of formal jobs created by Grants ( disaggregated by gender ) Annual Progress reports Declaration of firms verified by SGK TKYB Number of formal jobs created in SMEs ( disaggregated by gender ) Annual Progress reports Firm declaration verified by SGK TKYB Number of formal jobs created for women Increased management skills in loan beneficiary firms Annual Progress reports Skills measurement surveys TKYB and PFIs Increased employee skills in grant beneficiary firms Annual Progress reports Skills measurement surveys TKYB ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Capital Stock of loan beneficiary firms Capital-output ratio will be calculated per firm ( and cell - based ) Annual Progress reports Firm survey TKYB and PFIs Number of firms receiving grants Annual Progress reports TKYB administrative data TKYB Number of women-inclusive firms receiving grants Annual Progress reports TKYB administrative data TKYB", + "ner_text": [ + [ + 468, + 494, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Formal Employment Creation Project ( P171766 ) Page 44 of 86 the project Number of formal jobs created by Grants ( disaggregated by gender ) Annual Progress reports Declaration of firms verified by SGK TKYB Number of formal jobs created in SMEs ( disaggregated by gender ) Annual Progress reports Firm declaration verified by SGK TKYB Number of formal jobs created for women Increased management skills in loan beneficiary firms Annual Progress reports Skills measurement surveys TKYB and PFIs Increased employee skills in grant beneficiary firms Annual Progress reports Skills measurement surveys TKYB ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Capital Stock of loan beneficiary firms Capital-output ratio will be calculated per firm ( and cell - based ) Annual Progress reports Firm survey TKYB and PFIs Number of firms receiving grants Annual Progress reports TKYB administrative data TKYB Number of women-inclusive firms receiving grants Annual Progress reports TKYB administrative data TKYB", + "type": "survey", + "explanation": "In this context, it is indeed a dataset as it is used to collect data on skills measurement for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'surveys' which typically collect structured data.", + "contextual_reason_agent": "In this context, it is indeed a dataset as it is used to collect data on skills measurement for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "162_PAD12050PAD0P15264600PUBLIC00Box391428B", + "page": 53, + "text": "This will enable management decisions to be made based on an assessment of whether the program is moving towards its objectives. A periodic financial audit will be conducted which will also help to identify and mitigate any potential sign of fraud and governance issues. The program monitoring system will rely on regular and accurate data collection and analysis to identify the timely implementation of activities, the achievement of intended results, and positive and negative unintended effects. i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii. Sources of Data: ( i ) the routine data collected through the claims processing system; ( ii ) data routinely collected at health facility level; ( iii ) additional data collected by MoPH such as from enrollment registers, spot checks and ad-hoc surveys; ( iv ) information collected during the verification processes; and ( v ) data from the MoPH rapid facility assessment.", + "ner_text": [ + [ + 765, + 789, + "named" + ] + ], + "validated": false, + "empirical_context": "i. HIS will form the basis for a well-functioning monitoring system and will comprise: ( i ) design of a program database; ( ii ) development of registers and forms to gather data ( enrollment registers, provider data collection forms ); ( iii ) development of the claims processing system, which will feed data into the HIS; ( iv ) collection and analysis of program indicators; ( v ) design of wider monitoring and verification activities ( such as spot checks of providers, user satisfaction surveys through sampling of beneficiaries at the household level ) which will provide data with which to counter-check the HIS data; and ( vi ) capacity building of MoPH in monitoring of the EHCP program. ii.", + "type": "system", + "explanation": "However, it is described as a system rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves processing data.", + "contextual_reason_agent": "However, it is described as a system rather than a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "082_PAD2809-PAD-PUBLIC-disclosed-9-12-2018-IDA-R2018-0286-1", + "page": 68, + "text": "The World Bank Chad - Refugees and Host Communities Support Project ( P164748 ) Page 63 Accounting Arrangements 13. Accounting policies and procedures. The current accounting standards in use in West and Central African Francophone countries for ongoing Bank-financed projects will be applicable. SYSCOHADA is the assigned accounting system in West and Central African Francophone countries. Project accounts will be maintained on an accrual basis, supported with appropriate records and procedures to track commitments and to safeguard assets. Annual financial statements will be prepared by the project implementing agency in accordance with SYSCOHADA and Bank requirements. Accounting and control procedures will be documented in the Administrative, Financial and Accounting Procedures Manual. 14. FM manuals. CFS will update and adapt the administrative, financial and accounting procedures manual it currently uses for the PFS. The updated procedures manual should be adopted before project effectiveness. 15. Accounting staff. The current FM team consists of an administrative and financial specialist and a senior accountant at the central level, as well as two assistant accountants at the regional level. One additional accountant and three assistant accountants will be hired. The accountant will be based in N \u2019 djamena while the assistant accountants will be based in the regional offices to be opened as part of the new project. 16. Accounting software.", + "ner_text": [ + [ + 644, + 653, + "named" + ] + ], + "validated": false, + "empirical_context": "Project accounts will be maintained on an accrual basis, supported with appropriate records and procedures to track commitments and to safeguard assets. Annual financial statements will be prepared by the project implementing agency in accordance with SYSCOHADA and Bank requirements. Accounting and control procedures will be documented in the Administrative, Financial and Accounting Procedures Manual.", + "type": "framework", + "explanation": "However, SYSCOHADA is a framework for accounting standards, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SYSCOHADA is a dataset because it is related to financial statements and accounting procedures.", + "contextual_reason_agent": "However, SYSCOHADA is a framework for accounting standards, not a structured collection of data.", + "contextual_signal": "mentioned only as a framework, not as a data source", + "tags": [] + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 83, + "text": "While data on WTP for electricity in Chad are not available, according to the results of the most recent expenditure survey data, households currently use on average 2. 5 light points across rural areas in Chad for around five hours a day for which they spend about US $ 4. 7 per month per household. 60 Even if an inefficient 40 W light point is assumed, a household in Chad would consume only about 15 kWh per month for which it is currently spending about US \u00a2 30 per kWh, which can be considered as a lower bound on WTP for electricity. As WTP per kWh would decrease with greater consumption, and to be on the conservative side, for the analysis, a WTP of US \u00a2 25 per kWh is used. 8. While the analysis does not consider other indirect benefits, it is expected that the project will contribute toward other economic benefits that are more difficult to quantify and monetize. These indirect benefits include improved air quality from reduced consumption of kerosene; reduced poisoning and accidental fires; and wider benefits that can be linked to access to modern electricity solutions such as improved health, improved connectivity, and improved security. Access to modern energy solutions is also expected to increase income-generating opportunities and improve the socioeconomic situation of households and MSMEs, with an expected positive impact on education and overall lifestyle. This means that the results from the economic analysis can be considered as conservative estimates of the overall economic benefits of the project. 9. The project is also expected to bring some benefits from reduced GHG emissions and local pollution. In addition to the quantifiable benefits discussed above, the economic analysis also considers 59 It is noted that this is a regional estimate as for Chad, such data are not available. 60 Results of the household expenditure survey are provided in annex 6.", + "ner_text": [ + [ + 105, + 128, + "named" + ], + [ + 37, + 41, + "expenditure survey data <> data geography" + ], + [ + 130, + 140, + "expenditure survey data <> reference population" + ], + [ + 206, + 210, + "expenditure survey data <> data geography" + ], + [ + 371, + 375, + "expenditure survey data <> data geography" + ], + [ + 1299, + 1309, + "expenditure survey data <> reference population" + ], + [ + 1791, + 1795, + "expenditure survey data <> data geography" + ] + ], + "validated": true, + "empirical_context": "While data on WTP for electricity in Chad are not available, according to the results of the most recent expenditure survey data, households currently use on average 2. 5 light points across rural areas in Chad for around five hours a day for which they spend about US $ 4.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on household expenditures relevant to the research context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'expenditure survey data' suggests a structured collection of data related to household spending.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on household expenditures relevant to the research context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 49, + "text": "While keeping the Tawjihi as a national examination, its secondary graduation function will be reformed putting more emphasis on school \u2010 based assessment and / or a newly created school leaving exam ( which could still be known as Tawjihi ) as the unique requirement for secondary school graduation. Legislations / decrees and standard operating procedures Third Party The verification entity will verify the passing of legislation and / or decrees establishing a separate requirement for secondary school graduation ( even if Tawjihi remains as a national examination ) and for competitive voluntary admission. DLR # 8. 1 Geographical Information System ( GIS ) is operational and updated with latest data required for The GIS system is operational, contains the latest data updated on a yearly basis, and is able to produce reports. MOE staff have been trained on the use of the system and are able to use it to inform planning and monitoring GIS reports Third Party The verification agency reviews GIS reports and checks that latest data is available.", + "ner_text": [ + [ + 624, + 655, + "named" + ] + ], + "validated": false, + "empirical_context": "DLR # 8. 1 Geographical Information System ( GIS ) is operational and updated with latest data required for The GIS system is operational, contains the latest data updated on a yearly basis, and is able to produce reports. MOE staff have been trained on the use of the system and are able to use it to inform planning and monitoring GIS reports Third Party The verification agency reviews GIS reports and checks that latest data is available.", + "type": "system", + "explanation": "However, the context indicates that it is a system used for managing and reporting data, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it mentions 'latest data' and 'reports'.", + "contextual_reason_agent": "However, the context indicates that it is a system used for managing and reporting data, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "153_476340PAD0ZR0P101Official0Use0Only1", + "page": 60, + "text": "when possible. Output and process indicators are integral to tracking project implementation in the DRC context, particularly for those related to livelihoods and the well-being o f forest communities. 3. The project Results Framework includes indicators that reflect overall economic, social and environmental performance. Progress in sensitive areas such as preservation o f biodiversity, management o f national parks, and reduction o f illegal logging will be measured through well defined protocols that leave minimum room for subjective interpretation and minimize risks of conflict. Indicators relying on ministerial data have been designed so as to reduce ambiguity and facilitate collection. Only two baselines ( regarding the knowledge o f rights under the forest code and additional income sources ) are to be established during the first year o f program implementation. 4. Progress in protected area management will be scored according to a \u201c tracking tool \u201d developed by the World BanWWWF Alliance, consistent with the recommendations o f the World Commission on Protected Areas Management Effectiveness. Illegal logging tracking will measure variations in the length o f logging roads located within unallocated forest concessions and national parks, as observed in time series o f satellite images interpreted in collaboration between the Government and the Global Forest Watch Program o f the World Resources Institute.", + "ner_text": [ + [ + 1281, + 1313, + "named" + ] + ], + "validated": false, + "empirical_context": "Progress in protected area management will be scored according to a \u201c tracking tool \u201d developed by the World BanWWWF Alliance, consistent with the recommendations o f the World Commission on Protected Areas Management Effectiveness. Illegal logging tracking will measure variations in the length o f logging roads located within unallocated forest concessions and national parks, as observed in time series o f satellite images interpreted in collaboration between the Government and the Global Forest Watch Program o f the World Resources Institute.", + "type": "data", + "explanation": "However, it is not a structured collection of data used as a data source in this context, but rather a method of observation.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'time series of satellite images' suggests a collection of data over time.", + "contextual_reason_agent": "However, it is not a structured collection of data used as a data source in this context, but rather a method of observation.", + "contextual_signal": "described as a method of observation, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "182_multi0page", + "page": 8, + "text": "Poverty and Social Impact of Transition: The recent repeated shocks - - a consequence of political strife and social unrest - - have created major economic and social problems in the country. Because of these shocks, many Albanians have realized a loss of income and earning opportunities and have incurred risks to their personal well-being and safety. Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "ner_text": [ + [ + 1027, + 1050, + "named" + ], + [ + 222, + 231, + "Household Budget Survey <> reference population" + ], + [ + 660, + 675, + "Household Budget Survey <> data description" + ], + [ + 685, + 689, + "Household Budget Survey <> reference year" + ], + [ + 1070, + 1078, + "Household Budget Survey <> publication year" + ] + ], + "validated": true, + "empirical_context": "Poverty, which is concentrated in the rural areas and north of the country, is a result of the inability of individuals, households or communities to protect themselves, formally or informally, against the outcome of these shocks, or even against the more mundane income risks faced over their lifetime. A poverty profile based on 1996 data showed that some 30 percent of the rural and some 15 percent of the urban population are poor, with many others vulnerable to poverty due to their incomes being close to the poverty threshold ( this information will be updated through an analysis of a Living Conditions Survey, a Multiple Indicators Cluster Survey and ( likely ) a Household Budget Survey to be completed by May 2001, and through an FY2002 LSMS ). Young people, less educated people, and families with many children, are particularly at risk of poverty.", + "type": "survey", + "explanation": "This is indeed a dataset as it is mentioned as a source of information that will be used to update the poverty profile analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that collects data on household budgets.", + "contextual_reason_agent": "This is indeed a dataset as it is mentioned as a source of information that will be used to update the poverty profile analysis.", + "contextual_signal": "follows 'through an analysis of' and mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 22, + "text": "Uganda \u2019 s off-grid market is one of the most dynamic in East Africa and according to the National Electrification Survey Report-2018 ( UBOS 2020 ), 18 percent of the population currently relies on off-grid solar technologies providing Tier 1-level access and above. In terms of sales of off-grid solar products, Uganda has the third largest market in East Africa behind Kenya and Ethiopia. The Uganda Solar Energy Association ( USEA ) reports over 225 solar companies operating in the Ugandan market by end-December 2021, with most sales coming from a few international companies. While sales of off-grid solar products were about 400, 000 in 2019, it fell drastically at a level of 280, 000 in 2020 due to the impacts of the COVID-19 pandemic and government lockdowns. Sales in the first half of 2021 amounted to 110, 000 off-grid solar products. The proposed project will support Uganda \u2019 s efforts to scale up access to electricity and clean cooking for households including for refugees and their host communities, commercial enterprises, including minerals and mining enterprises, industrial parks, and health and education facilities. 19. Achieving universal access by 2030 requires a steady commitment from the GoU and development", + "ner_text": [ + [ + 90, + 133, + "named" + ] + ], + "validated": true, + "empirical_context": "Uganda \u2019 s off-grid market is one of the most dynamic in East Africa and according to the National Electrification Survey Report-2018 ( UBOS 2020 ), 18 percent of the population currently relies on off-grid solar technologies providing Tier 1-level access and above. In terms of sales of off-grid solar products, Uganda has the third largest market in East Africa behind Kenya and Ethiopia.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides empirical data on the electrification status in Uganda, used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a report that likely contains structured data on electrification.", + "contextual_reason_agent": "This is indeed a dataset as it provides empirical data on the electrification status in Uganda, used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 57, + "text": "Data source / Agency DLR 8. 1: The source of information is official documents approved by the Government of Jordan on the enforcement of 2007 Access to Information law. DLR 8. 2: Public dashboard and semi-annual reports of the PMDU. DLR 8. 3 & 4: Annual report on the implementation of access to information from the Information Commission. Verification Entity KACE. Procedure DLR 8. 1: Verification of official notification documenting the submission by the GoJ to Parliament of amendments to the 2007 Access to Information Law that they include the following provisions: ( 1 ) opening of the Information Council, which oversees the enforcement of the law to Civil Society Organizations ( CSOs ), strengthening its oversight responsibility; ( 2 ) specifying that exceptions do not include information related to human rights violations, war crimes, and crimes against humanity; ( 3 ) mandating proactive information disclosure and the appointment of information officers in all departments; and ( 4 ) shortening delays to respond to requests for information. DLR 8. 2: Verification of the availability of information on the PMDU dashboard every semester. DLR 8. 3 & 4: Verification of annual reports to be submitted by the Information Council regarding the enforcement of the Access to Information Law based on spot checks by the IVA in government entities.", + "ner_text": [ + [ + 60, + 78, + "named" + ] + ], + "validated": false, + "empirical_context": "Data source / Agency DLR 8. 1: The source of information is official documents approved by the Government of Jordan on the enforcement of 2007 Access to Information law. DLR 8.", + "type": "document", + "explanation": "'Official documents' are not a structured collection of data but rather individual records or texts.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'official documents' are a dataset because they are referenced as a source of information.", + "contextual_reason_agent": "'Official documents' are not a structured collection of data but rather individual records or texts.", + "contextual_signal": "mentioned only as a source of information, not as a data source", + "tags": [] + }, + { + "filename": "121_PAD1190-PAD-P152848-PUBLIC-Box391435B-LB-EESSP-Final-PAD-for-printing", + "page": 11, + "text": "The higher quality associated with private schools means that public-school students are likely to learn less and face more difficult job prospects upon graduation. This sets up inter-generational transmission of both lower learning levels and lower income. 9 Public schools exhibit lower academic outcomes in international and national assessments. The level of public school students was 10 percent lower than that of private schools in the 2011 Trends in International Mathematics and Science Study ( TIMSS ) results. Indeed, based on the 2004 household survey, poverty and education are highly correlated in Lebanon. 5 Lebanon \u2019 s inequality-adjusted HDI is 20. 8 percent lower than its HDI, among the largest losses in the group of countries in the high human development category. 6 World Economic Forum \u2019 s 2013 Human Capital Index 7 Further information about the level of private sector investments is expected from a forthcoming Education Expenditure Review. 8 World Bank Ed Stats 9 \u201c Poverty, Growth and Income Distribution in Lebanon, \u201d August 2008.", + "ner_text": [ + [ + 448, + 501, + "named" + ], + [ + 62, + 84, + "Trends in International Mathematics and Science Study <> reference population" + ], + [ + 363, + 385, + "Trends in International Mathematics and Science Study <> reference population" + ], + [ + 443, + 447, + "Trends in International Mathematics and Science Study <> publication year" + ], + [ + 504, + 509, + "Trends in International Mathematics and Science Study <> acronym" + ], + [ + 612, + 619, + "Trends in International Mathematics and Science Study <> data geography" + ], + [ + 623, + 630, + "Trends in International Mathematics and Science Study <> data geography" + ], + [ + 1037, + 1044, + "Trends in International Mathematics and Science Study <> data geography" + ] + ], + "validated": true, + "empirical_context": "9 Public schools exhibit lower academic outcomes in international and national assessments. The level of public school students was 10 percent lower than that of private schools in the 2011 Trends in International Mathematics and Science Study ( TIMSS ) results. Indeed, based on the 2004 household survey, poverty and education are highly correlated in Lebanon.", + "type": "study", + "explanation": "This is indeed a dataset as it is referenced in the context of academic assessments and provides empirical data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific study that provides data on academic outcomes.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced in the context of academic assessments and provides empirical data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 21, + "validated": 8, + "not_validated": 13 + } + }, + { + "filename": "063_Niger-Learning-Improvement-for-Results-in-Education-Project", + "page": 60, + "text": "The World Bank Niger Learning Improvement for Results in Education Project ( P168779 ) Page 54 of 101 international experts of the Task force and their counterparts ( homologues ) officially nominated at MEP and MES to ensure national capacities are built throughout project implementation. A national online education platform is established The development of the platform, content, and its free access and use. To focus on teachers, teacher trainers, ENI staff and administrative staff. Platform conceived to extend to other education stakeholders in the future, including students, parents and the general public. Annual Online education platform Reports generated by the online education platform PCU Annual school census conducted through new digital tools and report produced Digital data collection carried out and report of the results prepared before the end of the school year Annual Online education platform Census reports generated by the online education platform Directorate for Information System ME IO Table SPACE", + "ner_text": [ + [ + 706, + 726, + "named" + ], + [ + 576, + 584, + "Annual school census <> reference population" + ], + [ + 783, + 806, + "Annual school census <> data description" + ] + ], + "validated": true, + "empirical_context": "Platform conceived to extend to other education stakeholders in the future, including students, parents and the general public. Annual Online education platform Reports generated by the online education platform PCU Annual school census conducted through new digital tools and report produced Digital data collection carried out and report of the results prepared before the end of the school year Annual Online education platform Census reports generated by the online education platform Directorate for Information System ME IO Table SPACE", + "type": "census", + "explanation": "The context confirms it is a dataset as it is mentioned in relation to digital data collection and report production.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a census, which typically involves structured data collection.", + "contextual_reason_agent": "The context confirms it is a dataset as it is mentioned in relation to digital data collection and report production.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "019_BOSIB16bf0132a0651b03113d0de461f77e", + "page": 63, + "text": "Finance department had also introduced computerized cashbook whereby transactions are captured in an Excel spreadsheet. MoGEI could not access the current cashbook due to technical glitch experienced at the time of assessment, a printout of June 2020 cashbook was therefore presented as the most current cashbook available for review. Under MoHEST, the assessment noted incomplete recording of cashbook in Excel spreadsheet, transactions from July 2021 to September 12, 2022, could not be traced and needs to be updated using hardcopies in file. A review of utilization of Financial Forms in MoHEST noted that payments are made without recording Payment Order ( Form 40 ) in addition to failure to record Treasury Chest Book also known as Form 19. Failure to update Chest Books was attributed to staff turnover in the office of the Cashier. MoFP has also provided a 5-digit chart of accounts to be used for classification of expenditure with the objective of accurate recording of expenditure in line with the approved budget line items. It was noted that chart of accounts codes is only recorded in request letter sent to MoFP when requesting funds but the same is not used when recording expenditure both manually and electronically. Manual systems lack critical control areas such as controls for completeness, chart of accounts for ledger account code classification and controls for accuracy of financial transactions.", + "ner_text": [ + [ + 866, + 891, + "named" + ] + ], + "validated": false, + "empirical_context": "Failure to update Chest Books was attributed to staff turnover in the office of the Cashier. MoFP has also provided a 5-digit chart of accounts to be used for classification of expenditure with the objective of accurate recording of expenditure in line with the approved budget line items. It was noted that chart of accounts codes is only recorded in request letter sent to MoFP when requesting funds but the same is not used when recording expenditure both manually and electronically.", + "type": "chart of accounts", + "explanation": "However, it is not functioning as a data source in this context, as it is described as a classification tool rather than a collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves numerical codes that could be seen as structured data.", + "contextual_reason_agent": "However, it is not functioning as a data source in this context, as it is described as a classification tool rather than a collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a classification tool, not as a data source", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 90, + "text": "85 7. Nevertheless, analysis of recent PISA results ( World Bank, 2008 ) 6 indicate that given its endowments ( such as school, student, and household characteristics ), Jordan should likely be performing even better in terms of cognitive skill development. By Jordan \u2019 s own internal assessment systems, there is significant need for quality enhancing reforms. Pass rates on the general secondary certificate GSC ( essentially the secondary school exit exam also known as the Tawjihi ) are quite low ( about 55 percent of regular track students and under 35 percent of students from \u201c irregular \u201d tracks ), as are results on Jordan \u2019 s own national achievement tests which indicate the majority of 8th and 10th grade students are not proficient in any of the subjects ( Science, Math, English, and Arabic. ) In addition, the Tawjihi, like many exams of its kind, does not measure the same kinds of cognitive skills as does PISA, and therefore it creates a strong incentive against education quality. ERfKE II directly addresses this need to transform the exams, and it is not insignificant that increasing scores on school, national, and international assessments is the first ERfKE II project development objective and key indicator, before completion rates and enrolment rates.", + "ner_text": [ + [ + 641, + 667, + "named" + ] + ], + "validated": false, + "empirical_context": "By Jordan \u2019 s own internal assessment systems, there is significant need for quality enhancing reforms. Pass rates on the general secondary certificate GSC ( essentially the secondary school exit exam also known as the Tawjihi ) are quite low ( about 55 percent of regular track students and under 35 percent of students from \u201c irregular \u201d tracks ), as are results on Jordan \u2019 s own national achievement tests which indicate the majority of 8th and 10th grade students are not proficient in any of the subjects ( Science, Math, English, and Arabic. ) In addition, the Tawjihi, like many exams of its kind, does not measure the same kinds of cognitive skills as does PISA, and therefore it creates a strong incentive against education quality.", + "type": "assessment", + "explanation": "However, it is mentioned as an assessment tool rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to tests that provide measurable results.", + "contextual_reason_agent": "However, it is mentioned as an assessment tool rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "035_Senegal-Casamance-Economic-Development-Project", + "page": 71, + "text": "participation of women in local decision - making bodies GAP Access to services: Gaps in access to services and markets due to distance and socio-economic barriers Organizing consultations with women and girls to gather their specific needs in terms of access to services and markets Number of consultations conducted with women and girls to gather their specific needs for access to services and markets Availability of specific needs analysis reports on access to services and markets Development of awareness - raising activities among community leaders that promote women ' s access to services and markets % of women and girls reporting fewer barriers to accessing services and markets compared to baseline Improved access to services and markets for women and girls Production facilities in identified strategic value chains ( e. g., small buildings, production centers, workshops, processing facilities, etc. ) Number of production facilities in strategic value chains The net income of women and girls has increased significantly from the baseline GAP Control of Assets Gaps in land ownership and management Networking of women in order to benefit from support, primarily access to land and water Number of women ' s networks created and supported in terms of land - Improved land access rates for women and youth ( M / F ) - Reduced gaps in land ownership and management by women and girls 29 Agence Nationale de la Statistique et de la D\u00e9mographie ( ANSD ) [ Senegal ], and ICF International. 2012. Senegal Demographic and Health and Multiple Indicator Cluster Survey ( EDS-MICS ) 2010-2011. Rockville, Maryland, USA: ANSD and ICF International.", + "ner_text": [ + [ + 1544, + 1577, + "named" + ], + [ + 194, + 199, + "Multiple Indicator Cluster Survey <> reference population" + ], + [ + 570, + 575, + "Multiple Indicator Cluster Survey <> reference population" + ], + [ + 616, + 621, + "Multiple Indicator Cluster Survey <> reference population" + ], + [ + 1306, + 1311, + "Multiple Indicator Cluster Survey <> reference population" + ], + [ + 1460, + 1464, + "Multiple Indicator Cluster Survey <> author" + ], + [ + 1469, + 1476, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 1484, + 1501, + "Multiple Indicator Cluster Survey <> publisher" + ], + [ + 1503, + 1507, + "Multiple Indicator Cluster Survey <> publication year" + ], + [ + 1509, + 1516, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 1591, + 1600, + "Multiple Indicator Cluster Survey <> publication year" + ], + [ + 1602, + 1626, + "Multiple Indicator Cluster Survey <> data geography" + ], + [ + 1628, + 1632, + "Multiple Indicator Cluster Survey <> author" + ], + [ + 1637, + 1654, + "Multiple Indicator Cluster Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "2012. Senegal Demographic and Health and Multiple Indicator Cluster Survey ( EDS-MICS ) 2010-2011. Rockville, Maryland, USA: ANSD and ICF International.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly identified as a survey that collects demographic and health indicators.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'Survey', which often indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly identified as a survey that collects demographic and health indicators.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 19, + "text": "The World Bank SPF: Improved Livelihoods for Internally Displaced Persons in Azerbaijan ( P178125 ) Page 17 of 34 will recruit an environmental and social specialist for the purposes of the project and all PIU and field staff will undergo training on relevant aspects of the ESF at project initiation. 34. The SCRI team has not yet implemented projects under the World Bank \u2019 s new procurement framework. The team will be supported with training and assistance from WB procurement staff in the implementation of the project procurement strategy for development ( PPSD ) which was developed during project preparation. B. Results Monitoring and Evaluation 35. The PIU within SCRI will be responsible for monitoring and evaluating the outcomes of the project against agreed indicators as set out in the Results Framework. A consultant will be hired as an M & E Specialist to undertake and coordinate this work and to report on results indicators. The M & E Specialist will collect baseline data, which will enable the Committee to compare the before and after situation for project participants. Data after training program completion will be collected by the M & E Specialist and if additional data collection support is needed, SCRI will engage the staff of its Monitoring Department and the M & E Specialist will provide staff with the needed training and quality assurance supervision.", + "ner_text": [ + [ + 979, + 992, + "named" + ], + [ + 77, + 87, + "baseline data <> data geography" + ], + [ + 1072, + 1092, + "baseline data <> reference population" + ] + ], + "validated": true, + "empirical_context": "A consultant will be hired as an M & E Specialist to undertake and coordinate this work and to report on results indicators. The M & E Specialist will collect baseline data, which will enable the Committee to compare the before and after situation for project participants. Data after training program completion will be collected by the M & E Specialist and if additional data collection support is needed, SCRI will engage the staff of its Monitoring Department and the M & E Specialist will provide staff with the needed training and quality assurance supervision.", + "type": "data", + "explanation": "In this context, 'baseline data' is indeed used as a structured collection of data for evaluating project outcomes.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'baseline data' is a dataset because it refers to data collected for comparison purposes.", + "contextual_reason_agent": "In this context, 'baseline data' is indeed used as a structured collection of data for evaluating project outcomes.", + "contextual_signal": "mentioned as data collected for comparison before and after the project", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "151_533550PAD0P117101Official0Use0Only1", + "page": 43, + "text": "Number of cases that undergo a recertification process using the PMT method 2. Preparation of a targeting policy paper 3. Endorsement of the poverty-based targeting policy by the SWF Board 4. Number of new applications 5. Percentage of new applications processed and decided on 6. Percentage of appeals received and responded to per year 7. Percentage of beneficiaries who collect their quarterly payments on time 8. Average number of contacts with beneficiary households per year 9. M & E system in place 10. Percentage of administrative cost compared to overall program cost 1 1. All departments of the SWF have access to MIS and use it for reporting purposes Use of Results Monitoring Demonstrating that SWF resources are targeted to poorer households Verifying the communication of SWF cas transfer program reaches poorer households Measuring the impact of BDP Measuring responsiveness the BDP. Use of Results Monitoring Monitor whether cash transfers are better targeted and administrative processes have been improved 32", + "ner_text": [ + [ + 624, + 627, + "named" + ] + ], + "validated": false, + "empirical_context": "Percentage of administrative cost compared to overall program cost 1 1. All departments of the SWF have access to MIS and use it for reporting purposes Use of Results Monitoring Demonstrating that SWF resources are targeted to poorer households Verifying the communication of SWF cas transfer program reaches poorer households Measuring the impact of BDP Measuring responsiveness the BDP. Use of Results Monitoring Monitor whether cash transfers are better targeted and administrative processes have been improved 32", + "type": "system", + "explanation": "'MIS' is mentioned as a management information system but not as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is related to reporting and data management.", + "contextual_reason_agent": "'MIS' is mentioned as a management information system but not as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "041_Burkina-Faso-Support-to-Land-and-Mining-Management-Strengthening-Project", + "page": 28, + "text": "will improve the knowledge and access to geological information to minimize investment risks and promote sound, transparent, and inclusive economic development through activities to: ( a ) assess approximately 15 existing targets or anomalies, particularly with respect to, inter alia, base metals, lithium and rare earths ( minerals critical for the green economy ); ( b ) compile and analyze existing data to develop new targets and acquisition of new geophysical, geochemical and geological data at a scale of 1: 50 000 for 20 maps and 1: 100 000; ( c ) develop a web-map application / portal demonstrating the value of common territorial datasets easily adaptable to a myriad of non-mining applications; ( d ) strengthen and expand the geographic information systems ( GIS ) into an IT hub enabling interconnection between agencies within the MEMC, MINEFID, and other departments; and ( e ) train staff, provide tools and equipment to BUMIGEB and support its promotional activities. The mapping targets minerals in short supply for the development of renewable energy to enable Burkina to potentially tap into expected high market demands for minerals by the low carbon economy. 40. 2. 1. 2. 1 Prospectivity Mapping for ASM.", + "ner_text": [ + [ + 740, + 770, + "named" + ] + ], + "validated": false, + "empirical_context": "will improve the knowledge and access to geological information to minimize investment risks and promote sound, transparent, and inclusive economic development through activities to: ( a ) assess approximately 15 existing targets or anomalies, particularly with respect to, inter alia, base metals, lithium and rare earths ( minerals critical for the green economy ); ( b ) compile and analyze existing data to develop new targets and acquisition of new geophysical, geochemical and geological data at a scale of 1: 50 000 for 20 maps and 1: 100 000; ( c ) develop a web-map application / portal demonstrating the value of common territorial datasets easily adaptable to a myriad of non-mining applications; ( d ) strengthen and expand the geographic information systems ( GIS ) into an IT hub enabling interconnection between agencies within the MEMC, MINEFID, and other departments; and ( e ) train staff, provide tools and equipment to BUMIGEB and support its promotional activities. The mapping targets minerals in short supply for the development of renewable energy to enable Burkina to potentially tap into expected high market demands for minerals by the low carbon economy.", + "type": "system", + "explanation": "However, in this context, it is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'geographic information systems' often involve data collection and analysis.", + "contextual_reason_agent": "However, in this context, it is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "020_P1781250bdd2b50b0b9720d5c17632331c", + "page": 11, + "text": "In addition to the analytical work undertaken, activity under the SPF grant also includes support for the Joint Recovery Needs Assessment ( JRNA ) for Fizuli, Agdam and Jabrayil, and just-in-time advice to the Office of Special Representative ( OSR ) of the President to the Karabakh Economic Region on issues including management information systems, local governance, and smart city development. Preparation of the JRNA will benefit from the knowledge gained through the proposed Improved Livelihoods for Internally Displaced Persons ( ILIDP ) Project and the OSR will benefit from the Project as well given their role in facilitating the sustainable return of IDPs. 10. The IDP survey and lessons learned paper on livelihoods described above have informed the design of the proposed project by summarizing the current living conditions of IDPs as well as lessons from implementation of previous livelihood programs. Though both studies are in the process of being finalized, they have provided valuable inputs to the design of this project. The study of lessons from other livelihoods and job training programs revealed the need for close support for training participants to ensure the sustainability of their achievements. This has resulted in the incorporation of mentors into the project design from the time of project launch through completion and an emphasis on community-based support to address the unique context of each IDP settlement. The household survey found that 22 percent of household members are unemployed and 30 percent of respondents are looking for work. There remains a reliance on state support with 90 percent of respondents receiving an IDP allowance. To address their income generation needs, respondents identified various skills they would like to acquire with males wanting to have skills in the agriculture / fishery, automotive and land transport, and construction sectors while women preferred garments / sewing, health care and community development. While the data collected through the survey on job and skills provides useful benchmarking information, more localized labor market surveys will need to be undertaken to identify targeted opportunities in the communities where IDPs are living to support livelihoods that provide greater incomes over sustained periods.", + "ner_text": [ + [ + 1454, + 1470, + "named" + ], + [ + 151, + 157, + "household survey <> data geography" + ], + [ + 169, + 177, + "household survey <> data geography" + ], + [ + 275, + 299, + "household survey <> data geography" + ], + [ + 1482, + 1528, + "household survey <> data description" + ], + [ + 2323, + 2341, + "household survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "This has resulted in the incorporation of mentors into the project design from the time of project launch through completion and an emphasis on community-based support to address the unique context of each IDP settlement. The household survey found that 22 percent of household members are unemployed and 30 percent of respondents are looking for work. There remains a reliance on state support with 90 percent of respondents receiving an IDP allowance.", + "type": "survey", + "explanation": "In the context, the household survey is explicitly mentioned as a source of information that provides empirical data about unemployment and state support.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data collected from households.", + "contextual_reason_agent": "In the context, the household survey is explicitly mentioned as a source of information that provides empirical data about unemployment and state support.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "185_multi-page", + "page": 43, + "text": "44 Project Components: Inputs ( budget for each Project Reports: From Components to component ): Outputs: 1 ) Prevention US $ TBD National and / or sub-regional HIV / AlDS progress reports 2 ) Care and support US $.. TBD Financial data from MOF and line ministries 3 ) Capacity building US $. TBD Project financial data 4 ) Partnerships US $.. TBD Survey data", + "ner_text": [ + [ + 221, + 235, + "named" + ] + ], + "validated": false, + "empirical_context": ". TBD Financial data from MOF and line ministries 3 ) Capacity building US $. TBD Project financial data 4 ) Partnerships US $.", + "type": "data", + "explanation": "However, 'Financial data' is mentioned in a general context without specifying it as a structured collection or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'data' which often refers to structured information.", + "contextual_reason_agent": "However, 'Financial data' is mentioned in a general context without specifying it as a structured collection or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 47, + "text": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 44 of 68 Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Percentage of HC women receiving four ANC visits ( Percentage ) Description Percentage of HC women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider. Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Number of health facilities with climate friendly minor rehabilitation and water and sanitation improvements completed ( Number ) Description Number of health facilities with a. ) climate friendly rehabilitation measures as defined by a set list of measures that go beyond standard practice to reduce flooding, heavy rain, and heat risk to health facilities; and / or b. ) water and sanitation improvements as defined as improvements in the availability of safe water ( drilling of boreholes, piping of water, safe rainwater catchment ) and sanitation ( pit latrines to ESF specifications; flushable toilets ) Frequency Quarterly Data source UNICEF / TPM report Methodology for Data Collection UNICEF / TPM Responsibility for Data Collection UNICEF / TPM Percentage of deliveries attended by skilled health personnel ( Number ) Description Percentage of", + "ner_text": [ + [ + 131, + 136, + "named" + ], + [ + 218, + 224, + "DHIS2 <> publisher" + ], + [ + 259, + 265, + "DHIS2 <> publisher" + ], + [ + 618, + 624, + "DHIS2 <> publisher" + ], + [ + 659, + 665, + "DHIS2 <> publisher" + ], + [ + 666, + 784, + "DHIS2 <> data description" + ], + [ + 1308, + 1314, + "DHIS2 <> publisher" + ], + [ + 1360, + 1366, + "DHIS2 <> publisher" + ], + [ + 1408, + 1414, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank South Sudan Health Sector Transformation Project ( HSTP ) ( P181385 ) Page 44 of 68 Frequency Quarterly Data source DHIS2 Methodology for Data Collection DHIS2 Responsibility for Data Collection MoH and UNICEF; Measures subcomponent 1. 1 Under UNICEF Percentage of HC women receiving four ANC visits ( Percentage ) Description Percentage of HC women at childbearing age with a live birth in a given time period who received antenatal care, four times or more times from any provider.", + "type": "system", + "explanation": "DHIS2 is indeed a data source used for collecting health data, confirming its role as a dataset.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned as a data source for health-related metrics.", + "contextual_reason_agent": "DHIS2 is indeed a data source used for collecting health data, confirming its role as a dataset.", + "contextual_signal": "mentioned as a data source for data collection", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 63, + "text": "will be measured, at a minimum, on a SNSOP MIS and payment schedules Payment data stored in the MIS will be compared with approved payment schedules. Selected Implementing Partner", + "ner_text": [ + [ + 69, + 81, + "named" + ] + ], + "validated": true, + "empirical_context": "will be measured, at a minimum, on a SNSOP MIS and payment schedules Payment data stored in the MIS will be compared with approved payment schedules. Selected Implementing Partner", + "type": "data", + "explanation": "'Payment data' is indeed used in the context as it is mentioned to be stored in the MIS and compared with payment schedules.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'Payment data' is a dataset because it refers to a specific type of information collected and stored.", + "contextual_reason_agent": "'Payment data' is indeed used in the context as it is mentioned to be stored in the MIS and compared with payment schedules.", + "contextual_signal": "mentioned as data stored in the MIS", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 5, + "validated": 4, + "not_validated": 1 + } + }, + { + "filename": "055_Chad-COVID-19-Response-Project", + "page": 14, + "text": "The World Bank Chad COVID-19 Strategic Preparedness and Response Project ( P173894 ) Page 10 through out-of-pocket payments. This poses major challenges in terms of the equity, the efficiency and the sustainability of the country \u2019 s health financing architecture. 15. Health facilities have low readiness levels to deliver quality health services. The number of health facilities in Chad is low and more than 3, 000 facilities are needed to reach WHO target of two facilities per 10, 000 inhabitants. Further, according to the most recent SARA survey, one in three health facilities had access to electricity and two in three had access to improved water sources. The availability of essential medical equipment ( scales, thermometers, stethoscopes, etc. ) and laboratory capacity were also substandard ( WHO, 2019 ). In terms of health professionals, in 2017 there were less than 10, 000 professionals in all Chad. Shortages are particularly acute for doctors and specialized health professionals ( 0. 38 per 10, 000 population ), and there are important disparities in the distribution of health professionals between provinces. 16. The coverage of essential health services is low.", + "ner_text": [ + [ + 540, + 551, + "named" + ], + [ + 4, + 14, + "SARA survey <> publisher" + ], + [ + 15, + 19, + "SARA survey <> data geography" + ], + [ + 384, + 388, + "SARA survey <> data geography" + ], + [ + 448, + 451, + "SARA survey <> publisher" + ], + [ + 553, + 609, + "SARA survey <> data description" + ], + [ + 806, + 809, + "SARA survey <> publisher" + ], + [ + 811, + 815, + "SARA survey <> publication year" + ], + [ + 856, + 860, + "SARA survey <> reference year" + ], + [ + 911, + 915, + "SARA survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The number of health facilities in Chad is low and more than 3, 000 facilities are needed to reach WHO target of two facilities per 10, 000 inhabitants. Further, according to the most recent SARA survey, one in three health facilities had access to electricity and two in three had access to improved water sources. The availability of essential medical equipment ( scales, thermometers, stethoscopes, etc. ) and laboratory capacity were also substandard ( WHO, 2019 ).", + "type": "survey", + "explanation": "The context confirms it is a dataset as it provides empirical data on health facilities' access to electricity and water sources.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'SARA survey' implies a structured collection of data related to health facilities.", + "contextual_reason_agent": "The context confirms it is a dataset as it provides empirical data on health facilities' access to electricity and water sources.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 105, + "text": "The 2016 Ethiopia Demographic and Health Survey indicates that 26 percent of all women aged 15 to 49 report having experienced physical or sexual violence in their lifetime, with 23 percent reporting physical violence and 10 percent experiencing sexual violence. 107 Acceptance of use of violence is high, with 63 percent of Ethiopian women believing a man is justified in beating his wife, while only 23 percent of survivors of physical or sexual violence have sought help for their experience and only two to three percent of these women have sought care from service providers, such as medical professionals, social workers, or lawyers. 108 9. Conditions of conflict and displacement more recently have exacerbated the incidence of GBV, including widespread reports of physical and sexual violence. Although available data is limited, some reports estimate that more than 100 cases of sexual violence \u2014 including gang rape \u2014 were reported daily between November 2020 to July 2021, while health facilities in the Tigray region registered nearly 1, 300 cases of rape between February \u2013 April 2021. 109, 110 The UN Secretary-General \u2019 s Special Representative on Conflict-Related Sexual Violence issued a statement in January 2021 expressing her deep concerns regarding \u201c serious allegations of sexual violence in the Tigray region of Ethiopia. \u201d 111 EHRC has also expressed its concerns from the lack of protection of civilians, including against rape, extending from", + "ner_text": [ + [ + 9, + 47, + "named" + ], + [ + 4, + 8, + "Ethiopia Demographic and Health Survey <> publication year" + ], + [ + 9, + 17, + "Ethiopia Demographic and Health Survey <> data geography" + ], + [ + 81, + 100, + "Ethiopia Demographic and Health Survey <> reference population" + ], + [ + 325, + 340, + "Ethiopia Demographic and Health Survey <> reference population" + ], + [ + 1015, + 1028, + "Ethiopia Demographic and Health Survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "The 2016 Ethiopia Demographic and Health Survey indicates that 26 percent of all women aged 15 to 49 report having experienced physical or sexual violence in their lifetime, with 23 percent reporting physical violence and 10 percent experiencing sexual violence. 107 Acceptance of use of violence is high, with 63 percent of Ethiopian women believing a man is justified in beating his wife, while only 23 percent of survivors of physical or sexual violence have sought help for their experience and only two to three percent of these women have sought care from service providers, such as medical professionals, social workers, or lawyers.", + "type": "survey", + "explanation": "This is indeed a dataset as it is referenced directly in the context as a source of empirical data regarding women's experiences with violence.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is named as a survey that provides statistical data on women's experiences in Ethiopia.", + "contextual_reason_agent": "This is indeed a dataset as it is referenced directly in the context as a source of empirical data regarding women's experiences with violence.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "028_Ethiopia-Response-Recovery-Resilience-for-Conflict-Affected-Communities-in-Ethiopia-Project", + "page": 32, + "text": "To achieve the first objective, key response activities will include provision of mobile support for health, education, and WASH services to conflict-affected communities. Recovery activities will include local mapping and factfinding of conflict-affected people, households, and communities \u2019 needs, impacts, response services and a local conflict analysis and participatory climate risk assessments; consultation, planning, and implementation of sustainable solutions for communities; and recovery plans ( Percentage ); ( ii ) Neighborhood Relations Committees formed and reinforced and still operational one year after receiving funding ( Percentage ); ( iii ) Beneficiaries that feel project investments reflect their needs ( Percentage ); and ( iv ) People reporting increased awareness of available GBV response services in their community ( Percentage ). 65 These are: ( i ) Number of financed sub-projects that are functioning or delivering services to communities six months after completion ( with disaggregation to education, WASH, health, and other types of sub-projects ); ( ii ) Beneficiaries with rebuilt and improved access to climate - resilient community infrastructure; and ( iii ) Community recovery plans implemented. 66 The average population size of Woredas in the four regions is calculated from the July 2021 projections of the Ethiopia Central Statistics Agency. IDP numbers are calculated using a combination of IOM \u2019 s Site Assessment Survey ( Round 26 ), collected in June-July 2021, and the Emergency Site Assessment ( ESA ) round 8, collected in July-August 2021. The ESA data is still pending confirmation from the government. Data collection could not be completed in parts of the Tigray, Western Oromia, and Benishangul-Gumuz regions due to insecurity and access restrictions.", + "ner_text": [ + [ + 1521, + 1546, + "named" + ], + [ + 1324, + 1333, + "Emergency Site Assessment <> publication year" + ], + [ + 1439, + 1442, + "Emergency Site Assessment <> publisher" + ], + [ + 1497, + 1511, + "Emergency Site Assessment <> reference year" + ], + [ + 1549, + 1552, + "Emergency Site Assessment <> acronym" + ], + [ + 1714, + 1720, + "Emergency Site Assessment <> data geography" + ], + [ + 1722, + 1736, + "Emergency Site Assessment <> data geography" + ], + [ + 1742, + 1759, + "Emergency Site Assessment <> data geography" + ] + ], + "validated": true, + "empirical_context": "66 The average population size of Woredas in the four regions is calculated from the July 2021 projections of the Ethiopia Central Statistics Agency. IDP numbers are calculated using a combination of IOM \u2019 s Site Assessment Survey ( Round 26 ), collected in June-July 2021, and the Emergency Site Assessment ( ESA ) round 8, collected in July-August 2021. The ESA data is still pending confirmation from the government.", + "type": "survey", + "explanation": "It is indeed a dataset as it is mentioned in the context of collecting data for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as an assessment that collects data.", + "contextual_reason_agent": "It is indeed a dataset as it is mentioned in the context of collecting data for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "172_multi0page", + "page": 114, + "text": "Table 4: Entry rate in primnar Grade 1 by gender. geographic location and region Access to pnmary Grade I ( % ) Total 51. 9 Boys 55. 4 Girls 48. 0 Urban 73. 3 Rural 42. 4 North 34. 7 East 42. 8 South 58. 1 West 85. 0 A pattern relatively similar to that obtained on the GER was observed, namely: i ) that gender disparities are moderate ( 55. 4 percent for boys, 48 percent for girls ); but that ii ) the gap between urban and rural areas is quite substantial ( 73 percent in urban, but only 42 percent in rural ); and iii ) that regional disparities are very wide ( 35 percent in the North, 43 percent in the East, 58 percent in the East, but 85 percent in the Western Region ). From the data presented in Table 4, it may also be stressed that, within the global population of adolescents, only a very small proportion of those in the age group 15 to 20, have had any access to schooling. It is indeed estimated that among the 2, 929 individuals in that age group in the survey as many as 2002 ( 68. 4 percent ) did not have access to school at all.", + "ner_text": [ + [ + 270, + 273, + "named" + ] + ], + "validated": false, + "empirical_context": "1 West 85. 0 A pattern relatively similar to that obtained on the GER was observed, namely: i ) that gender disparities are moderate ( 55. 4 percent for boys, 48 percent for girls ); but that ii ) the gap between urban and rural areas is quite substantial ( 73 percent in urban, but only 42 percent in rural ); and iii ) that regional disparities are very wide ( 35 percent in the North, 43 percent in the East, 58 percent in the East, but 85 percent in the Western Region ).", + "type": "concept", + "explanation": "'GER' is not explicitly defined as a data source or dataset in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'GER' is a dataset because it is mentioned in a statistical context with percentages.", + "contextual_reason_agent": "'GER' is not explicitly defined as a data source or dataset in the context provided.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 25, + "text": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42. Support for implementation of national and international student assessments includes the following activities: ( a ) analysis of PISA 2022 results in student learning and Moldova \u2019 s participation in PISA 2025 and 2028; ( b ) preparation activities conducted by NACE, including the development, translation, and editing of test materials, education, training, and preparatory workshops, and piloting of assessments; ( c ) test implementation, including printing, distribution, and supervision of assessments, data processing and analysis, and dissemination of results; and ( d ) strengthening of the capacity of the national implementing agency responsible for carrying out national and international student assessments, including for refugee children from Ukraine. Years of administration of PISA are indicative based on the frequency of previous cycles ( pending decision about the frequency of future cycles ). 43. This subcomponent will also support TA for: ( a ) revision of the teacher and manager professional standards, ( b ) nationwide education infrastructure investment planning for modern school", + "ner_text": [ + [ + 539, + 543, + "named" + ] + ], + "validated": false, + "empirical_context": "More specifically, it will finance ( a ) development and integration of the e-Catalog / e-Register and other modules to EMIS that would allow a switch from paper-based to a greener, more efficient electronic format of data reporting to various stakeholders and interoperability of the NBS system and EMIS; ( b ) creation of the dedicated module and interface in EMIS that would allow the MoER to identify students who require support with adherence to the personal data protection legislation; and ( c ) the development and integration to EMIS of a module for ANACEC on external assessment of education institutions. 42.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is involved in data reporting and integration.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "005_BOSIB-8191b179-7209-4faa-b5e0-11783bcd492d", + "page": 10, + "text": "The literacy rate for adults aged 15 and older is 98 percent, and the share of adults aged 15 and above who had no formal education has declined from 19. 2 percent in 1950 to a projected 1. 8 percent in 2020. 8 Younger cohorts are also attaining more years of education; 1 Source: Macro Poverty Outlook for Costa Rica: April 2024; 2 Source: World Development Indicators ( WDI ) https: / / data. worldbank. org / indicator / NE. TRD. GNFS. ZS? locations = CR 3 Source: World Economic Outlook ( WEO ), October 2023, https: / / www. imf. org / external / datamapper / LUR @ WEO / CRI? zoom = CRI & highlight = CRI 4 World Bank estimates using administrative records and annual statistical reports from the Directorate General of Migrants and Foreigners. https: / / www. migracion. go. cr / Paginas / Centro % 20de % 20Documentaci % C3 % B3n / Estad % C3 % ADsticas. aspx. 5 World Bank, Climate Change Knowledge Portal - Costa Rica. 6 World Bank, GFFDR, ThinkHazard! Portal. 7 Ibid 8 Source: https: / / ourworldindata. org /", + "ner_text": [ + [ + 640, + 662, + "named" + ] + ], + "validated": false, + "empirical_context": "org / external / datamapper / LUR @ WEO / CRI? zoom = CRI & highlight = CRI 4 World Bank estimates using administrative records and annual statistical reports from the Directorate General of Migrants and Foreigners. https: / / www.", + "type": "records", + "explanation": "'Administrative records' are not a structured collection of data themselves but rather a source of information used to compile datasets.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'administrative records' is a dataset because it refers to a collection of data used in estimates.", + "contextual_reason_agent": "'Administrative records' are not a structured collection of data themselves but rather a source of information used to compile datasets.", + "contextual_signal": "mentioned as a source of information for estimates", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 5, + "validated": 3, + "not_validated": 2 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 18, + "text": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 3 health facilities and market-linked skills training. 7 The 2024 Rwanda FinScope survey also showed relatively low take - up of finance for investments or credit for productive purposes by Rwandese and refugees, limiting business and income-generating opportunities. 8 7. High poverty rates and minimal economic activity in the hosting districts constrain self-reliance prospects for refugees and host communities alike. The national non-monetary poverty rate in Rwanda of 30 percent is exceeded in all five districts that host refugee camps. Gisagara, where the Mugombwa camp is located, ranked as the poorest district in the country in the 2022 Census at 45 percent. 9 By comparison, the City of Kigali has the lowest percentage of poor people ( 9. 5 percent ). Core elements of the non-monetary poverty index include health, education and living standards, highlighting the dual need to: ( a ) invest in economic opportunity and access to services in the hosting districts; and ( b ) at the same time, facilitate refugee mobility out of the camps and rural areas into urban centers, where more opportunities are available and self-reliance is more achievable. 8. Building on the ongoing Phase I of Jya Mbere, Phase II aims to assist refugees and host communities to achieve self-reliance.", + "ner_text": [ + [ + 176, + 203, + "named" + ] + ], + "validated": true, + "empirical_context": "The World Bank Socio-economic Inclusion of Refugees & Host Communities in Rwanda Project Phase II ( P509677 ) Page 3 health facilities and market-linked skills training. 7 The 2024 Rwanda FinScope survey also showed relatively low take - up of finance for investments or credit for productive purposes by Rwandese and refugees, limiting business and income-generating opportunities. 8 7.", + "type": "survey", + "explanation": "The 2024 Rwanda FinScope survey is explicitly mentioned in the context as providing data on finance uptake, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "The 2024 Rwanda FinScope survey is explicitly mentioned in the context as providing data on finance uptake, confirming its role as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "037_Uganda-Investment-for-Industrial-Transformation-and-Employment-Project", + "page": 76, + "text": "Table 1: Refugee and Host Population in Uganda62 Population Refugee % of total Number of firms63 Refu gee Ugandan hosts Refuge e Host North West Refugee-Hosting Districts Yumbe, Adjumani, Madi Okollo, Terego Lamwo, Koboko, Obongi 873, 844 2, 169, 200 29 % 1, 987 13, 505 South West Refugee-Hosting Districts Isingiro, Kyegegwa, Kamwenge, Kiryandongo, Kikuube 576, 922 2, 266, 800 20 % 2, 526 15, 095 Total non-Kampala RHDs 1, 450, 766 4, 436, 000 25 % 4, 513 28, 601 Total Kampala 98, 415 1, 709, 000 5 % 5, 028 104, 972 2. The economic activity slow down caused by COVID-19 has affected Uganda \u2019 s ability to generate jobs for those living in vulnerable situations, including refugees and host communities. Despite the concerted efforts to integrate refugees within the ecosystems of their host communities, refugee - hosting districts ( RHDs ) remain less developed areas. Low levels of disposable incomes have resulted in low demand and limited access to labor markets, leaving those residents with some access to land with no alternative but to live off subsistence agriculture and humanitarian aid. These areas were less developed even before the inflow of refugees and remain decoupled from resilient and viable supply chains in the economy. For example, the average value of assets among all households ( both refugee and host ) in the district of Arua64 is 560, 000 Ugandan shillings ( US $ 144 ), which is only 10 percent of comparable asset values in the Kampala region. 62 Uganda Comprehensive Refugee Response Portal ( https: / / data2. unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "ner_text": [ + [ + 1653, + 1686, + "named" + ], + [ + 473, + 480, + "Census of Business Establishments <> data geography" + ], + [ + 588, + 594, + "Census of Business Establishments <> data geography" + ], + [ + 677, + 685, + "Census of Business Establishments <> reference population" + ], + [ + 1484, + 1490, + "Census of Business Establishments <> data geography" + ], + [ + 1594, + 1598, + "Census of Business Establishments <> publication year" + ], + [ + 1623, + 1647, + "Census of Business Establishments <> data type" + ], + [ + 1905, + 1923, + "Census of Business Establishments <> usage context" + ] + ], + "validated": true, + "empirical_context": "unhcr. org / en / country / uga ) 31 October 2021 63 Calculation based on district-level firm data from Census of Business Establishments ( COBE ), and refugee and host community household data from the Refugee and Host Community Household Survey 64 Arua was until recent sub-divisions of the district considered a refugee hosting district.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a source of data used for calculations in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Census' which typically indicates a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a source of data used for calculations in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 54, + "text": "DLR 5. 2 disburses a ) USD $ 1, 197, 500 million for the establishment of a functional competency assessment administration and b ) US $ 1 m against each 4. 5 percentage points increase in performance-based promotion for civil servants, up to a percentage of 72 %, in the limit of US $ 18, 000, 000. DLR 5. 3 disburses a ) US $ 1 m for the Prime Ministry \u2019 s endorsement of the curricular for digital training; and b ) US $ 1 m for each batch of 500 civil servants with certified digital literacy / skills, up to a percentage of 72 % and in the limit of US $ 8 m. DLR 5. 4 disburses US $ 1. 25 million against each 18-percentage points increase in the number of competitive recruitments, performance-based promotions and certification of digital literacy / skills training using HRMIS and other digital platform, in the limit of US $ 5 million. Description The DLI supports competency-based and gender-sensitive human resource management and digital skills development in the civil service. It consists of the five following DLRs: DLR 5. 0: Adoption of the regulatory framework for civil service professionalization. DLR 5. 1: Mainstreaming of competitive recruitment for all new civil servants. DLR 5. 2: Mainstreaming of performance-based promotion for civil servants.", + "ner_text": [ + [ + 779, + 784, + "named" + ] + ], + "validated": false, + "empirical_context": "4 disburses US $ 1. 25 million against each 18-percentage points increase in the number of competitive recruitments, performance-based promotions and certification of digital literacy / skills training using HRMIS and other digital platform, in the limit of US $ 5 million. Description The DLI supports competency-based and gender-sensitive human resource management and digital skills development in the civil service.", + "type": "system", + "explanation": "However, HRMIS is mentioned as a digital platform and not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed HRMIS is a dataset because it includes 'HR' (Human Resources) and 'MIS' (Management Information System), which often relate to data management.", + "contextual_reason_agent": "However, HRMIS is mentioned as a digital platform and not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "044_Turkey-Social-Entrepreneurship-Empowerment-and-Cohesion-in-Refugee-and-Host-Communities-in-Turkey-Project", + "page": 21, + "text": "Community needs assessments will be conducted in specific community target areas, in order to identify: ( i ) the barriers and constraints for women and youth refugee and host community members to engage in social enterprises; ( ii ) the needs, in each community, at the individual, household and community levels \u2013 to address these barriers. This is expected to include both social and economic responses e. g. access to subgrants, access to training, childcare facilities, language barriers or restrictions on their mobility in the city, or male perspectives on women working. As this will likely identify gaps in local facilities ( such as places to work and childcare needs in the community ), this will also serve as the first step of Component 2A ). The community needs assessments will take a participatory approach, ensuring the genuine engagement of target beneficiaries in the identification of their problems and needs, and promote interaction and substantive discussion between refugees and host communities around livelihoods development. The community needs assessments will verify, with beneficiaries, the results of the local market assessments. Given the importance of participatory dialogues with refugees and host communities, the community needs assessments will be reviewed and verified by the CIPs after they have hired skilled community facilitators. b.", + "ner_text": [ + [ + 0, + 27, + "named" + ] + ], + "validated": false, + "empirical_context": "Community needs assessments will be conducted in specific community target areas, in order to identify: ( i ) the barriers and constraints for women and youth refugee and host community members to engage in social enterprises; ( ii ) the needs, in each community, at the individual, household and community levels \u2013 to address these barriers. This is expected to include both social and economic responses e.", + "type": "assessment", + "explanation": "However, 'Community needs assessments' are described as evaluations rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'assessments' can imply a structured collection of data.", + "contextual_reason_agent": "However, 'Community needs assessments' are described as evaluations rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 18, + "text": "The updated methodology will reflect the main improvements achieved during the first phase ( locality based, participative process, grievance mechanism, and so on ). 25. Households from Hodh Chargui, including those in the Mbera Refugee Camp that are considered poor, will be included in the Social Registry. This inclusion will be a positive step towards better coordination between humanitarian actors and the Government and between emergency and development support. The methodology to identify poor refugee households for inclusion in the Social Registry will be based on the profiling work carried out in 2018 by the UNHCR and WFP. It identified six different household categories: ( a ) deprived; ( b ) precarious; ( c ) unstable; ( d ) fragile ( e ) emerging; and ( f ) catalyst ( see figure 3 ). Each category gradually receives ( or will receive ) differentiated assistance based on its needs. This profiling work was carried out with the participation of camp and Government authorities. Based on preliminary data analysis, categories 1, 2 and 3 ( \u201c deprived \u201d, \u201c precarious \u201d, and \u201c unstable \u201d ) which include approximately 7, 500 households will enter the Social Registry. 26. This sub-component will finance: ( a ) the costs related to the community targeting ( Social Registry enrolment officers and related logistics and material ); ( b ) the socio-economic data collection costs for up to 350, 000 households ( 70, 000 households per year ); and ( c ) the costs related to quality insurance ( grievance mechanism, toll-free number, spot-check surveys, process evaluation, and so on ). Figure 3: Refugee categorization in Mauritania Source: UNHCR", + "ner_text": [ + [ + 1168, + 1183, + "named" + ], + [ + 223, + 241, + "Social Registry <> data geography" + ], + [ + 498, + 521, + "Social Registry <> reference population" + ], + [ + 610, + 614, + "Social Registry <> publication year" + ], + [ + 622, + 627, + "Social Registry <> publisher" + ], + [ + 1637, + 1647, + "Social Registry <> data geography" + ], + [ + 1656, + 1661, + "Social Registry <> publisher" + ] + ], + "validated": true, + "empirical_context": "This profiling work was carried out with the participation of camp and Government authorities. Based on preliminary data analysis, categories 1, 2 and 3 ( \u201c deprived \u201d, \u201c precarious \u201d, and \u201c unstable \u201d ) which include approximately 7, 500 households will enter the Social Registry. 26.", + "type": "registry", + "explanation": "The context indicates that the Social Registry is used to categorize households based on preliminary data analysis, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'Social Registry' which implies a structured collection of data about households.", + "contextual_reason_agent": "The context indicates that the Social Registry is used to categorize households based on preliminary data analysis, confirming its role as a data source.", + "contextual_signal": "mentioned as a data source for categorizing households", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 29, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 24 of 85 63. Technical assistance. The activity will finance international technical assistance required for the technical assessment and needs analysis of the current data collection system that will provide recommendations for the most appropriate technical solutions to the problems identified, including data security and disaster recovery mechanisms. Other key enhancements include: a. The introduction of unique student identifiers with student-level descriptors so that students can be tracked throughout their schooling. b. The creation of an open data portal that will make real time education management information system ( EMIS ) data available to relevant stakeholders ( students, parents of students, teachers, school leaders, MENFOP personnel, and so on ). c. The development of a human resource management sub-portal. This will include all information on teachers and staff, including numbers, deployment to schools, and salaries and benefits. It would also include individualized professional development data with such details as professional development courses, training, certification for teachers, allowing inspectors and PAs to follow up on an individual basis with all staff. 64. Training. Relevant MENFOP staff will be trained on the use of these enhanced data systems and on use of the available data for decision making purposes.", + "ner_text": [ + [ + 873, + 909, + "named" + ] + ], + "validated": false, + "empirical_context": "c. The development of a human resource management sub-portal. This will include all information on teachers and staff, including numbers, deployment to schools, and salaries and benefits.", + "type": "system", + "explanation": "However, it is described as a sub-portal, which indicates it is a system for managing information rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves information on teachers and staff.", + "contextual_reason_agent": "However, it is described as a sub-portal, which indicates it is a system for managing information rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 46, + "text": "Economic Opportunity & Self-reliance Businesses and cooperatives that receive capacity building support and that are operational 1 year after intervention ( Percentage ) Description Quantitative indicator counting percentage of businesses and cooperatives that receive capacity - building support under sub-component 2 ( a ) that are still operational one year after the capacity building has been completed. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA. Financial institutions that receive capacity building and are providing financial services to refugees ( Number ) Description Quantitative indicator counting number of instituitons that receive capcity building and that are providing financial services to refugees. Frequency Quarterly Data source Project MIS and Project Progress Reports. Methodology for Data Collection Monitoring project implementation including reporting from beneficiary insitutitons. BRD data fed to MINEMA. Responsibility for Data Collection BRD, BDF and MINEMA. Micro-finance institutions and Savings and Credit Cooperatives that become project participating financial institutions ( Number ) Description Quantitative indicator counting number of MFIs and SACCOs that become project participating financial instutions. Frequency Quarterly.", + "ner_text": [ + [ + 550, + 558, + "named" + ], + [ + 182, + 204, + "BRD data <> data type" + ] + ], + "validated": true, + "empirical_context": "Methodology for Data Collection Monitoring project implementation. BRD data fed to MINEMA. Responsibility for Data Collection BRD and MINEMA.", + "type": "data", + "explanation": "In this context, 'BRD data' is indeed used as a source of information for monitoring project implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'BRD data' is a dataset because it is mentioned in the context of data collection and monitoring.", + "contextual_reason_agent": "In this context, 'BRD data' is indeed used as a source of information for monitoring project implementation.", + "contextual_signal": "mentioned as a source of information for data collection", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "144_84657-PAD-P132268-Project-Commitment-Paper", + "page": 17, + "text": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance. B3. Project Design Targeting In the absence of data on household income, the Project uses geographical targeting at the level of the governorates where poverty level is estimated at 18. 3 % on average, 32. 5 % in Hebron and 21. 3 % in Bethlehem ( 2010 ) 9. Both governorates also have the highest unemployment rates in the West Bank with averages of 22. 8 % and 22. 4 %, respectively10. The vulnerability of these groups is exacerbated by mobility restrictions and poor market access. Output-based subsidy payment 8 These closures are part of the World Bank project and including them in the OBA targets will further incentivize JSC-H & B to achieve closures according to the planned schedule. In addition, these closures are necessary to meet OBA targets for Indicator ( 3 ) Waste Managed. 9 PCBS: West Bank Southern Governorates Statistical Yearbook, 2011 10 PCSBS Press Release on Labour Force Survey Results, Labour Force Survey ( January-March, 2011 ) Round ( Q1 / 2011 ).", + "ner_text": [ + [ + 361, + 364, + "named" + ] + ], + "validated": false, + "empirical_context": "Indicator ( 1 ) includes the following studies and actions to be implemented in the first year: - Indicator ( 1 ) SWM Strategy: \uf0b7 Strategy Development: this entails development of plans for management and treatment of slaughterhouse and medical waste and the development of plans for closure and rehabilitation of unsanitary dumpsites8. \uf0b7 Implementation of the MIS: the design and implementation of the MIS is targeted for the first year. Relevant SWM data such as waste tonnage handled, costs, etc., is currently tracked by service providers; however, the MIS will facilitate the consolidation and reporting of this data and allow JSC-H & B to more effectively track progress and performance.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it involves data tracking and reporting.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 47, + "text": "Annual Progress reports Impact evaluation, progress reports drawing on data from selected schools MoER PMT Annual education statistics reports produced and publicly disseminated based on data generated by the integrated EMIS and used for education sector management including refugee response Reports providing education statistics are produced and publicly disseminated on an annual basis thanks to the integrated EMIS ( to identify determinants of student and school performance for decision making on teacher policies / training, tutoring program, and other investments in the Annual Integrated EMIS database; annual education reports progress and monitoring reports; annual education reports MoER, PMT", + "ner_text": [ + [ + 209, + 224, + "named" + ], + [ + 107, + 142, + "integrated EMIS <> data type" + ] + ], + "validated": true, + "empirical_context": "Annual Progress reports Impact evaluation, progress reports drawing on data from selected schools MoER PMT Annual education statistics reports produced and publicly disseminated based on data generated by the integrated EMIS and used for education sector management including refugee response Reports providing education statistics are produced and publicly disseminated on an annual basis thanks to the integrated EMIS ( to identify determinants of student and school performance for decision making on teacher policies / training, tutoring program, and other investments in the Annual Integrated EMIS database; annual education reports progress and monitoring reports; annual education reports MoER, PMT", + "type": "system", + "explanation": "In the context, it is described as generating data used for education sector management, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'integrated EMIS' suggests a system that manages and organizes educational data.", + "contextual_reason_agent": "In the context, it is described as generating data used for education sector management, indicating it functions as a data source.", + "contextual_signal": "mentioned as a data source for education statistics", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 55, + "text": "The targeting committee will include representatives from the colline development committee ( president and female member ), representatives from religious groups in the colline ( Caritas or priest, pastor ), representatives from the Child Protection Committee, the imboneza if present ( volunteer women in charge of addressing domestic violence and children issues, with support from MDPHSAG ), community health workers and the Red Cross, the community leader ( bachingonazi ) or customary leader ( Abagobo - 24 The poverty analysis on the household survey data ( ECVMB 2014 ) for the poverty map will provide the PMT coefficients and inform the design of a questionnaire to collect data on the variables associated with extreme poverty at the household-level.", + "ner_text": [ + [ + 541, + 562, + "named" + ], + [ + 571, + 575, + "household survey data <> publication year" + ], + [ + 586, + 597, + "household survey data <> data geography" + ], + [ + 615, + 631, + "household survey data <> data description" + ] + ], + "validated": true, + "empirical_context": "The targeting committee will include representatives from the colline development committee ( president and female member ), representatives from religious groups in the colline ( Caritas or priest, pastor ), representatives from the Child Protection Committee, the imboneza if present ( volunteer women in charge of addressing domestic violence and children issues, with support from MDPHSAG ), community health workers and the Red Cross, the community leader ( bachingonazi ) or customary leader ( Abagobo - 24 The poverty analysis on the household survey data ( ECVMB 2014 ) for the poverty map will provide the PMT coefficients and inform the design of a questionnaire to collect data on the variables associated with extreme poverty at the household-level.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as data used for empirical analysis in the context of poverty mapping.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'household survey data' which implies a structured collection of data from surveys.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as data used for empirical analysis in the context of poverty mapping.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 44, + "text": "The World Bank Strengthening Public Sector Efficiency and Statistical Capacity Project ( P151155 ) Page 41 of 93 Indicator Name Core Unit of Measure Baseline End Target Frequency Data Source / Methodology Responsibility for Data Collection Description: Component 1 and Citizen Engagement IRI Name: Procurement staff trained with demonstrated competency in procurement Percentage 0. 00 70. 00 Annual MINMAP MINMAP Description: Component and Gender IRI Demonstrated competency will be evidenced by test at the end of the trainings delivered to measure learning achievements Name: ECAM 5 survey revised methodology is aligned with new international standards on Poverty surveys Yes / No N Y Once INS MINEPAT Description: Component 4 major IRI Name: Interval between two Population Census reduced Years 17. 00 12. 00 Once INS MINEPAT Description: Component 4 major IRI Name: Regions for which cross border trade statistics are produced and taken into Number 0. 00 4. 00 Annual INS MINEPAT", + "ner_text": [ + [ + 578, + 591, + "named" + ], + [ + 4, + 14, + "ECAM 5 survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "00 70. 00 Annual MINMAP MINMAP Description: Component and Gender IRI Demonstrated competency will be evidenced by test at the end of the trainings delivered to measure learning achievements Name: ECAM 5 survey revised methodology is aligned with new international standards on Poverty surveys Yes / No N Y Once INS MINEPAT Description: Component 4 major IRI Name: Interval between two Population Census reduced Years 17. 00 12.", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey methodology aligned with international standards, indicating it is used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'survey', which often refers to a structured collection of data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey methodology aligned with international standards, indicating it is used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "013_BOSIB0efb09b920d90858a0135df22da7d1", + "page": 48, + "text": "The World Bank Ethiopia Digital ID for Inclusion and Services Project ( P179040 ) Page 38 of 39 ANNEX 3: Gender Analysis and Action Plan Problem statement. In Ethiopia, there is a notable gender gap in the existing ID system ( Kebele ID ) coverage. According to the ID4D-Findex Survey ( 2017 ), 36 percent of the population ages 18 and older lack a Kebele ID, with significant gender gap of 46 percent of women lacking one, compared to 25 percent of men. ANALYSIS: Gender gaps identified ACTIONS: Proposed actions Taken to address gaps INDICATORS: How bridging the gap will be measured Women have less knowledge about benefits of having an ID. Country-specific research, including a Social Risk Analysis and a Gender Gap in ID Study, outline low literacy, a general lack of awareness on the day-to-day use of ID, perceived irrelevance of formal identification, and limited knowledge of individual rights as key factors contributing to lower Kebele ID enrolment by women. Based on the most recent data from 2017, the adult ( age 15 and above ) literacy rate for men is 59 percent, compared to 44 percent for women ( World Bank 2022 ), which can make it harder for women to navigate the ID registration process.", + "ner_text": [ + [ + 266, + 284, + "named" + ], + [ + 4, + 14, + "ID4D-Findex Survey <> publisher" + ], + [ + 15, + 23, + "ID4D-Findex Survey <> data geography" + ], + [ + 159, + 167, + "ID4D-Findex Survey <> data geography" + ], + [ + 287, + 291, + "ID4D-Findex Survey <> publication year" + ], + [ + 313, + 341, + "ID4D-Findex Survey <> reference population" + ], + [ + 1006, + 1010, + "ID4D-Findex Survey <> reference year" + ], + [ + 1115, + 1125, + "ID4D-Findex Survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "In Ethiopia, there is a notable gender gap in the existing ID system ( Kebele ID ) coverage. According to the ID4D-Findex Survey ( 2017 ), 36 percent of the population ages 18 and older lack a Kebele ID, with significant gender gap of 46 percent of women lacking one, compared to 25 percent of men. ANALYSIS: Gender gaps identified ACTIONS: Proposed actions Taken to address gaps INDICATORS: How bridging the gap will be measured Women have less knowledge about benefits of having an ID.", + "type": "survey", + "explanation": "The ID4D-Findex Survey is explicitly mentioned as a source of data regarding the gender gap in ID coverage.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey that provides statistical data on ID coverage.", + "contextual_reason_agent": "The ID4D-Findex Survey is explicitly mentioned as a source of data regarding the gender gap in ID coverage.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 55, + "text": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "ner_text": [ + [ + 651, + 675, + "named" + ] + ], + "validated": false, + "empirical_context": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ).", + "type": "catalogue", + "explanation": "However, the context describes it as a catalogue that defines and categorizes products, not as a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'catalogue' which can imply a collection of items.", + "contextual_reason_agent": "However, the context describes it as a catalogue that defines and categorizes products, not as a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 90, + "text": "While there appears to be little differential for those completing primary schooling, this may be due to the fact that the Jordanian system defines primary schooling differently from the international standards used in the surveys upon which Figure 4 is based. In fact, the figure for completing preparatory schooling is quite possibly most accurately interpreted in Jordan as completing \u201c basic education \u201d ( up to approximately age 15 ), especially since enrollment rates are so high ( essentially universal ) and dropout rates so low in primary schooling. 10. Those with vocational education earn hardly any more than those who can only read and write, as found in previous studies ( e. g., ERfKE I PAD ) indicating that the returns to vocational education are particularly low. This speaks to the clear need to improve the quality of vocational education in Jordan, as 6 \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d HDNED. 7 For whatever reason, the research has tended to focus on institutional aspects and components that, it is claimed, tentatively appear to hold promise for improving quality: Centralized exams, Accountability, Choice, School autonomy, Performance incentives, Supply side interventions, and Demand side interventions, teacher preparation and training, to name a few. Each of these strategies or policies may hold promise, but the evidence thus far is not universally compelling and in and of themselves they provide few useful insights for policymakers looking to design reforms likely to improve classroom practices that will improve learning. 8 Note that one would normally do the comparison compared to illiterates; however, the household survey did not capture any illiterates and thus we are unable to perform that comparison. Later, we consider the implications given our best guess as to what accurate data on wages for illiterates would tell us.", + "ner_text": [ + [ + 1715, + 1731, + "named" + ], + [ + 367, + 373, + "household survey <> data geography" + ], + [ + 862, + 868, + "household survey <> data geography" + ], + [ + 977, + 982, + "household survey <> publisher" + ], + [ + 1689, + 1700, + "household survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "Each of these strategies or policies may hold promise, but the evidence thus far is not universally compelling and in and of themselves they provide few useful insights for policymakers looking to design reforms likely to improve classroom practices that will improve learning. 8 Note that one would normally do the comparison compared to illiterates; however, the household survey did not capture any illiterates and thus we are unable to perform that comparison. Later, we consider the implications given our best guess as to what accurate data on wages for illiterates would tell us.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is mentioned as a source of data that was used for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' typically refers to a structured collection of data gathered from households.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is mentioned as a source of data that was used for analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "187_multi-page", + "page": 9, + "text": "Transparent, Procurement rules and procedures Once the Public Procurement Agency develops capacity to competitive should enhance transparency and monitor public procurement, they will report on the procurement competition. incidence of permitted forms of procurement ( international competitive bidding, national competitive bidding, direct procurement, etc. ). Increasing reliance on international competitive bidding ( for contracts above a given threshold ), for instance, would indicate increasing competition. ( Periodicity: annual, once monitoring capacity is established ) This measure will be complemented by survey-based information. The corruption surveys of firms to be conducted in 2003 will include questions probing firms about their perceptions of the transparency and competitiveness of public procurement practices. Results will be compared to baseline results found in the 1998 corruption surveys, in particular, that ( a ) more than half of the firms surveyed in 1998 claimed that they did not participate in specific government procurements because competition is unfair, ( b ) almost 50 % of firms requiring clearance for participation in government procurements admnit to paying bribes for such clearance, and ( c ) these latter firms claimed to pay bribes 70 % of the time.", + "ner_text": [ + [ + 647, + 674, + "named" + ], + [ + 617, + 641, + "corruption surveys of firms <> data type" + ], + [ + 694, + 698, + "corruption surveys of firms <> publication year" + ], + [ + 891, + 895, + "corruption surveys of firms <> reference year" + ] + ], + "validated": true, + "empirical_context": "( Periodicity: annual, once monitoring capacity is established ) This measure will be complemented by survey-based information. The corruption surveys of firms to be conducted in 2003 will include questions probing firms about their perceptions of the transparency and competitiveness of public procurement practices. Results will be compared to baseline results found in the 1998 corruption surveys, in particular, that ( a ) more than half of the firms surveyed in 1998 claimed that they did not participate in specific government procurements because competition is unfair, ( b ) almost 50 % of firms requiring clearance for participation in government procurements admnit to paying bribes for such clearance, and ( c ) these latter firms claimed to pay bribes 70 % of the time.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves structured survey data collected from firms regarding their experiences and perceptions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to surveys that collect data on firms' perceptions.", + "contextual_reason_agent": "This is indeed a dataset as it involves structured survey data collected from firms regarding their experiences and perceptions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 51, + "text": "The World Bank Generating Growth Opportunities and Productivity for Women Enterprises Uganda ( P176747 ) Page 47 of 77 Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Women participating in a networking platform supported by the project, disaggregated by age ( Number ) The number of women that register with a networking platform. Continuous. Platform registered users. Data are collected automatically as people register. Platform managers to report the information to the MGLSD. The MGLSD will collect the information from the primary source each month. Women in RHDs Refugee women Women entrepreneurs who complete the core women entrepreneur course ( Number ) The satisfaction of participants of the training courses with the training. Continuous. Questionnair e. At the completion of the training, participants will complete a satisfaction questionnaire. The MGLSD will collect the questionnaires from the service providers and PSFU each month. Women RHDs Refugee women RHC women who report the core women entrepreneur course is accessible and meets their needs ( Percentage ) The satisfaction of refugee and host community women with the core course on entrepreneurship. Continuous Questionnair e At the completion of the training, participants will complete a satisfaction questionnaire. The MGLSD will collect the questionnaires from the service providers and PSFU each month.", + "ner_text": [ + [ + 617, + 622, + "named" + ] + ], + "validated": false, + "empirical_context": "Data are collected automatically as people register. Platform managers to report the information to the MGLSD. The MGLSD will collect the information from the primary source each month.", + "type": "organization", + "explanation": "MGLSD is not a dataset; it is an organization responsible for collecting information.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed MGLSD is a dataset because it is involved in data collection.", + "contextual_reason_agent": "MGLSD is not a dataset; it is an organization responsible for collecting information.", + "contextual_signal": "mentioned only as an organization, not as a data source", + "tags": [] + }, + { + "filename": "087_UGANDA-PAD-05252018", + "page": 42, + "text": "The World Bank Integrated Water Management and Development Project ( P163782 ) 31 Indicator Name Percent of the service areas achieve cost recovery ratio of 1. 1 under the project Definition / Description Percent of service areas supported by the project that achieve cost recovery ratio of 1. 1. Cost recovery ratio is the revenues collected over operating expenses for selected service areas under the project. Baseline was derived from the existing service areas that have been gazetted: Gulu, Mbale, Adjumani, Busia, Namasale, Kyegegwa-Mpara-Ruyonza, Namungalwe-Kaliro and Budaka-Kadama-Tirinyi-Kibuku - Butaleja-Busolwe. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Accounting System ( SCALA ), Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report. Methodology for Data Collection Responsibility for Data Collection MWE / DWD Rural and Urban / NWSC", + "ner_text": [ + [ + 758, + 763, + "named" + ], + [ + 97, + 161, + "UPMiS <> data description" + ], + [ + 491, + 495, + "UPMiS <> data geography" + ], + [ + 497, + 502, + "UPMiS <> data geography" + ], + [ + 504, + 512, + "UPMiS <> data geography" + ], + [ + 514, + 519, + "UPMiS <> data geography" + ], + [ + 521, + 529, + "UPMiS <> data geography" + ], + [ + 531, + 553, + "UPMiS <> data geography" + ], + [ + 555, + 572, + "UPMiS <> data geography" + ], + [ + 577, + 624, + "UPMiS <> data geography" + ], + [ + 820, + 875, + "UPMiS <> data description" + ] + ], + "validated": true, + "empirical_context": "Baseline was derived from the existing service areas that have been gazetted: Gulu, Mbale, Adjumani, Busia, Namasale, Kyegegwa-Mpara-Ruyonza, Namungalwe-Kaliro and Budaka-Kadama-Tirinyi-Kibuku - Butaleja-Busolwe. Frequency Bi-annually Data Source Primary sources include: MWE M & E system, NWSC Accounting System ( SCALA ), Refugee Database and UPMiS. Secondary sources include: MWE / DWD progress reports, annual water and environments sector performance report and NWSC annual report.", + "type": "system", + "explanation": "UPMiS is indeed a dataset as it is explicitly mentioned as a primary source of data in the context.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed UPMiS is a dataset because it is listed among primary data sources.", + "contextual_reason_agent": "UPMiS is indeed a dataset as it is explicitly mentioned as a primary source of data in the context.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "117_Somali-Urban-Investment-Planning-Project", + "page": 11, + "text": "For the first time since 1991, there is a federal ( rather than transitional ) government and a widely supported provisional constitution. The September 2013 Brussels Conference galvanized further domestic and international support for the government, and endorsed a Compact whose Peace-building and State-building Goals provide an important roadmap for the coming three year period. While positive momentum has been generated, sustaining domestic confidence through the translation of international support into improved security, governance, and economic benefits on the ground is an increasingly pressing challenge. 4. Somalia \u2019 s macro-economic framework reflects the country \u2019 s underlying fragility. Reliable macro-economic data for Somalia is not available \u2013 however regional fiscal and economic data does exist and broader estimates can be aggregated. Public expenditure is estimated to account for 7. 7 percent of GDP3 compared with private sector consumption of 73 percent of GDP. Agriculture and services are the key contributors to GDP. Based on regional fiscal data, Somaliland controls the largest budgetary resource envelope, generating US $ 127 million in revenue during 2012, compared to US $ 35 million at the federal level and US $ 38 million in Puntland. 5. In contrast to the war-torn south, authorities in the northern regions have put in place functioning institutions that have succeeded in sustaining stability although considerable development challenges remain. Following their declaration of independence and semi - autonomy respectively, Somaliland and Puntland have developed hybrid forms of governance combining modern institutions with religious authorities, civil society, the private sector and 1 Interim Strategy Note FY14-16, World Bank, December 2013, UNFPA Population Estimates 2014 2 A Rapid Assessment of Three Somali Urban Areas, World Bank, November 2013, UNFPA Population Estimates 2014 3 In the UNDP Human Development Report, Somalia \u2019 s GDP is estimated to be US $ 2. 6 billion and per capita GDP is estimated to be US $ 288 based on the World Development Indicators and Economist Intelligence Unit.", + "ner_text": [ + [ + 774, + 807, + "named" + ], + [ + 153, + 157, + "regional fiscal and economic data <> publication year" + ], + [ + 622, + 629, + "regional fiscal and economic data <> data geography" + ], + [ + 1080, + 1090, + "regional fiscal and economic data <> data geography" + ], + [ + 1187, + 1191, + "regional fiscal and economic data <> publication year" + ], + [ + 1567, + 1577, + "regional fiscal and economic data <> data geography" + ], + [ + 1762, + 1772, + "regional fiscal and economic data <> publisher" + ], + [ + 1871, + 1881, + "regional fiscal and economic data <> publisher" + ], + [ + 2160, + 2178, + "regional fiscal and economic data <> usage context" + ] + ], + "validated": true, + "empirical_context": "Somalia \u2019 s macro-economic framework reflects the country \u2019 s underlying fragility. Reliable macro-economic data for Somalia is not available \u2013 however regional fiscal and economic data does exist and broader estimates can be aggregated. Public expenditure is estimated to account for 7.", + "type": "data", + "explanation": "This is indeed a dataset as it refers to existing data that can be aggregated for analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to specific types of data related to fiscal and economic aspects.", + "contextual_reason_agent": "This is indeed a dataset as it refers to existing data that can be aggregated for analysis.", + "contextual_signal": "mentioned as existing data that can be aggregated", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 33, + "text": "WFP works to complement MENFOP \u2019 s school canteen program. UNHCR coordinates on the educational needs of refugees. The Japanese International Cooperation Agency currently sends experts in mathematics instruction and school management committees. The French Development Agency, also active in the sector, works in TVET and Higher Education. Finally, the EAA Foundation is collaborating with the World Bank on their Educate A Child program for out of school children. They are contributing US $ 3. 8 million with results-based funding aimed at helping the GoD enroll OOSC in the project. G. Lessons Learned and Reflected in the Project Design 72. The proposed project will benefit from the technical and policy dialogue in education conducted with the GoD on an ongoing basis since 2000. It will also benefit from the World Bank \u2019 s strong analytical understanding of contemporary education issues in Djibouti that is based on: ( a ) the Systematic Country Diagnostic ( 2018 ), ( b ) the EDAM4 Household Survey ( 2018 ), and ( c ) the support to MENFOP in their revision of their Education Action Plan for 2017-2020. The World Bank has also recently completed a SABER-ECD study ( 2019 ) and is currently studying the educational needs of refugees ( expected 2019 ). The project will also draw on lessons from the UNICEF-supported revision to the Out of School Children Survey ( 2019 ), which focuses on refugees, girls, and school leavers. 73. The following table highlights lessons from the two previous projects, PAEQ ( P145323 ) and ACIGEF ( P123315 ), that will be incorporated into the new Project. Table 3: Lessons learned from previous IDA-financed projects in education Lessons Mitigation Measures in this Project When developing a sub-sector in which there has been limited activity, like preschool, it is important to consider whether the legislative and regulatory framework is in place and is needed as a first step In the pre-primary sub-sector, the policy and quality framework will be developed first by incentivizing MENFOP to invest in the creation of an enabling environment through a DLI.", + "ner_text": [ + [ + 986, + 1008, + "named" + ], + [ + 394, + 404, + "EDAM4 Household Survey <> publisher" + ], + [ + 816, + 826, + "EDAM4 Household Survey <> publisher" + ], + [ + 899, + 907, + "EDAM4 Household Survey <> data geography" + ], + [ + 968, + 972, + "EDAM4 Household Survey <> publication year" + ], + [ + 1011, + 1015, + "EDAM4 Household Survey <> publication year" + ], + [ + 1119, + 1129, + "EDAM4 Household Survey <> publisher" + ], + [ + 1178, + 1182, + "EDAM4 Household Survey <> publication year" + ], + [ + 1376, + 1380, + "EDAM4 Household Survey <> publication year" + ], + [ + 1401, + 1409, + "EDAM4 Household Survey <> reference population" + ] + ], + "validated": true, + "empirical_context": "The proposed project will benefit from the technical and policy dialogue in education conducted with the GoD on an ongoing basis since 2000. It will also benefit from the World Bank \u2019 s strong analytical understanding of contemporary education issues in Djibouti that is based on: ( a ) the Systematic Country Diagnostic ( 2018 ), ( b ) the EDAM4 Household Survey ( 2018 ), and ( c ) the support to MENFOP in their revision of their Education Action Plan for 2017-2020. The World Bank has also recently completed a SABER-ECD study ( 2019 ) and is currently studying the educational needs of refugees ( expected 2019 ).", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that contributes to the World Bank's analytical understanding of education issues.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Household Survey' in its name, suggesting a structured collection of data.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that contributes to the World Bank's analytical understanding of education issues.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 36, + "text": "Cycle 61. 5 % 97. 8 % 61. 6 % 98. 0 % 61 8 % 98. 2 % 62. 0 % 98. 4 % 62. 3 % 98. 6 % 62. 6 % 98. 8 % 63. 0 % % Annual EMIS Directorate of Educational Planning ( DEP ) 2 The baseline for the enrollment rates based on the EMIS data for the year 2007 / 2008", + "ner_text": [ + [ + 118, + 122, + "named" + ] + ], + "validated": false, + "empirical_context": "8 % 63. 0 % % Annual EMIS Directorate of Educational Planning ( DEP ) 2 The baseline for the enrollment rates based on the EMIS data for the year 2007 / 2008", + "type": "system", + "explanation": "However, EMIS is mentioned as a system rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with educational data collection.", + "contextual_reason_agent": "However, EMIS is mentioned as a system rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 14, + "text": "Women entrepreneurs are likely to be excluded from the channels of information, networks, and mentors associated with the more profitable, male-dominated sectors and businesses within them. Throughout a firm \u2019 s life, the diversity of networks can impact whether an entrepreneur has access to credit, learns about new market opportunities, and acquires the skills needed to successfully operate their businesses. 22 Studies of women entrepreneurs in Uganda find that women who work closely with a mentor \u2014 often male, and usually a family member \u2014 are more likely to transition into higher-profit sectors. 23 24 15. Additional factors that block women from developing growth-oriented enterprises in profitable sectors are related to the failure of existing business development services to address the needs of women-owned firms. According to an enterprise survey conducted in 2014, MSMEs lacked key skills needed for business growth. Only 28 percent of firms surveyed said they do book-keeping to track revenues and expenses; a mere 10 percent had invested in training for employees; and just 36 percent had access to the internet. Female-owned firms appear to be particularly lacking when it comes to the use of standard business practices. A recent microenterprise survey showed a gender gap of 24 percentage points on an index of adoption of good business practices.", + "ner_text": [ + [ + 1252, + 1274, + "named" + ], + [ + 0, + 19, + "microenterprise survey <> reference population" + ], + [ + 450, + 456, + "microenterprise survey <> data geography" + ], + [ + 811, + 828, + "microenterprise survey <> reference population" + ], + [ + 877, + 881, + "microenterprise survey <> publication year" + ], + [ + 883, + 888, + "microenterprise survey <> reference population" + ], + [ + 1133, + 1151, + "microenterprise survey <> reference population" + ], + [ + 1284, + 1318, + "microenterprise survey <> data description" + ], + [ + 1325, + 1369, + "microenterprise survey <> data description" + ] + ], + "validated": true, + "empirical_context": "Female-owned firms appear to be particularly lacking when it comes to the use of standard business practices. A recent microenterprise survey showed a gender gap of 24 percentage points on an index of adoption of good business practices.", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data on the adoption of business practices among female-owned firms.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on business practices.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data on the adoption of business practices among female-owned firms.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 18, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 13 of 85 25. The vision of MENFOP \u2019 s ICT strategy is to enhance the quality of teaching and learning and to significantly improve students \u2019 performance. As part of the ICT strategy, GoD \u2019 s president initiated the \" One Child, One Tablet \" project which aims to generalize the use of tablets in classrooms starting in first grade. At this stage, coverage has reached grades one, two, three and four. Free applications, programs by subject matter and interactive content have been developed and come with the tablets. A teacher training module on the usage of tablets has also been developed. Furthermore, while ICT education will only be introduced into basic education with the curriculum revisions that are currently underway, MENFOP has already incorporated the study of ICT into the high school curriculum starting in 2013. It also introduced the use of \u201c smart classrooms \u201d, which are now present in 10 high schools and in the largest technical high school; and a training module has been developed for teachers on their use. In terms of internet connectivity, schools in Djibouti Ville are connected through ADSL while the regions use Wimax, a system that piggybacks on existing telephone coverage thereby making it suitable for remote communities.", + "ner_text": [ + [ + 1220, + 1225, + "named" + ] + ], + "validated": false, + "empirical_context": "It also introduced the use of \u201c smart classrooms \u201d, which are now present in 10 high schools and in the largest technical high school; and a training module has been developed for teachers on their use. In terms of internet connectivity, schools in Djibouti Ville are connected through ADSL while the regions use Wimax, a system that piggybacks on existing telephone coverage thereby making it suitable for remote communities.", + "type": "system", + "explanation": "'Wimax' is not a dataset as it refers to a technology/system for internet connectivity, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Wimax' is a dataset because it is related to internet connectivity, which can involve data transmission.", + "contextual_reason_agent": "'Wimax' is not a dataset as it refers to a technology/system for internet connectivity, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "098_CAMEROON-PAD-NEW-08032017", + "page": 89, + "text": "To assess avoided costs in the without investment scenario, the following information was collected from existing literature: a. Health expenses data were sourced from healthcare centers and from a national health survey28. b. Incidence of major water-borne diseases were also sourced from TBS329 c. Opportunity cost of lost working day was calculated from data of a national employment survey30. ( CFAF 2, 500 per work day ). It was assumed that, for the treatment duration, each patient will be taken care of by one adult. Time spent by this adult translates into an opportunity cost as the adult will forgo revenues he / she could otherwise earn. The daily revenue of the average adult was estimated at CFAF 2, 500 per day ( legal minimum salary ). Therefore, given the population in the different sub - projects, avoided costs included ( i ) the direct costs incurred for different water-borne diseases and ( ii ) indirect costs related to the opportunity costs of adults \u2019 time spent on care. 31 Costs were estimated at CFAF 45, 000 per household per year. ( ii ) Flood avoidance related benefits. To assess avoided costs related to avoiding floods, the following information was collected: a. Frequency and costs of major flood events in the project influence area32.", + "ner_text": [ + [ + 367, + 395, + "named" + ], + [ + 300, + 336, + "national employment survey30 <> data description" + ], + [ + 1289, + 1307, + "national employment survey30 <> usage context" + ] + ], + "validated": true, + "empirical_context": "Incidence of major water-borne diseases were also sourced from TBS329 c. Opportunity cost of lost working day was calculated from data of a national employment survey30. ( CFAF 2, 500 per work day ).", + "type": "survey", + "explanation": "This is indeed a dataset as it provides structured data used for empirical analysis regarding employment.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a source of data for calculating opportunity cost.", + "contextual_reason_agent": "This is indeed a dataset as it provides structured data used for empirical analysis regarding employment.", + "contextual_signal": "mentioned as a source of data for calculations", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 53, + "text": "The DLI disburses US $ 1 million for each percentage point increase in the number of MOH facility sites which installed and operationalized the national EMR platform out of a total number of MOH health facility sites21 in Jordan ( as of calendar year [ CY ] 2023 ) within the limit of US $ 63 million. Description The DLI supports the installation and operationalization of the national EMR platform ( that is, Hakeem ) to produce electronic medical records across all MOH health facility sites. The operational status will include at least four core functions: 1. The ability to query and / or access a record22 in the system. 2. The ability to create and / or update a record in the system. 3. The ability to refer a patient to a different facility with a common facility identifier across the system. 4. The ability to generate administrative activity reports that demonstrate the utilization of the system. 5. The ability for patients to access and view personal medical records. Data source / Agency A delivery notice from the EHS concerning the installation of the EMR platform, and the confirmation notice from the MOH about the installed EMR being operational at supported facility sites. Verification Entity KACE. 21 MOH health facility sites refer to sites that are required to have the EMR platform in accordance with the MOH decision.", + "ner_text": [ + [ + 831, + 862, + "named" + ] + ], + "validated": false, + "empirical_context": "4. The ability to generate administrative activity reports that demonstrate the utilization of the system. 5.", + "type": "document", + "explanation": "However, 'administrative activity reports' are documents that summarize activities rather than structured collections of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because reports can contain data and are often used in analysis.", + "contextual_reason_agent": "However, 'administrative activity reports' are documents that summarize activities rather than structured collections of data.", + "contextual_signal": "mentioned only as a report, not as a data source", + "tags": [] + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 25, + "text": "16 NCHRD and MOE \u2014 which started under ERfKE II and contributed to producing key pieces of analytical work that have played a significant role in policy development \u2014 will continue under the proposed Program. 21 One example of an evaluation study can be to look at how gender issues are addressed across different policy proposals and program interventions. The annual plan for the M & E activities will be endorsed by the GPSC and shared with all concerned donors and education sector partners. 52. The MOE has strengthened its data systems and will continue to do so to meet the M & E requirements of the Program. Under ERfKE II22, the MOE completed the redesign of its OpenEMIS, which was successfully deployed in all public schools in Jordan. OpenEMIS is a comprehensive and integrated information and data collection system, which covers all education levels. The system captures disaggregated school level data on students, teachers, grades, subjects, and teaching periods for all MOE operated schools and institutions in the country. A dashboard linked to the OpenEMIS is currently being developed and will be accessible to individual units at the MOE to enable them to monitor their respective indicators. The Queen Rania Center ( QRC ) manages the EMIS and its dashboard.", + "ner_text": [ + [ + 672, + 680, + "named" + ], + [ + 13, + 16, + "OpenEMIS <> publisher" + ], + [ + 504, + 507, + "OpenEMIS <> publisher" + ], + [ + 638, + 641, + "OpenEMIS <> publisher" + ], + [ + 721, + 735, + "OpenEMIS <> reference population" + ], + [ + 739, + 745, + "OpenEMIS <> data geography" + ], + [ + 885, + 916, + "OpenEMIS <> data description" + ], + [ + 987, + 990, + "OpenEMIS <> publisher" + ], + [ + 1155, + 1158, + "OpenEMIS <> publisher" + ] + ], + "validated": true, + "empirical_context": "The MOE has strengthened its data systems and will continue to do so to meet the M & E requirements of the Program. Under ERfKE II22, the MOE completed the redesign of its OpenEMIS, which was successfully deployed in all public schools in Jordan. OpenEMIS is a comprehensive and integrated information and data collection system, which covers all education levels.", + "type": "system", + "explanation": "OpenEMIS is indeed a data system that functions as a source of information for educational data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because OpenEMIS is described as an information and data collection system.", + "contextual_reason_agent": "OpenEMIS is indeed a data system that functions as a source of information for educational data.", + "contextual_signal": "described as a comprehensive and integrated information and data collection system", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 9, + "validated": 2, + "not_validated": 7 + } + }, + { + "filename": "101_Project-Appraisal-Document-PAD-P163830-2017-05-22-CLEAN-05232017", + "page": 86, + "text": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 82 of 111 Key Potential Risks Summary Analysis and Mitigation Measures to maintaining the ICRC \u2019 s capacity to operate rapidly and independently, prioritizing its response on the basis of the most urgent needs and vulnerabilities of the people it endeavors to help. Although the document management systems across the two organizations were found to be fairly robust, essential supporting documentation may not be maintained particularly documentation relating to decentralized cash transfer payments in Somalia. Given that the FM fiduciary risk is rated high, FM implementation supervision shall be undertaken at least once every 3 months. Specific attention to transactional review and records management and reconciliation of the expenditures with the Third-Party Technical Review Reports. Implementation Fraud and associated Risks. Recruitment and deployment of Third-Party Review arrangements focusing inter-alia on specific two fiduciary aspects: ( i ) ongoing monitoring of the payment distribution processes to include financial compliance, and; ( ii ) post-transfer monitoring.", + "ner_text": [ + [ + 367, + 394, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank Somalia Emergency Drought Response and Recovery Project ( P163830 ) Page 82 of 111 Key Potential Risks Summary Analysis and Mitigation Measures to maintaining the ICRC \u2019 s capacity to operate rapidly and independently, prioritizing its response on the basis of the most urgent needs and vulnerabilities of the people it endeavors to help. Although the document management systems across the two organizations were found to be fairly robust, essential supporting documentation may not be maintained particularly documentation relating to decentralized cash transfer payments in Somalia. Given that the FM fiduciary risk is rated high, FM implementation supervision shall be undertaken at least once every 3 months.", + "type": "system", + "explanation": "However, it is not a dataset as it refers to systems for managing documents rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'document management systems' could imply a structured collection of documents.", + "contextual_reason_agent": "However, it is not a dataset as it refers to systems for managing documents rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 90, + "text": "While there appears to be little differential for those completing primary schooling, this may be due to the fact that the Jordanian system defines primary schooling differently from the international standards used in the surveys upon which Figure 4 is based. In fact, the figure for completing preparatory schooling is quite possibly most accurately interpreted in Jordan as completing \u201c basic education \u201d ( up to approximately age 15 ), especially since enrollment rates are so high ( essentially universal ) and dropout rates so low in primary schooling. 10. Those with vocational education earn hardly any more than those who can only read and write, as found in previous studies ( e. g., ERfKE I PAD ) indicating that the returns to vocational education are particularly low. This speaks to the clear need to improve the quality of vocational education in Jordan, as 6 \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d HDNED. 7 For whatever reason, the research has tended to focus on institutional aspects and components that, it is claimed, tentatively appear to hold promise for improving quality: Centralized exams, Accountability, Choice, School autonomy, Performance incentives, Supply side interventions, and Demand side interventions, teacher preparation and training, to name a few. Each of these strategies or policies may hold promise, but the evidence thus far is not universally compelling and in and of themselves they provide few useful insights for policymakers looking to design reforms likely to improve classroom practices that will improve learning. 8 Note that one would normally do the comparison compared to illiterates; however, the household survey did not capture any illiterates and thus we are unable to perform that comparison. Later, we consider the implications given our best guess as to what accurate data on wages for illiterates would tell us.", + "ner_text": [ + [ + 883, + 887, + "named" + ], + [ + 938, + 973, + "PISA <> data geography" + ] + ], + "validated": true, + "empirical_context": ", ERfKE I PAD ) indicating that the returns to vocational education are particularly low. This speaks to the clear need to improve the quality of vocational education in Jordan, as 6 \u201c Using PISA to Understand the Determinants of Learning in the Middle-East and North Africa Region, \u201d HDNED. 7 For whatever reason, the research has tended to focus on institutional aspects and components that, it is claimed, tentatively appear to hold promise for improving quality: Centralized exams, Accountability, Choice, School autonomy, Performance incentives, Supply side interventions, and Demand side interventions, teacher preparation and training, to name a few.", + "type": "dataset", + "explanation": "PISA is indeed a dataset as it provides structured data on student performance and educational systems used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed PISA is a dataset because it is often associated with educational assessments and data collection.", + "contextual_reason_agent": "PISA is indeed a dataset as it provides structured data on student performance and educational systems used for empirical analysis.", + "contextual_signal": "mentioned as a source for understanding determinants of learning", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "086_NEW-ETHIOPIA-PAD-06072018", + "page": 111, + "text": "103 20. Internal audit. The 2014 PEFA noted that all Federal BIs have functioning internal audit units, which prepare annual audit plans on a risk basis and report quarterly to MoFEC Inspection Directorate. To enhance independency, internal auditors are assigned and removed only by MoFEC. Audit findings are discussed with auditees at the conclusion of each audit and action plans are agreed and followed up. The PEFA noted that high staff turnover remains a challenge for improvement of internal audit function. Internal audit manual ( 2004 ), internal audit training module ( 2005 ), internal audit reporting procedure manual ( 2010 ) and performance audit manual, standards and implementation guide ( 2013 ) are available for reference. All are issued by MoFEC. The assessment corroborated the PEFA findings ( mentioned in the above paragraph ) at the EIC and MoLSA. The current number of internal auditors at the EIC, as shown in staff data, in the annex, is not adequate to perform the audit effectively. The internal audit unit should be strengthened by filling the vacant posts as the books of the accounts of the proposed Program are subject to internal audit review. At ARRA, the internal audit unit is established in the current year ( 2017 ).", + "ner_text": [ + [ + 935, + 945, + "named" + ] + ], + "validated": false, + "empirical_context": "The assessment corroborated the PEFA findings ( mentioned in the above paragraph ) at the EIC and MoLSA. The current number of internal auditors at the EIC, as shown in staff data, in the annex, is not adequate to perform the audit effectively. The internal audit unit should be strengthened by filling the vacant posts as the books of the accounts of the proposed Program are subject to internal audit review.", + "type": "data", + "explanation": "'Staff data' is mentioned in a context that does not indicate it is a dataset used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'staff data' refers to a structured collection of information about staff.", + "contextual_reason_agent": "'Staff data' is mentioned in a context that does not indicate it is a dataset used for empirical analysis.", + "contextual_signal": "mentioned only as a reference, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + }, + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "065_Mauritania-Decentralization-and-Productive-Cities-Support-Project", + "page": 29, + "text": "The GEMS will enable the PCU to collect and structure digital data that automatically feeds into a centralized M & E system. The platform will be customized to capture relevant indicators, photos, audio, videos; time and date stamps; and Global Positioning System coordinates that allow for automated geo - mapping of the project during implementation. Using these tools systematically allows the Government and the World Bank remote supervision, frequent safeguards monitoring, and coordination across projects and partners working in the same area. 54. The M & E process will involve data collection and reporting, production of periodic activity reports, and biannual reviews. Surveys will be conducted among economic actors of the seven cities of 35 The same team at SOMELEC will be overseeing the implementation of the AFD-funded RIMDIR and will benefit from the oversight of the RIMDIR PSC. 36 The specialist will be established in the same unit in charge of the AFD-funded DECLIC 1 and 2 projects to ensure synergies.", + "ner_text": [ + [ + 4, + 8, + "named" + ] + ], + "validated": false, + "empirical_context": "The GEMS will enable the PCU to collect and structure digital data that automatically feeds into a centralized M & E system. The platform will be customized to capture relevant indicators, photos, audio, videos; time and date stamps; and Global Positioning System coordinates that allow for automated geo - mapping of the project during implementation.", + "type": "system", + "explanation": "However, GEMS is described as a system for collecting data, not as a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed GEMS is a dataset because it involves data collection and structuring.", + "contextual_reason_agent": "However, GEMS is described as a system for collecting data, not as a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 35, + "text": "The focus of TA will be on activities critical for the attainment of DLI targets under each results area. TA for EMIS will include support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, and report generation, among others ). Support will also include EMIS monitoring at the central and deconcentrated levels. Subcomponent 3. 2: Project management ( US $ 3 million ) 62. The objective of this subcomponent is to ensure adequate capacity for project implementation and coordination through the establishment of a Project Coordination and Management Unit ( PCMU ) within MINEDUB. This subcomponent will also finance costs associated with training, the recruitment of short - and long-term consultants, studies, surveys, M & E activities ( including independent verification of", + "ner_text": [ + [ + 113, + 117, + "named" + ] + ], + "validated": false, + "empirical_context": "The focus of TA will be on activities critical for the attainment of DLI targets under each results area. TA for EMIS will include support for training at deconcentrated levels on the use of the information technology platform for data management, descriptive statistics, and data management techniques ( data validation, tabulations, metadata and database management, and report generation, among others ). Support will also include EMIS monitoring at the central and deconcentrated levels.", + "type": "program", + "explanation": "However, EMIS is mentioned as a program focused on training and support rather than as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data management and statistics.", + "contextual_reason_agent": "However, EMIS is mentioned as a program focused on training and support rather than as a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 25, + "text": "The World Bank Balochistan Human Capital Investment Project ( P166308 ) Page 21 of 47 ( FMS ), and an environmental and social safeguards specialist / officer. 47 The PMUs will be fully authorized to implement the planned activities approved by the Project Steering Committee ( PSC ). 46. A Project Coordination Committee ( PCC ) will be set up to coordinate project implementation and a PSC will be set up to provide strategic guidance and oversight. The PCC, co \u2010 chaired by Secretaries Health and Secondary Education, will meet quarterly. The PSC, chaired by the Additional Chief Secretary, will meet biannually ( see figure 2 ). Figure 2. Institutional and Implementation Arrangements B. Results Monitoring and Evaluation Arrangements 47. Building on the results chain, the M & E framework identified indicators to track project implementation progress and impact. The PDO \u2010 level health indicators are taken from the RMNCHN indicators in the DHIS, while digitization and integration of various HMIS is an intermediate indicator. The education indicators are taken from the EMIS. Where possible, relevant indicators will be disaggregated by gender. Discussions with the GoB and the UNHCR have confirmed, however, that beneficiary data by nationality will not be routinely collected or publicly released. 48.", + "ner_text": [ + [ + 1078, + 1082, + "named" + ] + ], + "validated": false, + "empirical_context": "The PDO \u2010 level health indicators are taken from the RMNCHN indicators in the DHIS, while digitization and integration of various HMIS is an intermediate indicator. The education indicators are taken from the EMIS. Where possible, relevant indicators will be disaggregated by gender.", + "type": "system", + "explanation": "However, EMIS is mentioned as a system and not explicitly as a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with education indicators.", + "contextual_reason_agent": "However, EMIS is mentioned as a system and not explicitly as a data source in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "172_multi0page", + "page": 46, + "text": "grant program and oversight WMA A Finance Directorate able to Adequate procedures in manage all project funds and compliance with IDA financial treasury and to comply with management and accounting accounting, budgeting and rules. financial reporting requirements. Internal Audit unit able to Report on budget execution per eview all MEST operations quarter and annual budget forecast based on assessment of needs and pnorities. EMIS established with data on EMIS network infrastructure four school annual surveys and set up with procedures in place gradually integrating and for users. linking data on finance ( i. e. udget execution ); personnel Four annual school surveys ( payroll and deployment published. ecords ); and indicators on OL standards. EMIS data are effectively used EMIS reports. for planning and prospective work on the education sector. Improved personnel Audit report on personnel management system with management contracted out. effective procedures enforced for recruitment, deployment Records on MEST ' s staff on and payment of MEST ' s staff the payroll. and teachers on the payroll. 80 trainers trained in first year List of trainers published and of Project to serve as trainers evaluation report on training for in-service teacher training received. activities to be implemented via school grants. 600 SMCs established with Reports on school tnspections prior mobilization from the from Inspectorate. community and with capacity built ( training when required ) to monitor school grant", + "ner_text": [ + [ + 459, + 463, + "named" + ] + ], + "validated": false, + "empirical_context": "Internal Audit unit able to Report on budget execution per eview all MEST operations quarter and annual budget forecast based on assessment of needs and pnorities. EMIS established with data on EMIS network infrastructure four school annual surveys and set up with procedures in place gradually integrating and for users. linking data on finance ( i.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data on network infrastructure and surveys.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "043_Uganda-Digital-Acceleration-Project", + "page": 48, + "text": "Additional refugee-specific risks include the high proportion of women and girls and other vulnerable people within the refugee population, which poses specific protection challenges, including GBV; challenges to the ongoing allocation of land to refugees; and administrative and informal barriers for refugees to access productive employment, finance, and market opportunities. Another protection risk that this project will manage relates to ensuring the ongoing adequacy and management of refugee registration data. Over the three years, the WB has undertaken analytical studies in Uganda across refugees and RHDs such as on GBV, deforestation and environmental management, and socioeconomic status informing refugee policy. The findings of these are being operationalized through WHR-financed projects including this one. These risks are then being managed jointly through effective in-country coordination mechanisms which include the UNHCR, OPM, development and humanitarian partners, and other parts of the GoU, spearheaded by the CRRF Steering Group, which meets quarterly. The WB co-chairs the CRRF Development Partners Group which provides another effective platform to ensure joint management of the above risks, including on protection issues, with the GoU and other humanitarian and development organizations. The project will work through these coordination mechanisms. Refugee Sector Response Plans referenced earlier have been developed to institutionalize refugee support within national systems, and the JLIRP has strong digital components. The WB will work closely with the UNHCR to continually monitor the protection environment throughout project implementation, including on registration data management and access to digital services..", + "ner_text": [ + [ + 492, + 517, + "named" + ], + [ + 120, + 138, + "refugee registration data <> reference population" + ], + [ + 545, + 547, + "refugee registration data <> publisher" + ], + [ + 585, + 591, + "refugee registration data <> data geography" + ], + [ + 599, + 607, + "refugee registration data <> reference population" + ], + [ + 1086, + 1088, + "refugee registration data <> publisher" + ], + [ + 1563, + 1565, + "refugee registration data <> publisher" + ] + ], + "validated": true, + "empirical_context": "Additional refugee-specific risks include the high proportion of women and girls and other vulnerable people within the refugee population, which poses specific protection challenges, including GBV; challenges to the ongoing allocation of land to refugees; and administrative and informal barriers for refugees to access productive employment, finance, and market opportunities. Another protection risk that this project will manage relates to ensuring the ongoing adequacy and management of refugee registration data. Over the three years, the WB has undertaken analytical studies in Uganda across refugees and RHDs such as on GBV, deforestation and environmental management, and socioeconomic status informing refugee policy.", + "type": "data", + "explanation": "This is indeed a dataset as it pertains to the structured collection of data used for managing refugee information.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a specific type of data related to refugee registration.", + "contextual_reason_agent": "This is indeed a dataset as it pertains to the structured collection of data used for managing refugee information.", + "contextual_signal": "mentioned as a data source for managing refugee information", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 17, + "text": "The overall lower secondary level repetition rates are higher for male students than female students. Drop-out rates rose sharply from 3. 7 percent to 7 percent between 2013 and 2015 and are higher for female students. In terms of learning outcomes, the 2018 Early Grade Mathematics Assessment ( EGMA ) test results showed that on average, grade two students were only able to solve 1 out of 6 mathematics problems, and almost 60 percent had zero scores. Although they represent slight improvements over the 2017 EGMA scores, weak performance in mathematics remains a major concern. 23. Assessment systems are not fully utilized for learning and policy decision-making and need to be redesigned to capture student performance against learning standards. Fifth grade OTI results are managed at the national level while the second-grade results are managed at the school level. Currently, only basic analysis is conducted for the fifth grade OTI and BEF exam results. Strengthened MENFOP capacity to analyze and use assessment data would optimize the feedback systems to inform teaching practices. 24. A process of curriculum modernization has begun. The existing curriculum was introduced in 2011, and a revision is currently underway. The revision aims to make appropriate adjustments, additions and improvements to introduce innovative pedagogical practices, focus on mathematics, languages and science, integrate Information, Communication and Technology ( ICT ) into curricula, and develop digital and 21st century skills. Revisions have started, and work related to materials development, trialing and production will begin in September 2019.", + "ner_text": [ + [ + 1014, + 1029, + "named" + ] + ], + "validated": false, + "empirical_context": "Currently, only basic analysis is conducted for the fifth grade OTI and BEF exam results. Strengthened MENFOP capacity to analyze and use assessment data would optimize the feedback systems to inform teaching practices. 24.", + "type": "data", + "explanation": "'Assessment data' is mentioned in a general sense and not as a specific dataset or data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'assessment data' refers to a structured collection of data used for analysis.", + "contextual_reason_agent": "'Assessment data' is mentioned in a general sense and not as a specific dataset or data source.", + "contextual_signal": "mentioned only as a type of data, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "159_38147core", + "page": 33, + "text": "The IDPs had been encouraged to put up new thatched huts on the eve o f the housing project in order to be entitled to the cash grant. However, the U N H C R Survey conducted in April 2006 would be the baseline and cut o f f point to determine eligibility for housing assistance. Sequencing o f RefuPee Camps for Implementation The phasing o f refugee camps for housing support was done on the basis o f a two stage screening process. Relying upon U N H C R data, all 141 IDP camps were socially ranked using three indicators i. e. ( i ) percentage o f temporary houses in a camp; ( ii ) percentage o f families possessing land in a camp; and ( iii ) percentage o f families in a camp who opted to settle in Puttalam. These indicators were assigned scores o f 75, 15 and 10 respectively in keeping with community perceptions as to their relative weight. The socially ranked camps were then screened in terms o f three environmental indicators i. e. ( i ) flooding, ( ii ) land surface and ( iii ) quality o f environment. 25 camps were thus identified for Phase 1 o f the PHP.", + "ner_text": [ + [ + 148, + 164, + "named" + ], + [ + 4, + 8, + "U N H C R Survey <> reference population" + ], + [ + 178, + 188, + "U N H C R Survey <> reference year" + ], + [ + 472, + 481, + "U N H C R Survey <> reference population" + ], + [ + 538, + 579, + "U N H C R Survey <> data description" + ], + [ + 588, + 637, + "U N H C R Survey <> data description" + ], + [ + 708, + 716, + "U N H C R Survey <> data geography" + ], + [ + 1092, + 1110, + "U N H C R Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "The IDPs had been encouraged to put up new thatched huts on the eve o f the housing project in order to be entitled to the cash grant. However, the U N H C R Survey conducted in April 2006 would be the baseline and cut o f f point to determine eligibility for housing assistance. Sequencing o f RefuPee Camps for Implementation The phasing o f refugee camps for housing support was done on the basis o f a two stage screening process.", + "type": "survey", + "explanation": "This is a dataset as it is explicitly mentioned as a survey that serves as a baseline for determining eligibility for housing assistance.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data for eligibility determination.", + "contextual_reason_agent": "This is a dataset as it is explicitly mentioned as a survey that serves as a baseline for determining eligibility for housing assistance.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 14, + "text": "In terms of teachers, 38 serve in public preschools, 1, 125 in public primary school and 1, 112 in public lower secondary schools. At the primary school level, 19 percent of teachers have at least completed their lower secondary cycle, 39 percent their baccalaureate, 8 percent a university degree while 32 percent have not declared their educational background. As for refugees, there are three refugee preschools that serve a total of 686 students, and three primary refugee schools and five accelerated learning centers that serve 3, 529 students. At the lower secondary level, there are 743 refugee students enrolled in three schools. 10. Access to education at all levels continues to be one of the country \u2019 s major challenges. The gross enrollment rate stands at only 14 percent for pre-primary, 89 percent for primary and 63 percent for lower secondary, according to the Ministry of National Education and Professional Training ( MENFOP ). Findings from the latest household survey in 2018 suggests updated figures for Net Enrollment Rates for primary at 74 percent nationally, 79 percent for Djibouti Ville, and ranges between 60. 6 percent and 74. 6 percent in the regions ( Arta and Ali-Sabieh respectively ). 11. Enrollment in preschool has witnessed slight growth in the past five years but remains low.", + "ner_text": [ + [ + 973, + 989, + "named" + ], + [ + 879, + 935, + "household survey <> author" + ], + [ + 993, + 997, + "household survey <> publication year" + ], + [ + 1027, + 1047, + "household survey <> data description" + ], + [ + 1101, + 1115, + "household survey <> data geography" + ], + [ + 1185, + 1189, + "household survey <> data geography" + ], + [ + 1194, + 1204, + "household survey <> data geography" + ], + [ + 1332, + 1350, + "household survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "The gross enrollment rate stands at only 14 percent for pre-primary, 89 percent for primary and 63 percent for lower secondary, according to the Ministry of National Education and Professional Training ( MENFOP ). Findings from the latest household survey in 2018 suggests updated figures for Net Enrollment Rates for primary at 74 percent nationally, 79 percent for Djibouti Ville, and ranges between 60. 6 percent and 74.", + "type": "survey", + "explanation": "In the context, it is explicitly mentioned as providing findings and updated figures, indicating it is used as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'household survey' implies a structured collection of data collected from households.", + "contextual_reason_agent": "In the context, it is explicitly mentioned as providing findings and updated figures, indicating it is used as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "184_multi-page", + "page": 39, + "text": "The problem in urban areas is access - demand exists among all groups but the rationing of sets ends up benefiting the better off who live in areas where schools have historically been located. Any further expansion of places will help the poorer segments of the population more particularly if care is taken to site the schools in areas where the poor live. There are also significant gender gaps and research indicates that educated mothers play a key role in the country ' s overall development. There is a shortage of school places and any rationing works to the detriment of girls enrollment. Parents are less willing for their girls to attend school because in par., they may view the curriculum as foreign. In addition, despite the fact the education is officially free, poor families still have difficulty paying the cost of books and materials. They prefer to use their constrained resources for their boys who they feel have a better labor market potential. Finally, the data from the Household Survey, showed that even if girls go to school, their parents pull them out at an age when they think they can help around the household.", + "ner_text": [ + [ + 995, + 1011, + "named" + ], + [ + 15, + 26, + "Household Survey <> data geography" + ], + [ + 426, + 442, + "Household Survey <> reference population" + ], + [ + 510, + 535, + "Household Survey <> data description" + ], + [ + 1195, + 1213, + "Household Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "They prefer to use their constrained resources for their boys who they feel have a better labor market potential. Finally, the data from the Household Survey, showed that even if girls go to school, their parents pull them out at an age when they think they can help around the household.", + "type": "survey", + "explanation": "In this context, it is confirmed as a dataset since it is explicitly referenced as a source of data that informs the analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Household Survey' is a structured collection of data typically used for empirical analysis.", + "contextual_reason_agent": "In this context, it is confirmed as a dataset since it is explicitly referenced as a source of data that informs the analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "203_multi-page", + "page": 7, + "text": "This support will improve identification of priorities with local participation, design using least-cost methods, and maintenance of completed works. A detailed methodology for CIP implementation is given in the Operations Manual. Part C The project management unit in the Ministry of Planning ( MOP / PMU ) will manage poverty surveys and studies which will be used in the SPP ( NAF component and poor communities identification under the CIP ). MOP / PMU will also manage the pilot projects with the support of CVDB, concerned ministries, authorities, NGOs, UNDP and beneficiaries. The findings obtained from these pilot projects will be used to design a comprehensive program consisting of similar sub-projects for implementation in a second phase of the CIP. Details on the implementation of Part C are given in the Operations Manual. Project Coordination Overall SPP coordination will be the responsibility of the MOP / PMU. Subsidiary agreements between the MOP and HUDC and the MOP and CVDB will be made, specifying the responsibilities of HUDC and CVDB in implementing the project. Initially, the MOP / PMU will also manage poverty and unemployment surveys.", + "ner_text": [ + [ + 320, + 335, + "named" + ], + [ + 398, + 414, + "poverty surveys <> reference population" + ], + [ + 1181, + 1199, + "poverty surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "A detailed methodology for CIP implementation is given in the Operations Manual. Part C The project management unit in the Ministry of Planning ( MOP / PMU ) will manage poverty surveys and studies which will be used in the SPP ( NAF component and poor communities identification under the CIP ). MOP / PMU will also manage the pilot projects with the support of CVDB, concerned ministries, authorities, NGOs, UNDP and beneficiaries.", + "type": "survey", + "explanation": "In the context, 'poverty surveys' are explicitly mentioned as being managed and used for the SPP, indicating they serve as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'poverty surveys' is a dataset because it refers to a structured collection of data related to poverty.", + "contextual_reason_agent": "In the context, 'poverty surveys' are explicitly mentioned as being managed and used for the SPP, indicating they serve as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 31, + "text": "Project-affected communities and individuals may submit their complaint to the Bank \u2019 s independent Accountability Mechanism ( AM ). The AM houses the Inspection Panel, which determines whether harm occurred, or could occur, as a result of Bank non-compliance with its policies and procedures, and the Dispute Resolution Service, which provides communities and borrowers with the opportunity to address complaints through dispute resolution. Complaints may be submitted at any time after concerns have been brought directly to the Bank ' s attention, and Bank Management has been given an opportunity to respond. For information on how to submit complaints to the Bank \u2019 s Grievance Redress Service ( GRS ), visit https: / / www. worldbank. org / GRS. For information on how to submit complaints to the Bank \u2019 s Accountability Mechanism, visit https: / / accountability. worldbank. org. V. RISK 61. The overall risk is moderate. 62. Political and governance: Moderate. Public sector reforms are expected to face resistance and inertia, which can only be overcome with strong political leadership. The strong support of the King and oversight by the Royal Court is expected to mitigate the risk of inaction or inconsistency across the government. The GOJ is aware of the social risks of the targeted 17 World Bank. 2021. ID4D global dataset. 18 World Bank. Second State of the Mashreq Women Report. https: / / www. worldbank. org / en / country / jordan / publication / second - state-of-the-mashreq-women-report-who-cares-care-work-and-women-s-labor-market-outcomes-in-iraq-jordan-and - lebano #: ~: text = Based % 20on % 20the % 20analysis % 20and, needs % 2C % 20particularly % 20for % 20the % 20most. 19 The exact percentage is not available because MODEE has not captured the demographics of GSC visitors thus far.", + "ner_text": [ + [ + 1320, + 1324, + "named" + ], + [ + 531, + 535, + "ID4D <> publisher" + ], + [ + 1302, + 1312, + "ID4D <> publisher" + ], + [ + 1314, + 1318, + "ID4D <> publication year" + ], + [ + 1344, + 1354, + "ID4D <> publisher" + ] + ], + "validated": true, + "empirical_context": "2021. ID4D global dataset. 18 World Bank.", + "type": "dataset", + "explanation": "The context explicitly identifies 'ID4D' as a global dataset, confirming its role as a structured collection of data.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'global dataset' in the context.", + "contextual_reason_agent": "The context explicitly identifies 'ID4D' as a global dataset, confirming its role as a structured collection of data.", + "contextual_signal": "described as a global dataset", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 2, + "validated": 1, + "not_validated": 1 + } + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 111, + "text": "A draft PPSD was developed to improve the implementation of the project and help achieve results. The PPSD resulted in the preparation of an initial 18-month PP setting forth the selection methods to be followed by the IA during project implementation in the procurement of goods, works, and non-consulting and consulting services financed by the World Bank. The PP will be updated at least annually or as required to reflect the actual project implementation needs and improvements in institutional capacity. The IA will use STEP in the implementation of the project. This is a planning and tracking system, which would provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. After getting the World Bank \u2019 s agreement to the work plan, all documents at each stage of the procurement process will be uploaded in STEP for the World Bank \u2019 s post review. 7. Procurement capacity risk assessment. The procurement activities will be implemented by the PIUs at MEMD and UECCC. Assessment of the respective IAs was conducted as part of project preparation, and it was noted that project management will make use of existing procurement management arrangements. The proposed IAs have experience implementing World Bank-funded projects, and the project will leverage the gain in procurement capacity training of procurement staff through the implementation of the previous and the ongoing World Bank-funded projects, ERT-3 and GERP. The MEMD has a PCU and a PIU supported by 66 Supplies - UGX 1 billion ( US $ 266, 667 ), road works - UGX45 billion ( US $ 12 million ), public works - UGX10 billion ( US $ 2, 7 million ), consultancy services - UGX1 billion ( US $ 266, 667 ), and non-consultancy services - UGX 200 million ( US $ 53, 000 ).", + "ner_text": [ + [ + 872, + 876, + "named" + ] + ], + "validated": false, + "empirical_context": "This is a planning and tracking system, which would provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. After getting the World Bank \u2019 s agreement to the work plan, all documents at each stage of the procurement process will be uploaded in STEP for the World Bank \u2019 s post review. 7.", + "type": "system", + "explanation": "However, it is described as a planning and tracking system, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned in the context of providing data on procurement activities.", + "contextual_reason_agent": "However, it is described as a planning and tracking system, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 93, + "text": "The proposed project will continue strengthening EMIS capacity within MINEDUB and MINESEC, while expanding the system to MINESUP and MINEFOP, in turn creating a fully integrated EMIS for the education sector as a whole. Under this result area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP - MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this", + "ner_text": [ + [ + 717, + 721, + "named" + ] + ], + "validated": false, + "empirical_context": "The proposed project will continue strengthening EMIS capacity within MINEDUB and MINESEC, while expanding the system to MINESUP and MINEFOP, in turn creating a fully integrated EMIS for the education sector as a whole. Under this result area, the following activities will be supported: ( a ) the development of an integrated EMIS for MINEDUB-MINESEC-MINESUP - MINEFOP, including the establishment of new institutional arrangements for the sector, unified methods and processes, and a common platform for data collection and management; ( b ) the deconcentration of EMIS to regional, divisional and sub-divisional levels ( as applicable ); ( c ) the provision of equipment and software to support the functioning of EMIS, and so on; and ( d ) the strengthening of human capacity and resources, particularly in the areas of statistics and data management. The release of IDA funds under this", + "type": "system", + "explanation": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it relates to data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system for managing data rather than a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 19, + "text": "The subcomponent will also support strengthened supervision, management, and on-the-job coaching for IPs and service providers through an integrated supervisory approach in which IPs develop CHD capacity, inclusive of health service delivery planning, supervision, and data entry into District Health Information Software 2 ( DHIS2 ). The subcomponent will include planning and execution of outreach ( village visits, mass campaigns, and so on ) and transportation ( vehicle, boat, and foot ) modalities paying close attention to seasonal population movement patterns and access. Climate sensitive health service delivery and planning will be integral to the approach under this subcomponent. 30. Subcomponent 1. 1 will channel resources through UNICEF to sub-contract national and international NGOs for health service delivery and coordination. 29 In collaboration and through the leadership of the MoH, SMoHs, and CHDs, UNICEF will be responsible for ( a ) oversight and coordination of health services and DHIS2 data collection and entry systems; ( b ) supervision and quality assurance of IPs and health facilities in line with national plans and guidelines; ( c ) coordinating and conducting in-service training; ( d ) through an integrated approach, developing the capacity of SMoHs to plan, supervise, and oversee service delivery and the DHIS2 system; and ( e ) integrated pharmaceutical procurement, quantification, and forecasting.", + "ner_text": [ + [ + 285, + 323, + "named" + ] + ], + "validated": false, + "empirical_context": "The subcomponent will also support strengthened supervision, management, and on-the-job coaching for IPs and service providers through an integrated supervisory approach in which IPs develop CHD capacity, inclusive of health service delivery planning, supervision, and data entry into District Health Information Software 2 ( DHIS2 ). The subcomponent will include planning and execution of outreach ( village visits, mass campaigns, and so on ) and transportation ( vehicle, boat, and foot ) modalities paying close attention to seasonal population movement patterns and access.", + "type": "system", + "explanation": "However, it is described as a software system, not explicitly as a data source or dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Information' in its name, suggesting it handles data.", + "contextual_reason_agent": "However, it is described as a software system, not explicitly as a data source or dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "046_Cameroon-COVID-19-Preparedness-and-Response-Project", + "page": 26, + "text": "Reporting: The MoPH will produce a periodic report each 90 days based on agreed targets and the progress made of implementation of critical project activities. This report will contain tables of performance against indicators for the proposed project. 54. For real-time data collection and analysis, the project will implement the Geo-enabling Method for Monitoring and Supervision ( GEMS ). The GEMS method was developed by the Fragility, Conflict and Violence ( FCV ) Group of the World Bank and enables project teams to use open source ICT tools for in - field collection of structured digital data from the field that automatically feeds into a centralized M & E system and MIS. The integrated data can include any kind of indicators, based on tailor-made forms; photos, audio, videos; time and date stamps; and GPS coordinates that allow for automated geo-mapping of the information. Using these tools systematically allows the project to enhance the transparency and accuracy of project planning as well as M & E and third-party monitoring throughout the project cycle. Moreover, GEMS allow to establish a digital platform for remote supervision, real-time safeguards monitoring, and portfolio mapping for coordination across project components as well as with other operations in the country. 55. GEMS use in the context of COVID-19 monitoring. While GEMS can support operational monitoring of the COVID-19 response in a variety of ways, it is not intended to be used for surveillance activities", + "ner_text": [ + [ + 678, + 681, + "named" + ] + ], + "validated": false, + "empirical_context": "For real-time data collection and analysis, the project will implement the Geo-enabling Method for Monitoring and Supervision ( GEMS ). The GEMS method was developed by the Fragility, Conflict and Violence ( FCV ) Group of the World Bank and enables project teams to use open source ICT tools for in - field collection of structured digital data from the field that automatically feeds into a centralized M & E system and MIS. The integrated data can include any kind of indicators, based on tailor-made forms; photos, audio, videos; time and date stamps; and GPS coordinates that allow for automated geo-mapping of the information.", + "type": "system", + "explanation": "'MIS' refers to a Management Information System, which is a system but not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'MIS' is a dataset because it is associated with data collection and management.", + "contextual_reason_agent": "'MIS' refers to a Management Information System, which is a system but not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "001_BOSIB-3f2311b3-9a20-44d3-b637-b3b2b3d21695", + "page": 27, + "text": "The project will support the construction of access-ways to allow evacuation tankers to remove septage from communal toilet systems in Kigeme camp ( Nyamagabe ) and Kiziba camp ( Karongi ), where leakages and overflows into the environment / water sources are most serious. The layout and construction of the access-ways will be done through a design and build procurement approach. These districts experience frequent heavy rainfall, so the activity will directly respond to the climate-induced risk of more frequent flooding rains. 42. Implementation. This component will be implemented by MINEMA, supported by the Ministry of Environment and the Rwanda Environment Management Authority. As activities will be in the camps, MINEMA will be responsible for long-term management and maintenance. The activities are expected to improve environmental conditions for 125, 000 people ( camp-based refugee population and people living in the surrounding villages ). Component 4: Project Management, M & E and Capacity-Building ( US $ 4. 02 million equivalent ) 43. This component will support the Single Project Implementation Unit ( SPIU ) at MINEMA and staff in the target districts to manage the project. 24 This will include project staff at the district level for coordination and oversight. M & E arrangements will include a baseline survey, annual surveys, and endline assessment and continued operation of the Management Information System ( MIS ). Other activities will include a communications strategy and funds for capacity-building for staff from all project implementing agencies on issues such as community development, fiduciary management, E & S risk management, financial inclusion, climate risk management and development responses to forced displacement. 23 The assessment also identified additional needs that will not be included in the Phase II design due to resource constraints and to avoid overcomplicating the design: ( a ) reforestation in and around the camps; ( b ) solid waste management; and ( c ) insufficient fuel for cooking. 24 Operational costs for BDF are covered in sub-component 2 ( c ).", + "ner_text": [ + [ + 1412, + 1441, + "named" + ] + ], + "validated": false, + "empirical_context": "24 This will include project staff at the district level for coordination and oversight. M & E arrangements will include a baseline survey, annual surveys, and endline assessment and continued operation of the Management Information System ( MIS ). Other activities will include a communications strategy and funds for capacity-building for staff from all project implementing agencies on issues such as community development, fiduciary management, E & S risk management, financial inclusion, climate risk management and development responses to forced displacement.", + "type": "system", + "explanation": "However, it is mentioned as a system for management rather than a data source in this context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'Management Information System' suggests a structured collection of data.", + "contextual_reason_agent": "However, it is mentioned as a system for management rather than a data source in this context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "106_1482030098559-000A10458-PAD-Burundi-SSN-11282016", + "page": 22, + "text": "Based on the progress on coordinating other information and additional financing for the program, at mid-term review, a decision will be taken to either continue with the initial process, apply the questionnaire to all potentially eligible households, based on the small-area estimate of extreme poverty in the commune ( and the community validation may then occur afterwards ), or use only the community-based targeting. The targeting questionnaire will collect basic socio-economic information about the household, as well as key information on specific vulnerabilities, access to basic social services, and livelihood strategies. This information could then be used for targeting by other programs managed by other Ministries which are members of the CNPS. The analysis of the data will provide a dynamic portrait of the extreme poor, their access to services and their livelihood strategies, which can help inform the design of poverty reduction programs. All individuals in the registry will be uniquely identified. The proposed methodology will initially be implemented in the 8 selected rural communes of the Karuzi and Ruyigi provinces and then in the additional 8 communes in Gitega and Kirundo provinces. This phased implementation will enable the implementation team to: ( i ) assess the operational processes for the implementation of the targeting and registration processes, in terms of organizational capacity, time and costs to inform the subsequent roll-out; and ( ii ) test the targeting efficiency of the proposed CBT and PMT combination, given the prevalence of poverty and the lack of reliable consumption data. Lessons from the first eight communes will inform the roll-out in the next eight communes.", + "ner_text": [ + [ + 426, + 449, + "named" + ] + ], + "validated": false, + "empirical_context": "Based on the progress on coordinating other information and additional financing for the program, at mid-term review, a decision will be taken to either continue with the initial process, apply the questionnaire to all potentially eligible households, based on the small-area estimate of extreme poverty in the commune ( and the community validation may then occur afterwards ), or use only the community-based targeting. The targeting questionnaire will collect basic socio-economic information about the household, as well as key information on specific vulnerabilities, access to basic social services, and livelihood strategies. This information could then be used for targeting by other programs managed by other Ministries which are members of the CNPS.", + "type": "questionnaire", + "explanation": "However, it is not a dataset itself but rather a tool for gathering data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it involves collecting information.", + "contextual_reason_agent": "However, it is not a dataset itself but rather a tool for gathering data.", + "contextual_signal": "mentioned only as a tool for collecting information, not as a data source", + "tags": [] + }, + { + "filename": "029_IBArchive-ea5ff2ad-6f1f-45bf-a2ad-036440562c18", + "page": 107, + "text": "The World Bank Electricity Access Scale-up Project ( EASP ) ( P166685 ) Page 102 of 130 percent ( which is the average share of female asset ownership in Uganda ) to 30 percent, corresponding to about 1, 200 loans for female recipients under the project. Finally, focus will be placed in ensuring female-led commercial enterprises also have access to finance for modern energy technologies for use in their enterprises. An increase in access to finance of female-led commercial enterprises is expected from the current 24 percent to 35 percent, which corresponds to about 1, 750 recipient enterprises, to bridge the gap in female-led enterprises and female access to resources. B. Enhancing Data and Knowledge on Male and Female Consumers 6. UBOS / MTF Survey update. Lessons learned from international experiences and best practices indicate the key role played by up-to-date data and analytics for the efficient and effective delivery of services. To provide more insight on the access outlook for the country, the Electrification Survey Report-2018 ( UBOS, 2020 ) will be updated to include more granular information on grid and off-grid gender disaggregated access rate. The information collected would feed into the national geospatial planning platform for access targeted to socioeconomic development. The activity will be implemented by the MEMD and UBOS, in close collaboration with the SPs.", + "ner_text": [ + [ + 1017, + 1051, + "named" + ] + ], + "validated": true, + "empirical_context": "Lessons learned from international experiences and best practices indicate the key role played by up-to-date data and analytics for the efficient and effective delivery of services. To provide more insight on the access outlook for the country, the Electrification Survey Report-2018 ( UBOS, 2020 ) will be updated to include more granular information on grid and off-grid gender disaggregated access rate. The information collected would feed into the national geospatial planning platform for access targeted to socioeconomic development.", + "type": "survey", + "explanation": "This is a dataset as it is mentioned to be updated with more granular information and is used to provide insights for planning.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a report that provides data on electrification access rates.", + "contextual_reason_agent": "This is a dataset as it is mentioned to be updated with more granular information and is used to provide insights for planning.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 44, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 39 of 61 Households with complete information in the Social Registry - Refugee Number of refugee households registred within the Social Registry database and for which the socio - economic data has been collected and recorded. Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years. Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "ner_text": [ + [ + 1114, + 1129, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 88, + 98, + "Social Registry <> reference population" + ], + [ + 168, + 186, + "Social Registry <> reference population" + ], + [ + 251, + 272, + "Social Registry <> data type" + ], + [ + 412, + 422, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "type": "database", + "explanation": "The term is explicitly mentioned as a 'database' and is used as a source of information for various metrics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'database' in the context.", + "contextual_reason_agent": "The term is explicitly mentioned as a 'database' and is used as a source of information for various metrics.", + "contextual_signal": "mentioned as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 44, + "text": "The World Bank Cash for Jobs Project ( P175327 ) Page 40 of 86 of both of it is to build the capacity of the GoB to monitor its own programs in the Social Protection sector. In this sense, several instruments will support the monitoring of the project and Social Protection programs in general: a. The Social Registry. It will help the MNSSAHRG, and SEP monitor the socio-economic situation of the poor and vulnerable households in Burundi. The socio-economic data being collected through the registry will allow government to monitor the situation and better design social policies. The registry will also collect data on beneficiaries enrolled in different social programs allowing to monitor the coverage of these. b. The project will support the mid-term review of the Social Protection Strategy that is meant to be approved at the beginning of 2022. c. SEP / CNPS will receive technical and financial support to perform their coordination tasks and lead the Social Protection Working Group allowing for a better monitoring of Social Protection interventions by different partners. 128. PDO indicators and Intermediate Results Indicators of the project will be measured through different instruments. These include process evaluations; regular spot checks and beneficiary surveys through mobile phone to evaluate the quality of implementation: the efficiency of the targeting and payment processes; and the overall satisfaction with the program.", + "ner_text": [ + [ + 302, + 317, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 398, + 428, + "Social Registry <> reference population" + ], + [ + 432, + 439, + "Social Registry <> data geography" + ], + [ + 445, + 464, + "Social Registry <> data type" + ], + [ + 623, + 674, + "Social Registry <> reference population" + ], + [ + 849, + 853, + "Social Registry <> publication year" + ], + [ + 1465, + 1483, + "Social Registry <> usage context" + ] + ], + "validated": true, + "empirical_context": "In this sense, several instruments will support the monitoring of the project and Social Protection programs in general: a. The Social Registry. It will help the MNSSAHRG, and SEP monitor the socio-economic situation of the poor and vulnerable households in Burundi.", + "type": "registry", + "explanation": "The Social Registry is explicitly mentioned as a tool to monitor socio-economic situations, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that monitors socio-economic situations.", + "contextual_reason_agent": "The Social Registry is explicitly mentioned as a tool to monitor socio-economic situations, indicating it functions as a data source.", + "contextual_signal": "described as a registry that helps monitor socio-economic situations", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + }, + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "011_BOSIB12886229a02a1bcdc12ee681b5fe59", + "page": 22, + "text": "The subcomponent will: ( a ) finance procurement of information communication technology equipment at the national level and train staff on data entry and use; ( b ) train trainers to develop health facility staff data entry, management, and use capacity; ( c ) create interoperability and integration between data systems and ensure data sharing, storage and backup; ( d ) develop, print, and disseminate Standard Operating Procedures for HMIS data entry, cleaning, quality improvement, and use at all levels; ( e ) conduct data review meetings and generate data use tools; ( f ) establish and operate the National and State level HMIS and Monitoring and Evaluation ( M & E ) Technical Working Groups; ( g ) conduct data quality improvement activities at the facility and national level; ( h ) operationalize a national and state level research committee, building on existing structure; ( i ) conduct an annual health sector review meeting; and ( j ) maintain and institutionalize the Health Service Functionality ( HSF ) Database. 39.", + "ner_text": [ + [ + 632, + 636, + "named" + ] + ], + "validated": false, + "empirical_context": "The subcomponent will: ( a ) finance procurement of information communication technology equipment at the national level and train staff on data entry and use; ( b ) train trainers to develop health facility staff data entry, management, and use capacity; ( c ) create interoperability and integration between data systems and ensure data sharing, storage and backup; ( d ) develop, print, and disseminate Standard Operating Procedures for HMIS data entry, cleaning, quality improvement, and use at all levels; ( e ) conduct data review meetings and generate data use tools; ( f ) establish and operate the National and State level HMIS and Monitoring and Evaluation ( M & E ) Technical Working Groups; ( g ) conduct data quality improvement activities at the facility and national level; ( h ) operationalize a national and state level research committee, building on existing structure; ( i ) conduct an annual health sector review meeting; and ( j ) maintain and institutionalize the Health Service Functionality ( HSF ) Database. 39.", + "type": "system", + "explanation": "'HMIS' is mentioned as a system but not explicitly as a data source or dataset in the context provided.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it is related to health management information systems, which often involve data collection.", + "contextual_reason_agent": "'HMIS' is mentioned as a system but not explicitly as a data source or dataset in the context provided.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "167_27761", + "page": 20, + "text": "The FMIS system at the MOF and the Post Bank were found to be adequate to support the project. The project \u2019 s financial statements, including Statement of Expenditures ( SOE ) and the Special Account ( SA ), would be audited quarterly internally and annually by an independent auditor, in accordance with internationally accepted standards. In addition, the auditor would carry out field spot check audits to ascertain compliance with contractual requirements. Compliance with conditional cash transfers would be monitored by an independent external consultant ( paragraph C. 3 ). 4. Social Opportunities, constraints, impacts, and risks arising. out of the socio-cultural and political context. The impact of closure and incursions in the West Bank and Gaza has been extensively documented. On the rise are poverty, unemployment, school drop out rates; on the decline or deteriorating are household incomes, living conditions, school attendance rates, at-large nutritional status and in particular of children 0-5 years of age. 13 The so called \u201c newly poor \u201d, those who are just above the poverty line before the outbreak of the second Intifada, in the third quarter of 2000, have been assisted by various interventions. To date, it has been much more difficult to find ways to assist the very poor, or first decile, whose coping mechanisms are essentially exhausted. The proposed project i s designed to support children ( 0-18 years old ) in the first decile. Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "ner_text": [ + [ + 1910, + 1957, + "named" + ], + [ + 1003, + 1028, + "Palestinian Living Conditions quarterly surveys <> reference population" + ], + [ + 1156, + 1177, + "Palestinian Living Conditions quarterly surveys <> reference year" + ], + [ + 1965, + 1985, + "Palestinian Living Conditions quarterly surveys <> author" + ], + [ + 2066, + 2070, + "Palestinian Living Conditions quarterly surveys <> publication year" + ], + [ + 2099, + 2117, + "Palestinian Living Conditions quarterly surveys <> author" + ], + [ + 2119, + 2123, + "Palestinian Living Conditions quarterly surveys <> publication year" + ], + [ + 2143, + 2161, + "Palestinian Living Conditions quarterly surveys <> usage context" + ] + ], + "validated": true, + "empirical_context": "Although the SHC has targeted the first decile for more than 20 years, the program i s very small today in relation to need and requires new instruments to address the present situation and to shift MOSA \u2019 s overall strategy from one of coping to one of social springboard. Therefore, the project will support MOSA in reshaping its social assistance strategy and to improve effectiveness and efficiency of ~ l3 PCBS quarterly household surveys, Palestinian Living Conditions quarterly surveys, IUCN, University of Geneva, Living Standards quarterly surveys, Nutrition Study, Johns Hopkins University, 2002, Nutrition Survey, PCBS and BirZeit University, 2002. 17", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a quarterly survey that collects data on living conditions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a quarterly survey that collects data on living conditions.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "158_40156", + "page": 24, + "text": "\u0083 Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. \u0083 Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed prior to the initiation of implementation. The purpose of these rapid assessments will be to collect the baseline data on services supporting CBMPs, identify vulnerable populations ( CBMPs ), estimate the sizes of the vulnerable populations, the demographics of the local populations ( including nomads ), vulnerability factors, and interaction between the CBMPs and local populations. \u0083 IGAD Regional Mapping assessment: In the last couple of years IGAD implemented a World Bank Institutional Development Fund ( IDF ) grant on Strengthening the HIV / AIDS M & E Capacity in the Horn of Africa Cross-Border Regions. The main output of this grant was a regional mapping assessment of HIV / AIDS interventions in the cross-border areas, and the establishment of a regional M & E working group. The data generated from in-country reports will be updated regularly through this project and posted on the IGAD web site.", + "ner_text": [ + [ + 68, + 90, + "named" + ], + [ + 577, + 599, + "health facility survey <> reference population" + ], + [ + 685, + 702, + "health facility survey <> reference population" + ], + [ + 715, + 721, + "health facility survey <> reference population" + ], + [ + 807, + 811, + "health facility survey <> author" + ], + [ + 998, + 1033, + "health facility survey <> data geography" + ] + ], + "validated": true, + "empirical_context": "\u0083 Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. \u0083 Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed prior to the initiation of implementation.", + "type": "survey", + "explanation": "This is indeed a dataset as it involves a systematic survey to collect data on HIV related services.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a structured assessment of health facilities.", + "contextual_reason_agent": "This is indeed a dataset as it involves a systematic survey to collect data on HIV related services.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "051_Pakistan-Balochistan-Human-Capital-Investment-Project", + "page": 38, + "text": "The World Bank Pakistan: Balochistan Human Capital Investment Project ( P166308 ) Page 34 of 47 teachers trained using scripted lessons; ( c ) boundary wall and functional toilets; ( d ) desk and chair for each student; ( e ) trained PTSMCs meeting regularly and endorsing monthly teacher attendance ME PDO Table SPACE Monitoring & Evaluation Plan: Intermediate Results Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Targeted HFs having majority of essential medicines for RMNCHN services Percentage of targeted HFs having > 75 percent of essential RMNCHN medicines. Bi \u2010 annual Health institutional database Self \u2010 reported data Health Department, HMIS, PPHI Absenteeism among key staff to provide RMNCHN services Percentage of key staff present at HFs during duty time to provide RMNCHN services. Baseline is a preliminary estimate. Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "ner_text": [ + [ + 1253, + 1257, + "named" + ] + ], + "validated": false, + "empirical_context": "Bi \u2010 annual Real time monitoring system Administrative data Health Department, PMU Targeted health care providers with minimum knowledge and competencies in RMNCHN services Percentage of targeted staff with minimum knowledge and competencies in RMNCHN services. Bi \u2010 annual PMU Training report Health Department, PMU Targeted HMIS digitally integrated into DHIS ( 2 ) Cumulative number of RMNCHN relevant health information systems digitally integrated into DHIS ( 2 ).", + "type": "system", + "explanation": "'HMIS' is mentioned as a system but not as a data source in the context.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'HMIS' is a dataset because it relates to health information systems.", + "contextual_reason_agent": "'HMIS' is mentioned as a system but not as a data source in the context.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 21, + "validated": 4, + "not_validated": 17 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 35, + "text": "The World Bank Education Quality Improvement Project ( P179363 ) Page 26 of 68 project manager, FM specialist, PS, M & E specialist, environmental specialist and social development specialist with experience in GBV prevention and response, civil works engineers, and other technical specialists to ensure timely, quality, transparent, and effective implementation of the civil works activities. Further details will be provided in the POM. The fiduciary assessment of the PMT of the MoER and NORLD has been completed and is reflected in the fiduciary sections. B. Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "ner_text": [ + [ + 800, + 826, + "named" + ], + [ + 829, + 872, + "e-Catalogue / e - Register <> data description" + ], + [ + 874, + 900, + "e-Catalogue / e - Register <> reference population" + ], + [ + 906, + 922, + "e-Catalogue / e - Register <> reference population" + ], + [ + 949, + 1063, + "e-Catalogue / e - Register <> data description" + ], + [ + 1122, + 1126, + "e-Catalogue / e - Register <> publisher" + ] + ], + "validated": true, + "empirical_context": "Results Monitoring and Evaluation Arrangements 68. The PDO-level results indicators and intermediate results indicators will be monitored using the following data: ( a ) data on education institutions and students generated by EMIS and e-Catalogue / e - Register ( disaggregated by gender, urban-rural divide, students with disabilities, and refugee students ); ( b ) results from the nationally representative national and international assessments of student performance and classroom observations; ( c ) regular survey data and administrative data of the MoER and NORLD; and ( d ) semiannual monitoring reports prepared by the MoER under support of the PMT. The MoER through the PMT will carry out the day-to-day coordination of M & E activities.", + "type": "registry", + "explanation": "In the context, it is explicitly referenced as generating data on education institutions and students, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned alongside other data sources used for monitoring and evaluation.", + "contextual_reason_agent": "In the context, it is explicitly referenced as generating data on education institutions and students, indicating it functions as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "024_Uganda-Generating-Growth-Opportunities-and-Productivity-for-Women-Enterprises-Uganda-Project", + "page": 11, + "text": "These vulnerabilities are heightened given the economy \u2019 s strong reliance on climate-sensitive sectors like agriculture and tourism. While women are particularly vulnerable to the impacts of climate change, given their reliance on subsistence agriculture, they are also active in safeguarding resources such as water, giving them important roles in protecting the environment. 4. Supporting women \u2019 s entrepreneurship holds a critical place in Uganda \u2019 s efforts to revive its economy. Currently, women are less likely than men to be paid employees ( 13 percent of women compared with 23 percent of men ), and more likely to be self-employed ( 80 percent compared to men \u2019 s 70 percent ). 3 In this context, promoting ways for women to grow and expand their businesses is a good option to promote economic recovery. Micro, small, and medium enterprises ( MSMEs ) created within the past five years now generate over 50 percent of formal jobs, and household enterprises provide employment for another 3. 1 million households. 4 Furthermore, women are particularly vulnerable 1 The Uganda Bureau of Statistics ( UBOS ) has recently announced poverty rates based on the UNHS 2019 / 2020. The data for this survey was collected in two periods with a break during the strictest lockdown period between March \u2013 June 2020. The first data collection period started in September 2019 and ended in February 2020, then it resumed in July 2020 and ended in November 2020. 2 Government of Uganda ( 2020 ), Third National Development Plan ( NDP III ). 3 GoU 2018. National Labour Force Survey. 4 World Bank. 2019. \u201c Uganda Jobs Strategy for Inclusive Growth. \u201d", + "ner_text": [ + [ + 1168, + 1172, + "named" + ], + [ + 445, + 451, + "UNHS <> data geography" + ], + [ + 1081, + 1108, + "UNHS <> author" + ], + [ + 1141, + 1154, + "UNHS <> data description" + ], + [ + 1173, + 1184, + "UNHS <> publication year" + ], + [ + 1361, + 1375, + "UNHS <> reference year" + ], + [ + 1389, + 1402, + "UNHS <> reference year" + ], + [ + 1583, + 1593, + "UNHS <> publisher" + ], + [ + 1603, + 1609, + "UNHS <> data geography" + ] + ], + "validated": true, + "empirical_context": "1 million households. 4 Furthermore, women are particularly vulnerable 1 The Uganda Bureau of Statistics ( UBOS ) has recently announced poverty rates based on the UNHS 2019 / 2020. The data for this survey was collected in two periods with a break during the strictest lockdown period between March \u2013 June 2020.", + "type": "survey", + "explanation": "In the context, 'UNHS' is explicitly mentioned as a survey that provides data used for empirical analysis of poverty rates.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'UNHS' is a dataset because it is referenced in relation to poverty rates and data collection.", + "contextual_reason_agent": "In the context, 'UNHS' is explicitly mentioned as a survey that provides data used for empirical analysis of poverty rates.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "145_689710PAD0P1300Official0Use0Only090", + "page": 145, + "text": "132 Assessment of climate elasticity of runoff under the Niger Basin Climate Risk Assessment 29. The NRB-CRA Study is conducting a detailed hydrological modeling of the Niger Basin to establish the climate elasticity of runoff from the main runoff generating sub-basins, viz. the Upper Niger and Benue basins. Preliminary results indicate that the temperature elasticity of Niger Basin runoff is minimal and that the precipitation elasticity could be in the order of 2. 5. This work will be completed by year end ( 2012 ). Pending the modeling results an extensive linear and log-linear regression analysis of available hydro-meteorological data sets for the basin was performed to establish initial estimates. The estimated precipitation elasticity generally varies between 2. 0 ( Benue basin ) and 2. 3 ( Upper Niger basin ). This range is consistent with values reported in literature for other river basins, based on different methods. The analyses confirmed that it is impossible to reliably estimate the temperature elasticity from the available runoff, precipitation and temperature data sets for the Niger Basin, mainly because the contribution of annual variations in temperature to variations in runoff is much less than the random noise in the runoff signals.", + "ner_text": [ + [ + 620, + 650, + "named" + ], + [ + 57, + 68, + "hydro-meteorological data sets <> data geography" + ], + [ + 296, + 308, + "hydro-meteorological data sets <> data geography" + ], + [ + 515, + 519, + "hydro-meteorological data sets <> publication year" + ], + [ + 715, + 749, + "hydro-meteorological data sets <> data description" + ], + [ + 1286, + 1304, + "hydro-meteorological data sets <> usage context" + ] + ], + "validated": true, + "empirical_context": "This work will be completed by year end ( 2012 ). Pending the modeling results an extensive linear and log-linear regression analysis of available hydro-meteorological data sets for the basin was performed to establish initial estimates. The estimated precipitation elasticity generally varies between 2.", + "type": "data set", + "explanation": "In the context, it is explicitly mentioned that regression analysis was performed on these data sets, confirming their use as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to 'hydro-meteorological data sets' which suggests a structured collection of data.", + "contextual_reason_agent": "In the context, it is explicitly mentioned that regression analysis was performed on these data sets, confirming their use as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 34, + "text": "Improved HCI for higher productivity, equity and growth in Kenya Results Area 2: Improve Girls \u2019 Participation in Schooling, Including in Refugee Hosting Counties \u2022 High drop out of girls in upper primary \u2022 Gender disparities in transition to secondary school for poor and vulnerable populations \u2022 Provide scholarships, school kits, and mentorship support services for poor and vulnerable girls. \u2022 Strengthen supply chain for sanitary towels for girls to enhance dignity and ensure uninterrupted school attendance. \u2022 Implement gender specific actions in SIPs to track girls at risk of dropping out and to facilitate the reentry of out-of - school girls due to pregnancy. \u2022 ELIMU scholarship program expanded to benefit poor and vulnerable populations. \u2022 School level support mechanisms established for girls to complete primary education. Results Area 3: Strengthen Reform Implementation Capacity \u2022 Gaps in fidelity of implementation of initiated reforms ( CBC, CBA, pre-service teacher training, and teacher management ). \u2022 Low utilization of NEMIS data for planning and decision making in primary education. \u2022 Gaps in the quality of pre-service teacher training in the implementation of CBC \u2022 Low quality pre-school services are negatively impacting foundational learning. \u2022 Insufficient infrastructure in target counties \u2022 Strengthen reform implementation capacity for rollout of the CBC and CBA, notably: i. completion of CBC designs for all basic education grades ii. development of formative assessment for the CBC", + "ner_text": [ + [ + 1044, + 1049, + "named" + ], + [ + 59, + 64, + "NEMIS <> data geography" + ] + ], + "validated": true, + "empirical_context": "Results Area 3: Strengthen Reform Implementation Capacity \u2022 Gaps in fidelity of implementation of initiated reforms ( CBC, CBA, pre-service teacher training, and teacher management ). \u2022 Low utilization of NEMIS data for planning and decision making in primary education. \u2022 Gaps in the quality of pre-service teacher training in the implementation of CBC \u2022 Low quality pre-school services are negatively impacting foundational learning.", + "type": "system", + "explanation": "NEMIS is confirmed as a dataset since it is explicitly referenced for its data usage in planning and decision making.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because NEMIS is mentioned in relation to data utilization for planning and decision making.", + "contextual_reason_agent": "NEMIS is confirmed as a dataset since it is explicitly referenced for its data usage in planning and decision making.", + "contextual_signal": "follows 'utilization of NEMIS data for planning and decision making'", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 101, + "text": "In general, before effecting payment, the transaction requested for payment is checked against the approved budget and the availability of sufficient uncommitted budget balance. The IBEX module is in use for expenditure monitoring although some woredas use a manual budget ledger to keep track of expenditures and commitments instead of the budget control module of the IBEX. This is mainly due to inadequate skills on how to use the system which the government is working on through its continuous PFM trainings. For the HCO, quarterly IFRs will provide a variance analysis of the budget against actual expenditure and will explain the major reasons for the variances. 9. Budget utilization. The overall budget performance for the human capital sectors has been found satisfactory for the last three years scoring between 80 percent to 94 percent. The analysis for the capital expenditure was only done for the pilot SPG woredas as these are the woredas that are expected to have capital expenditures. It is noted that for the last three years, performance has been between 96 percent to 133 percent. 10. Fiscal transparency. In previous years, a remarkable achievement was registered on the Financial Transparency and Accountability ( FTA ) activities by enacting directives, provision of trainings, assignment of focal persons, and disclosure of public budget information. The annual regions subsidy is disclosed on the MOF website.", + "ner_text": [ + [ + 182, + 193, + "named" + ] + ], + "validated": false, + "empirical_context": "In general, before effecting payment, the transaction requested for payment is checked against the approved budget and the availability of sufficient uncommitted budget balance. The IBEX module is in use for expenditure monitoring although some woredas use a manual budget ledger to keep track of expenditures and commitments instead of the budget control module of the IBEX. This is mainly due to inadequate skills on how to use the system which the government is working on through its continuous PFM trainings.", + "type": "system", + "explanation": "However, the context indicates that IBEX is a system used for expenditure monitoring, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'module' can imply a structured component of a system that handles data.", + "contextual_reason_agent": "However, the context indicates that IBEX is a system used for expenditure monitoring, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 47, + "text": "Frequency Once, at Program closure. Data source Public opinion poll by the Centre for Strategic Studies at the University of Jordan on Tawjihi, Universities and the Unified Admission System. Methodology for Data Collection Polling of a national representative sample of high school students covering all governorates, and regions. Question asked: \u201c Do you believe that the Tawjihi exam constitutes a fair educational and academic evaluation for students? \u201d This would include a representative sub-sample of refugee students. Responsibility for Data Collection Centre for Strategic Studies. Increased representation of women in leadership positions in the civil service as a result of competency-based promotions and competitive recruitment. Description Percentage of women appointed to leadership positions in the civil service. Frequency Once, by Program closure. Data source Annual report on civil service from the SPAC. Methodology for Data Collection Leadership positions are defined by Article 16 of the 2020 CivilSservice Bylaw. Responsibility for Data Collection SPAC. Result Area 3 on transparency and accountability through digitalization Improved e-participation Description Jordan rating under the e-participation indicator of the UN e-Government Development Index. Frequency Every two years. Data source UN e-Government Development Index Methodology for Data Collection Questionnaire based on the 2022 UN e-Government Development Survey methodology. E-participation questions cover three dimensions: 1 ) e-information, 2 ) e-consultation, and 3 ) e-decision making. In 2022, these three dimensions are further categorised into six sub-dimensions as follows: e-notification and e-enabling ( under e-information ); e - discourse and e-dialogue ( under e-consultation ); and e-collaboration and e-empowerment ( under e-decision-making ). Responsibility for Data UN.", + "ner_text": [ + [ + 48, + 67, + "named" + ], + [ + 270, + 290, + "Public opinion poll <> reference population" + ], + [ + 507, + 523, + "Public opinion poll <> reference population" + ], + [ + 1185, + 1191, + "Public opinion poll <> data geography" + ] + ], + "validated": true, + "empirical_context": "Frequency Once, at Program closure. Data source Public opinion poll by the Centre for Strategic Studies at the University of Jordan on Tawjihi, Universities and the Unified Admission System. Methodology for Data Collection Polling of a national representative sample of high school students covering all governorates, and regions.", + "type": "poll", + "explanation": "This is indeed a dataset as it is described as a data source that collects information from a representative sample.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a public opinion poll, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a data source that collects information from a representative sample.", + "contextual_signal": "mentioned as a data source", + "tags": [] + }, + { + "filename": "009_BOSIB-70f97ae8-b741-401c-82cc-e87615cc5487", + "page": 55, + "text": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ). 25 Health information systems could include the national EMR platform, supply chain management information system, and surveillance systems managed by the MOH. Selected health information systems will be described in the Program Operational Manual ( including the verification protocol ).", + "ner_text": [ + [ + 515, + 543, + "named" + ], + [ + 582, + 596, + "health professional registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "These shall include: ( 1 ) a client registry ( that supports the unique identification and management of patient identities ); ( 2 ) one or more terminology registries accessible as a service ( that provide a standardized classification or a query-able source for health information exchange, including clinical terminologies, ontologies, dictionaries, code systems, and value sets ); ( 3 ) a universal facility registry ( that sets the unique identifier for locations where health services are provided ); ( 4 ) a health professional registry ( that sets the unique identifier for health workers that provide services within a country ); and ( 5 ) a common product catalogue ( that properly defines and categorizes medical products or commodities ). 24 Core standards include: ( 1 ) content standards ( that dictate the structure of electronic documents and types of data they must contain by ensuring data is properly organized and represented in a clear manner ); ( 2 ) terminology standards ( that ensure that all parties will be able to understand and use it while exchanging health data ); ( 3 ) transport standards ( that facilitate data exchange between different systems by defining what formats, document architecture, data elements, methods, and application programming interfaces to use for achieving interoperability ); and ( 4 ) security standards ( that establish administrative and technical rules to protect sensitive data from misuse, unauthorized access, or disclosure ).", + "type": "registry", + "explanation": "This is a dataset as it functions as a structured collection of data for identifying health professionals.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a registry that manages unique identifiers for health workers.", + "contextual_reason_agent": "This is a dataset as it functions as a structured collection of data for identifying health professionals.", + "contextual_signal": "mentioned as a registry that sets unique identifiers for health workers", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 74, + "validated": 71, + "not_validated": 3 + } + }, + { + "filename": "006_BOSIB-2c22668f-f4ba-42f5-a0d1-7949b7b8fe34", + "page": 19, + "text": "It is anticipated that the improvement of rural access road infrastructure will increase both mobility and trade to and from the settlement for refugees and host communities. Current electrical supply is poor, with households relying mostly on kerosene lanterns, candles, flashlights, and generators for lighting and cell phone charging. Enhanced transport and energy will also stimulate retail business and production. The upgrading and expansion of school infrastructure will improve learning outcomes, enhance teacher retention and new recruitment, and afford greater dignity to boarding pupils, especially girls, as many are currently sharing beds due to the lack of capacity. The provision of modern WASH facilities will support greater school attendance \u2014 especially for girls whose attendance is often curtailed due to lack of access to hygiene services26 \u2014 and improve the health of students who are at increased risk of waterborne diseases such as cholera in the rainy season given the current overreliance on pit latrines. ( a ) Transport infrastructure: Meheba settlement main access road and Mwafwe bridge ( US $ 3 million ). The overall state of rural roads within the Meheba settlement remains one of the main bottlenecks to the development of that settlement into an economically viable area. Results from various studies such as the 23 Zambia 2022 Census of Population and Housing, Central Statistical Office Zambia, 2023. 24 One Meheba Local Area Plan, Planning Survey and Issues Report, GRZ, 2023. 25 One Meheba Local Area Plan, Planning Survey and Issues Report, GRZ, 2023. 26 \u201c Many girls choose not to attend classes at certain times due to the lack of appropriate washing and hygiene facilities, \u201d School headmaster, Kalumbila District, February 2024.", + "ner_text": [ + [ + 1352, + 1396, + "named" + ] + ], + "validated": true, + "empirical_context": "The overall state of rural roads within the Meheba settlement remains one of the main bottlenecks to the development of that settlement into an economically viable area. Results from various studies such as the 23 Zambia 2022 Census of Population and Housing, Central Statistical Office Zambia, 2023. 24 One Meheba Local Area Plan, Planning Survey and Issues Report, GRZ, 2023.", + "type": "census", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a census that provides data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a census, which typically involves structured data collection.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a census that provides data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 73, + "validated": 72, + "not_validated": 1 + } + }, + { + "filename": "172_multi0page", + "page": 10, + "text": "In addition, there is a lack of basic furniture, teaching and learning materials, overcrowding in many schools in safer areas, disorientation and psychological trauma experienced by a large segment of the population, especially children, a weakened institutional capacity of the MEST in managing the education system, a serious lack of information and data to plan the provision of services, and poor coordination of the various initiatives among numerous actors working in the education sector. Main Sector Issues 2. 2 Data presented in this section draw extensively from two sources: the National School Survey Report ( NSSR ) conducted by Research Triangle Institute and Paul Musker and Associates in collaboration with the Central Statistics Office ( CSO ), at the end of 2001 in the context of the preparation of the project; and the Multiple Cluster Indicators Survey ( MICS ) undertaken in 2000. The NSSR received support of a PHRD grant, and covered all existing institutions ( Grades 1-9 ), taking into account formal, non-formal, community-based and rapid education programs. ( a ) Limited access to education and regional and gender disparities - 5 -", + "ner_text": [ + [ + 839, + 873, + "named" + ], + [ + 642, + 669, + "Multiple Cluster Indicators Survey <> author" + ], + [ + 674, + 700, + "Multiple Cluster Indicators Survey <> author" + ], + [ + 727, + 752, + "Multiple Cluster Indicators Survey <> author" + ], + [ + 897, + 901, + "Multiple Cluster Indicators Survey <> publication year" + ], + [ + 1177, + 1195, + "Multiple Cluster Indicators Survey <> usage context" + ] + ], + "validated": true, + "empirical_context": "Main Sector Issues 2. 2 Data presented in this section draw extensively from two sources: the National School Survey Report ( NSSR ) conducted by Research Triangle Institute and Paul Musker and Associates in collaboration with the Central Statistics Office ( CSO ), at the end of 2001 in the context of the preparation of the project; and the Multiple Cluster Indicators Survey ( MICS ) undertaken in 2000. The NSSR received support of a PHRD grant, and covered all existing institutions ( Grades 1-9 ), taking into account formal, non-formal, community-based and rapid education programs.", + "type": "survey", + "explanation": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a survey, which typically collects structured data.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned as a survey that provides data for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "031_IBArchive-bd2c789e-ee04-4df7-a219-9409a5f705d3", + "page": 79, + "text": "The World Bank Chad Energy Access Scale Up Project ( P174495 ) Page 74 of 87 ANNEX 4: Climate and Hazard Considerations Climate Change and Natural Hazard Risks and Adaptation Opportunities 1. Resilient infrastructure development in the Republic of Chad includes consideration of existing natural hazards51 and ongoing climate change. Three key risks in this project include wildfire, flooding, and extreme heat, which are expected to increase due to climate change. Figure 4. 1. Key Hazard Risks in Chad and Their Geographic Distribution Source: ( A ) Author calculations using data from [ 46 ]; ( B ) FATHOM fluvial flooding data ( 2021 ) [ 48 ]; ( C ) Internal calculations using data from NASA NEX-GDDP CMIP5 data [ 50 ]. 2. Wildfire is recognized as a \u2018 high \u2019 risk in Chad under current climate conditions, and climate change is expected to exacerbate this risk. 52 However, this risk is concentrated in the southern part of the country, particularly along the southeastern corner where the average annual area of land that is burned is 20 \u2013 50 percent or more ( Figure 4. 1 ).", + "ner_text": [ + [ + 692, + 716, + "named" + ], + [ + 236, + 252, + "NASA NEX-GDDP CMIP5 data <> data geography" + ], + [ + 633, + 637, + "NASA NEX-GDDP CMIP5 data <> publication year" + ] + ], + "validated": true, + "empirical_context": "1. Key Hazard Risks in Chad and Their Geographic Distribution Source: ( A ) Author calculations using data from [ 46 ]; ( B ) FATHOM fluvial flooding data ( 2021 ) [ 48 ]; ( C ) Internal calculations using data from NASA NEX-GDDP CMIP5 data [ 50 ]. 2.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly used for internal calculations in the context of hazard risk analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes a specific reference to data from NASA's NEX-GDDP CMIP5.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly used for internal calculations in the context of hazard risk analysis.", + "contextual_signal": "follows 'uses data from'", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 129, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 117 accountability, financial transparency, and grievance redress mechanisms. Under PFM, several expenditure and financial management assessments highlight how issues related to budget prioritization, reliability, and predictability impede service delivery and results. 119 The PFM subcomponent will provide TA in linking budgets with overarching HCO strategic results and supporting PFM oversight for institutions. Specifically, it will support the implementation of PBB and accountability for program results at both federal and regional levels. This will include: ( a ) training on PBB processes; ( b ) defining roles, responsibilities, and coordination mechanisms; ( c ) piloting PBB at the regional level ( Figure 8. 1 ) with the development of a regional PBB manual; ( d ) testing financial and human resource incentives to support a performance culture; and ( e ) creating regular monitoring and reporting system for financial and nonfinancial performance. PBB guidelines will be included in the POM as part of the IPF component. In addition, overall financial data collection and reporting will be strengthened ( using existing systems such as the Integrated Financial Management Information System ( IFMIS ) and IBEX ) to ensure proper tracking and reporting such as codification of human capital and cross-cutting activities and objectives in the budget system of regions and woredas.", + "ner_text": [ + [ + 1281, + 1285, + "named" + ] + ], + "validated": false, + "empirical_context": "PBB guidelines will be included in the POM as part of the IPF component. In addition, overall financial data collection and reporting will be strengthened ( using existing systems such as the Integrated Financial Management Information System ( IFMIS ) and IBEX ) to ensure proper tracking and reporting such as codification of human capital and cross-cutting activities and objectives in the budget system of regions and woredas.", + "type": "system", + "explanation": "However, IBEX is referred to as a system, not explicitly as a data source or structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed IBEX is a dataset because it is mentioned in the context of financial data collection and reporting.", + "contextual_reason_agent": "However, IBEX is referred to as a system, not explicitly as a data source or structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 65, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 53. Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "ner_text": [ + [ + 1010, + 1015, + "named" + ] + ], + "validated": false, + "empirical_context": "Monitoring & Evaluation Plan: PDO Indicators Indicator Name Definition / Description Frequency Datasource Methodology for Data Collection Responsibility for Data Collection Girls \u2019 survival rate in secondary school in all regions including areas affected by conflict, droughts and high levels of refugees This indicator measures the share of a cohort of female students enrolled in Grade 9 who successfully completed secondary schools and it will be measured by using a \u2018 synthetic cohort method \u2019. Annual EMIS, RRS and UNHCR Data collected by MoE through the annual EMIS process and through RRS and UNHCR in refugee host communities MOE / EMIS Directorate Children 0-59 months with diarrhea treated with ORS and zinc in low performing regions The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc in the low performing regions Annual DHIS2, RRS and UNHCH in refugee host communities Data collected by MoH through the annual DHIS2 and through RRS and UNHCR in refugee host communities MoH Woredas that engage citizens in planning and budgeting including in areas affected by conflict, droughts and high levels of refugees in conflict affected areas The percentage of woredas that conduct pre-budget discussion with at least 40 % participation of women Annual Administrative data / reports from regions including through RRS and UNHCR in refugee host communities Use", + "type": "organization", + "explanation": "'UNHCH' is mentioned as part of a list of organizations involved in data collection, not as a dataset itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'UNHCH' is a dataset due to its mention alongside data sources.", + "contextual_reason_agent": "'UNHCH' is mentioned as part of a list of organizations involved in data collection, not as a dataset itself.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "158_40156", + "page": 52, + "text": "UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. b ) Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and surrounding population. Initially, only those sites supported by this project will be added. Later, when the regional HIV M & E strategy has been developed, all the sites and camps will be added to national sentinel surveillance. c ) Health facility surveys: At all health facilities in hot spots, a health facility survey will be undertaken to assess the availability of HIV related services in the health facilities that are servicing populations in the 14 hot spots. d ) Rapid assessment in hot spot areas: All 14 hot spot areas \u2013 the seven targeted for year 1 of implementation and the seven targeted for year 2 of implementation - will be assessed at the start of year 1.", + "ner_text": [ + [ + 18, + 21, + "named" + ] + ], + "validated": false, + "empirical_context": "UNHCR already has BSS information for some of the camps to be supported by the project, and if resources are made available in future, it is envisioned that the other camps would be covered by BSS as well. b ) Sentinel surveillance: Sentinel surveillance will be undertaken by IGAD member states, who will add additional sentinel sites to their national sentinel surveillance system \u2013 one in each refugee site, IDP site and surrounding population.", + "type": "program", + "explanation": "However, BSS is described as a program or initiative rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed BSS is a dataset because it is mentioned in the context of information collection for camps.", + "contextual_reason_agent": "However, BSS is described as a program or initiative rather than a structured collection of data.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "030_IBArchive-a3efea83-58ad-4ddf-8bec-57dcadea0526", + "page": 31, + "text": "The first action pertains to better utilization of NEMIS data for filling information gaps in CBC implementation, specifically in respect of the allocation of capitation grants, mapping of school needs, and development budget allocations. The second action is capacity strengthening of the teacher training colleges to ensure that their graduates have acquired the core competencies including retooling of tutors and establishment of ICT enabled learning resource centers, to ensure teachers, to implement the CBC and CBA. The third action to establish standards and tools for quality assurance of preschools, is fully aligned with the CBC \u2019 s objective of improving basic education quality. Finally, construction of new classrooms in existing schools as per the needs-based school infrastructure investment plan, will address the CBC \u2019 s requirement for improved learning conditions in schools. 46. The Program will support the KNEC to conduct and disseminate NASMLA for grades 3 in 2023 and 2026, which will also inform the PDO indicator for learning outcomes. The participating agency for the assessment is KNEC. Considering the need for robust TA and capacity building of the implementing institutions to carry out the reforms and sustain reform momentum to ensure achievement of objectives, support will be provided through the IPF component and some of the capacity building actions will be reflected in the PAP. IPF Component 47.", + "ner_text": [ + [ + 51, + 56, + "named" + ], + [ + 51, + 61, + "NEMIS <> data type" + ], + [ + 145, + 176, + "NEMIS <> data description" + ], + [ + 178, + 201, + "NEMIS <> data description" + ], + [ + 207, + 237, + "NEMIS <> data description" + ], + [ + 984, + 988, + "NEMIS <> publication year" + ], + [ + 1110, + 1114, + "NEMIS <> publisher" + ] + ], + "validated": true, + "empirical_context": "The first action pertains to better utilization of NEMIS data for filling information gaps in CBC implementation, specifically in respect of the allocation of capitation grants, mapping of school needs, and development budget allocations. The second action is capacity strengthening of the teacher training colleges to ensure that their graduates have acquired the core competencies including retooling of tutors and establishment of ICT enabled learning resource centers, to ensure teachers, to implement the CBC and CBA.", + "type": "system", + "explanation": "NEMIS is indeed a dataset as it is utilized for data-driven decision-making in the context of CBC implementation.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed NEMIS is a dataset because it is referenced in relation to filling information gaps and allocations.", + "contextual_reason_agent": "NEMIS is indeed a dataset as it is utilized for data-driven decision-making in the context of CBC implementation.", + "contextual_signal": "mentioned as a data source for filling information gaps", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 36, + "validated": 28, + "not_validated": 8 + } + }, + { + "filename": "059_Uganda-Investing-in-Forests-and-Protected-Areas-for-Climate-Smart-Development-Project", + "page": 72, + "text": "STEP. The project will use STEP, a planning and tracking system, which will provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. The MWE is familiar with STEP, which is also used under the IWMDP. The knowledge built, lessons learned, and corrective mechanisms put in place will provide a foundation for the project. 52. Use of national procurement procedures. Due to recent changes in Uganda \u2019 s procurement legal framework, Uganda National Procurement Procedures are not in line with the World Bank Procurement Regulations Section V - Para 5. 4 National Procurement Procedures. In particular, the following provisions are not consistent with Procurement Regulations: ( a ) Use of domestic preference for contracts obtained through open national competitive procedures ( b ) Fees for handling bidder complaints at procuring entity level", + "ner_text": [ + [ + 216, + 220, + "named" + ] + ], + "validated": false, + "empirical_context": "The project will use STEP, a planning and tracking system, which will provide data on procurement activities, establish benchmarks, monitor delays, and measure procurement performance. The MWE is familiar with STEP, which is also used under the IWMDP. The knowledge built, lessons learned, and corrective mechanisms put in place will provide a foundation for the project.", + "type": "system", + "explanation": "However, STEP is described as a planning and tracking system, not a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed STEP is a dataset because it provides data on procurement activities.", + "contextual_reason_agent": "However, STEP is described as a planning and tracking system, not a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 15, + "validated": 1, + "not_validated": 14 + } + }, + { + "filename": "038_Burundi-Cash-for-Jobs-Project", + "page": 43, + "text": "To scale up activities at national level, the PIU will need to strengthen the fiduciary team and will need to hire additional personnel at least in the following positions: an additional procurement specialist, an additional accountant, an environmental specialist, an expert on accompanying measures on human capital development, a productive inclusion specialist; a refugee support specialist and human resources specialist. These positions need to be filled not later than three months after project effectiveness. Additional consultants to be deployed to all new provinces and communes under the project will also need to be hired. In addition, the PIU will need to hire legal expertise for the components of the Social Registry and refugee support and these are the components with important policy content. The project will consider the use of interns to support project implementation and to build a pool of future social workers. 124. The project will rely on the support from three different UN agencies, namely UNICEF, WFP and UNHCR, for the implementation of its components. UNICEF already supported the implementation of accompanying measures on human capital development under Merankabandi and the same approach will be promoted with WFP and UNHCR for the productive inclusion and support to refugees and host communities \u2019 components.", + "ner_text": [ + [ + 717, + 732, + "named" + ] + ], + "validated": false, + "empirical_context": "Additional consultants to be deployed to all new provinces and communes under the project will also need to be hired. In addition, the PIU will need to hire legal expertise for the components of the Social Registry and refugee support and these are the components with important policy content. The project will consider the use of interns to support project implementation and to build a pool of future social workers.", + "type": "program", + "explanation": "However, the context indicates it is part of a project with policy content, not explicitly functioning as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'Social Registry' is a dataset because it sounds like a structured collection of data.", + "contextual_reason_agent": "However, the context indicates it is part of a project with policy content, not explicitly functioning as a data source.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 83, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 71 Procedure Data collected by woreda health offices using the regular data collection system ( DHIS2 ) and verified by ESS. The verified information is then submitted by the MOF to the World Bank in a consolidated DLI verification report as part of each semi - annual government / World Bank review. DLI_TBL_VERIFICATION DLI 8 Quarterly reports of minutes of multisectoral committee meetings in 29 selected SPG woredas Description Multisectoral committee will regularly meet for project management and minutes of the meeting will be reported ever quarter to MoF. Data source / Agency Administrative data, WOFED report quarterly meeting minutes with all committee members send to MOF Verification Entity ESS Procedure Quarterly minutes report submitted by WOFED to MOF and verified by ESS..", + "ner_text": [ + [ + 156, + 161, + "named" + ], + [ + 15, + 23, + "DHIS2 <> data geography" + ], + [ + 91, + 112, + "DHIS2 <> reference population" + ], + [ + 246, + 256, + "DHIS2 <> publisher" + ] + ], + "validated": true, + "empirical_context": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 71 Procedure Data collected by woreda health offices using the regular data collection system ( DHIS2 ) and verified by ESS. The verified information is then submitted by the MOF to the World Bank in a consolidated DLI verification report as part of each semi - annual government / World Bank review.", + "type": "system", + "explanation": "DHIS2 is a data collection system that is used to gather and verify health data, indicating it functions as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed DHIS2 is a dataset because it is mentioned in the context of data collection and verification.", + "contextual_reason_agent": "DHIS2 is a data collection system that is used to gather and verify health data, indicating it functions as a data source.", + "contextual_signal": "described as a data collection system that stores records", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 30, + "validated": 21, + "not_validated": 9 + } + }, + { + "filename": "067_Mauritania-Second-Social-Safety-Net-System-Project", + "page": 44, + "text": "The World Bank Mauritania Social Safety Net System Project II ( P171125 ) Page 39 of 61 Households with complete information in the Social Registry - Refugee Number of refugee households registred within the Social Registry database and for which the socio - economic data has been collected and recorded. Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years. Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "ner_text": [ + [ + 748, + 763, + "named" + ], + [ + 4, + 14, + "Social Registry <> publisher" + ], + [ + 15, + 25, + "Social Registry <> data geography" + ], + [ + 88, + 98, + "Social Registry <> reference population" + ], + [ + 168, + 186, + "Social Registry <> reference population" + ], + [ + 251, + 272, + "Social Registry <> data type" + ], + [ + 412, + 422, + "Social Registry <> reference population" + ] + ], + "validated": true, + "empirical_context": "Semester Social Registry MIS Specific request on the Social Registry database Social Registry Directorate Households with updated information in the Social registry ( less than 3 years ) Percentage of households included in the Social Register database for which socio-economic data was collected within the previous three years. Semester Social Registry MIS Request on the Social Registry database Social Registry Directorate Cases from the Social Registry grievance redress mechanism resolved in a timely manner Percentage of complaints or requests for information received by the Social Registry that have been resolved within 60 days. Semester Social Registry MIS MIS extraction Social Registry Directorate Spot-check surveys carried-out and disclosed by the Social Registry Number of spot checks on the Social Registry carried out by an external firm and / or the Social Registry during the duration of the project and which have been the subject of a report and feedback to the teams.", + "type": "database", + "explanation": "In the context, the Social Registry is explicitly mentioned as a database that contains updated information and socio-economic data, confirming its role as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is referred to as a 'database' and is associated with socio-economic data collection.", + "contextual_reason_agent": "In the context, the Social Registry is explicitly mentioned as a database that contains updated information and socio-economic data, confirming its role as a data source.", + "contextual_signal": "described as a database that contains socio-economic data", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 33, + "validated": 30, + "not_validated": 3 + } + }, + { + "filename": "167_27761", + "page": 32, + "text": "For health issues, the levels of malnutrition ( i. e., height and weight ) and anemia ( i. e., hemoglobin ) every time the child visits the health center will be gathered and the data collected in pertinent verification forms. For education, the levels of school enrollment and the dropout rates will be measured using the verification forms. In this manner, the outcome indicators will be recorded in the MIS so that an external fm can analyze the information and determine the changes. Outcome indicators will be obtained from the MIS. Data will be gathered for the three kinds of conditions: attendance levels, payments, and compliance; also indicators for beneficiary households, vulnerable members, and financial indicators. This information will be gathered in regular reports to be prepared initially by the project coordination team and afterward by a department at MOSA. In addition to the above, the external firm that analyzes the outcome indicators will also perform operational monitoring for the program at least once a year. Operations under the program vis - & vis the operational manual will be assessed through field visits, consultation to stakeholders, and interviews with program officials, and a review of progress reports. The firm will make recommendations to introduce corrective measures and improve operational efficiency.", + "ner_text": [ + [ + 533, + 536, + "named" + ] + ], + "validated": false, + "empirical_context": "In this manner, the outcome indicators will be recorded in the MIS so that an external fm can analyze the information and determine the changes. Outcome indicators will be obtained from the MIS. Data will be gathered for the three kinds of conditions: attendance levels, payments, and compliance; also indicators for beneficiary households, vulnerable members, and financial indicators.", + "type": "system", + "explanation": "However, 'MIS' is described as a management information system, not as a data source itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'MIS' could imply a structured system for managing information.", + "contextual_reason_agent": "However, 'MIS' is described as a management information system, not as a data source itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 77, + "text": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 74 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy. EMIS deconcentration only applies to MINEDUB and MINESEC \u2022 Budget approved for EMIS activities for subsequent year Year 5 \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy.", + "ner_text": [ + [ + 731, + 738, + "named" + ] + ], + "validated": false, + "empirical_context": "The World Bank CAMEROON Education Reform Support Project ( P160926 ) Page 74 of 148 DLI Protocol to Evaluate Achievement of the DLI and Data / Result Verification Data Source / Agency Verification Entity Definitions Verification Procedure MINEDUB, MINESEC, MINEFOP, and MINESUP. It will respect the DQAF ( Data Quality Assessment Framework ), be methodologically validated by INS, and follow international norms and standards \u2022 Integrated Statistical Yearbook published and available online, which covers the whole education and training sector \u2022 Integrated education sector database exists, covering national and sub-national levels, and is compatible with and linked to other systems used for the management of state personnel ( SIGIPES ) \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy. EMIS deconcentration only applies to MINEDUB and MINESEC \u2022 Budget approved for EMIS activities for subsequent year Year 5 \u2022 The level of deconcentration of EMIS and the grouping of regions for implementation will be defined by the EMIS Technical Unit in consultation with the World Bank and UNESCO, and will be clearly defined in the National EMIS Implementation Strategy.", + "type": "system", + "explanation": "However, SIGIPES is described as a system used for managing state personnel, not as a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed SIGIPES is a dataset because it is mentioned in the context of education data management.", + "contextual_reason_agent": "However, SIGIPES is described as a system used for managing state personnel, not as a structured collection of data.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "025_South-Sudan-Productive-Safety-Net-for-Socioeconomic-Opportunities-Project", + "page": 57, + "text": "The World Bank Productive Safety Net for Socioeconomic Opportunities Project ( P177663 ) Page 53 of 74 within SNSOP project locations that are satisfied or very satisfied with assets created through LIPW divided by the total number of beneficiaries and non beneficiaries surveyed in SNSOP project locations. SNSOP project locations refer to bomas or quarter councils where the SNSOP project is active a quarterly basis during missions and ISRs System and satisfaction surveys carried out by the SNSOP M & E team. In addition, satisfaction will be monitored by the Third Party Monitor ( TPM ) Beneficiary households receiving economic opportunities Number of total beneficiary households of Component 1 that are also receiving economic opportunities under Component 2, in accordance with the Project Operations Manual, and have received at least 1 installment of the livelihood grant. This indicator will be measured at a minimum on a quarterly basis. SNSOP Management Information System ( MIS ) Data on participation in Component 2 will be collected at registration where based on the targeting and registration process outlined in the Project Operations Manual, eligible beneficiaries will be allocated to Component 2.", + "ner_text": [ + [ + 951, + 986, + "named" + ] + ], + "validated": false, + "empirical_context": "This indicator will be measured at a minimum on a quarterly basis. SNSOP Management Information System ( MIS ) Data on participation in Component 2 will be collected at registration where based on the targeting and registration process outlined in the Project Operations Manual, eligible beneficiaries will be allocated to Component 2.", + "type": "system", + "explanation": "However, it is described as a management information system, not explicitly as a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes 'Data' in its name.", + "contextual_reason_agent": "However, it is described as a management information system, not explicitly as a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "141_760530PAD0P127010Box377322B00OUO090", + "page": 56, + "text": "- 41 - Subcomponent 2-A: Targeted Capacity Support to Local Governments ( US $ 12 million, of which: GoM US $ 2 million, IDA US $ 5. 5 million, and EU US $ 4. 5 million equivalent ) 34. This subcomponent will provide targeted capacity support directly to LGs to strengthen their ability to manage and implement decentralized service deliver ( with specific emphasis on the CPG ) through two different windows: ( i ) mandatory but customized training and courses in core aspects of LG management ( including local government regulations and laws, financial management, procurement, planning and budgeting, own source revenue management, and sustainable natural resources management ) linked to a national framework of minimum qualifications and conditions for different levels of staff in LGs; and ( ii ) a system of \u201c mobile teams \u201d staffed with key personnel able to provide \u201c just-in-time \u201d on-the-job training and assistance. Both these types of capacity interventions will strategically build upon the findings of the annual assessments which will specifically identify weaknesses and strengths of each LG. The assessment reports will provide valuable information for the identification of training programs and for elaboration of the mobile teams \u2019 work plans. 35. The mandatory training will be based around the development of national training program with minimum standards and modules for corresponding staff levels.", + "ner_text": [ + [ + 1022, + 1040, + "named" + ] + ], + "validated": false, + "empirical_context": "This subcomponent will provide targeted capacity support directly to LGs to strengthen their ability to manage and implement decentralized service deliver ( with specific emphasis on the CPG ) through two different windows: ( i ) mandatory but customized training and courses in core aspects of LG management ( including local government regulations and laws, financial management, procurement, planning and budgeting, own source revenue management, and sustainable natural resources management ) linked to a national framework of minimum qualifications and conditions for different levels of staff in LGs; and ( ii ) a system of \u201c mobile teams \u201d staffed with key personnel able to provide \u201c just-in-time \u201d on-the-job training and assistance. Both these types of capacity interventions will strategically build upon the findings of the annual assessments which will specifically identify weaknesses and strengths of each LG. The assessment reports will provide valuable information for the identification of training programs and for elaboration of the mobile teams \u2019 work plans.", + "type": "assessment", + "explanation": "'Annual assessments' are mentioned as reports providing information rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed 'annual assessments' is a dataset because it implies a systematic evaluation process that could yield data.", + "contextual_reason_agent": "'Annual assessments' are mentioned as reports providing information rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + } + }, + { + "filename": "057_West-Bank-and-Gaza-Phase-1-of-the-Multiphase-Programmatic-Approach-Advancing-Sustainability-in-Performance-Infrastructure-and-Reliability-of-Energy-Sector-Project", + "page": 50, + "text": "Annual PECC and PCBS Primary data on generation capacity PENRA PMU Beneficiary women-owned businesses reporting stable incomes due to improved electricity services This refers to women owned SMEs that benefit from the financing mechanism for solar PV systems Bi-annual Women owned businesses Survey PENRA PMU Preparation of feasibility studies completed Feasibility studies for priority infrastructure projects for financing in Phase 2 Once PENRA, PETL or DISCOs Implementation report PENRA PMU PENRA publishes on its website results of citizen engagement survey citizen engagement indicator PENRA will conduct the first beneficiary PENRA Beneficiary survey PENRA PMU", + "ner_text": [ + [ + 537, + 562, + "named" + ], + [ + 57, + 62, + "citizen engagement survey <> publisher" + ], + [ + 79, + 101, + "citizen engagement survey <> reference population" + ], + [ + 299, + 304, + "citizen engagement survey <> publisher" + ], + [ + 441, + 446, + "citizen engagement survey <> publisher" + ], + [ + 485, + 490, + "citizen engagement survey <> publisher" + ], + [ + 495, + 500, + "citizen engagement survey <> publisher" + ], + [ + 537, + 591, + "citizen engagement survey <> data type" + ], + [ + 592, + 597, + "citizen engagement survey <> publisher" + ], + [ + 633, + 638, + "citizen engagement survey <> publisher" + ] + ], + "validated": true, + "empirical_context": "Annual PECC and PCBS Primary data on generation capacity PENRA PMU Beneficiary women-owned businesses reporting stable incomes due to improved electricity services This refers to women owned SMEs that benefit from the financing mechanism for solar PV systems Bi-annual Women owned businesses Survey PENRA PMU Preparation of feasibility studies completed Feasibility studies for priority infrastructure projects for financing in Phase 2 Once PENRA, PETL or DISCOs Implementation report PENRA PMU PENRA publishes on its website results of citizen engagement survey citizen engagement indicator PENRA will conduct the first beneficiary PENRA Beneficiary survey PENRA PMU", + "type": "survey", + "explanation": "This is indeed a dataset as it is described as a survey that collects empirical data on citizen engagement.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it refers to a survey that collects data on citizen engagement.", + "contextual_reason_agent": "This is indeed a dataset as it is described as a survey that collects empirical data on citizen engagement.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "039_Niger-First-Phase-of-Accelerating-Electricity-Access-Project", + "page": 92, + "text": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system. This is obtained from NIGELEC ' s customers database, ANPER and ANERSOL ( for mini-grid and standalone systems ). The number of people would be estimated by multiplying the number of households by the average household size at the national or regional level. People provided with new or improved electricity service - Female Semi - annually. National statistics / Proj ect data. Estimated based on the share of female in the national population as published by the national statistics institute or other credible sources. NIGELEC / ANPER / ANERS OL. Refugee and host beneficiaries provided with electricity services through grid, mini grids and electrified public institutions This relates to the number of refugees and host populations living in localities and settings where the project provides electricity through grid, mini grids and off-grid electrification ( under components 1, 2 and 3 ). Semi - annually. National statistics / Proj ect data / UNHCR data / World Bank data. Estimated based on collected data about refugees and host communities zones and project-level data. NIGELEC / ANPER / ANERS OL.", + "ner_text": [ + [ + 108, + 115, + "named" + ], + [ + 4, + 14, + "ANERSOL <> publisher" + ], + [ + 713, + 743, + "ANERSOL <> reference population" + ], + [ + 1128, + 1138, + "ANERSOL <> publisher" + ], + [ + 1225, + 1243, + "ANERSOL <> data type" + ] + ], + "validated": true, + "empirical_context": "The World Bank Niger Accelerating Electricity Access Project ( Hask\u00e9 ) ( P174034 ) Page 85 of 174 ANPER and ANERSOL project databases. acquire an off-grid system.", + "type": "database", + "explanation": "ANERSOL is confirmed as a dataset since it is listed with other project databases, indicating it serves as a data source.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because it is mentioned alongside 'project databases'.", + "contextual_reason_agent": "ANERSOL is confirmed as a dataset since it is listed with other project databases, indicating it serves as a data source.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "confusing_term", + "borderline" + ], + "term_stats": { + "total": 4, + "validated": 1, + "not_validated": 3 + } + }, + { + "filename": "118_PAD1199-PAD-P144637-IDA-R2015-0247-1-Box393201B-OUO-9", + "page": 13, + "text": "While the ruling party, Cameroon People Democratic Movement ( RDPC ) dominates 85 percent of the local councils ( ruling 305 out of 360 communes ), more than 70 percent of local council members were renewed between the 2007 and 2013 elections. In the 2013 election, voter turn-out was officially reported at 75 percent. Over the last eight years, citizens were actively involved \u2014 through PNDP \u2014 in the development of CDPs in 329 local councils. Furthermore, the Local PIB Monitoring Committee ( Comit\u00e9 Local de Suivi du BIP \u2013 CLS ) has, since 2006, provided a space for civil society and citizens to be directly involved in quarterly monitoring of PIB execution at the district, regional and national levels. However, citizens still have limited access to the CLSs, creating a lack of trust in local council members in comparison to other elected and government bodies ( Afrobarometer 2014 ).", + "ner_text": [ + [ + 872, + 885, + "named" + ], + [ + 228, + 242, + "Afrobarometer <> reference year" + ], + [ + 347, + 355, + "Afrobarometer <> reference population" + ], + [ + 544, + 548, + "Afrobarometer <> reference year" + ], + [ + 886, + 890, + "Afrobarometer <> publication year" + ] + ], + "validated": true, + "empirical_context": "Furthermore, the Local PIB Monitoring Committee ( Comit\u00e9 Local de Suivi du BIP \u2013 CLS ) has, since 2006, provided a space for civil society and citizens to be directly involved in quarterly monitoring of PIB execution at the district, regional and national levels. However, citizens still have limited access to the CLSs, creating a lack of trust in local council members in comparison to other elected and government bodies ( Afrobarometer 2014 ).", + "type": "survey", + "explanation": "Afrobarometer is indeed a dataset as it is referenced in the context as a source of empirical data regarding citizens' trust and access to local councils.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because Afrobarometer is known for collecting and providing survey data on public opinion in Africa.", + "contextual_reason_agent": "Afrobarometer is indeed a dataset as it is referenced in the context as a source of empirical data regarding citizens' trust and access to local councils.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 64, + "text": "The effect of remedial education on learning outcomes is well established in the literature: teaching oriented to the level of the student rather than the level prescribed for the student \u2019 s grade in the curriculum has produced large gains in learning as found in a number of randomized controlled trials ( Banerjee et al. 2016 ). Table 2. 3 presents effect sizes from evaluations of interventions that have elements of remedial learning targeting struggling students. These vary in modality including computer-assisted adaptive learning, volunteer community members providing after-school tutoring, and curriculum adjustments to focus on core skills. Together, they provide a sense of what would be a reasonable range of effect sizes to expect from the interventions supported under this project. 7. Modeling the benefits of Component 2 relies on the association between the digital learning environment and learning outcomes in PISA 2018. Given that the type of support to schools under Component 2 would vary depending on the needs of recipient schools, it is not possible to identify a range of effect sizes from similar interventions in the impact evaluation literature. Instead, the approach is to use an indicator of school digital learning environment in the PISA 2018 data and use the association between this indicator and learning outcomes as an effect size. The indicator used was whether all computers in the school were connected to the internet or not. The difference in learning outcomes between children at schools with all computers connected to the internet and those not was 0. 15 SD after controlling for differences in household wealth, gender, age, grade, and number of years of preprimary education.", + "ner_text": [ + [ + 931, + 940, + "named" + ] + ], + "validated": true, + "empirical_context": "7. Modeling the benefits of Component 2 relies on the association between the digital learning environment and learning outcomes in PISA 2018. Given that the type of support to schools under Component 2 would vary depending on the needs of recipient schools, it is not possible to identify a range of effect sizes from similar interventions in the impact evaluation literature.", + "type": "dataset", + "explanation": "PISA 2018 is indeed a dataset as it provides structured data on educational outcomes used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA 2018 refers to an assessment that collects data on student learning outcomes.", + "contextual_reason_agent": "PISA 2018 is indeed a dataset as it provides structured data on educational outcomes used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [] + }, + { + "filename": "096_CAMEROON-PAD2-11012017", + "page": 20, + "text": "Similarly, the public investment preparation process does not have a systematic evidence-based assessment of return on investment to inform selection and prioritization of public investment projects, despite the availability of good national statistics overall. Public spending planning and programming would have a higher impact with better use of available statistics to inform resource allocation according to the population, poverty level, and other socioeconomic indicators. 25. The statistics system is adequate overall but is obviously under strain. The National Institute of Statistics ( Institut National de la Statistique, INS ) produces and publishes economic statistics ( national accounts, prices indices, external trade, enterprises ) and social statistics ( poverty and living condition, demographic, health ) of satisfactory quality. Cameroon has a solid experience in rolling out large data collection operations such as population censuses, living standard household surveys, and demographic surveys. The quality of training of Cameroonian statisticians is good. The sub regional training institute ( Institut Sous R\u00e9gional de Statistique et d \u2019 Economie Appliqu\u00e9e ) recruits through a rigorous open competitive exam jointly with the statistics training institutes of Abidjan and Dakar.", + "ner_text": [ + [ + 998, + 1017, + "named" + ], + [ + 561, + 593, + "demographic surveys <> publisher" + ], + [ + 753, + 770, + "demographic surveys <> data type" + ], + [ + 850, + 858, + "demographic surveys <> data geography" + ] + ], + "validated": true, + "empirical_context": "The National Institute of Statistics ( Institut National de la Statistique, INS ) produces and publishes economic statistics ( national accounts, prices indices, external trade, enterprises ) and social statistics ( poverty and living condition, demographic, health ) of satisfactory quality. Cameroon has a solid experience in rolling out large data collection operations such as population censuses, living standard household surveys, and demographic surveys. The quality of training of Cameroonian statisticians is good.", + "type": "survey", + "explanation": "In this context, 'demographic surveys' are explicitly mentioned as part of the data collection operations conducted by the National Institute of Statistics.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'demographic surveys' are typically structured collections of data used for analysis.", + "contextual_reason_agent": "In this context, 'demographic surveys' are explicitly mentioned as part of the data collection operations conducted by the National Institute of Statistics.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 16, + "text": "7 22. Moreover, investments in strengthening data systems ( both technical and financial ) need to continue to deepen the use of data for evidence \u2010 based decision making in the sector and further improve resource allocation. The MOE has successfully deployed an education management information system ( EMIS ), which is now hosting data on all schools and students in the system. Additional investments in a geographical information system ( GIS ) are ongoing and will allow MOE to better plan for expansion of access across all regions in the country. Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize. In addition, the collection, analysis, and use of student learning data and disaggregated and gender \u2010 sensitive data are essential for monitoring, targeting pedagogical interventions, and improving teacher practices in the classroom. C. Relationship to the Country Partnership Framework and Rationale for Use of Instrument 23. Relationship to the CPF. The proposed operation is fully aligned with the Jordan Country Partnership Framework ( CPF ) discussed by the World Bank Group Board on July 14, 2016. The CPF covers the period FY17 \u2013 22 and highlights the economic, geopolitical, and social challenges that Jordan has been facing, particularly with the Syrian refugee crisis.", + "ner_text": [ + [ + 808, + 829, + "named" + ], + [ + 358, + 366, + "student learning data <> reference population" + ], + [ + 834, + 875, + "student learning data <> data description" + ], + [ + 1160, + 1166, + "student learning data <> data geography" + ], + [ + 1257, + 1261, + "student learning data <> publication year" + ] + ], + "validated": true, + "empirical_context": "Leveraging the data available through the EMIS for decision making in the sector is a key opportunity for the MOE which will require additional technical assistance and capacity building to materialize. In addition, the collection, analysis, and use of student learning data and disaggregated and gender \u2010 sensitive data are essential for monitoring, targeting pedagogical interventions, and improving teacher practices in the classroom. C.", + "type": "data", + "explanation": "This is indeed a dataset as it is explicitly mentioned in the context as essential for monitoring and targeting interventions.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'student learning data' suggests a structured collection of information related to student performance.", + "contextual_reason_agent": "This is indeed a dataset as it is explicitly mentioned in the context as essential for monitoring and targeting interventions.", + "contextual_signal": "mentioned as essential for monitoring and targeting interventions", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 255, + "validated": 201, + "not_validated": 54 + } + }, + { + "filename": "004_BOSIB-87c444de-4797-4bf9-b654-4932a7fb0112", + "page": 27, + "text": "The World Bank Integrated Community Resilience Project ( P506969 ) PROJECT APPRAISAL DOCUMENT Page 18 62. M & E teams will also be embedded in each of the Project Implementation Unit ( PIU ), MASS and ADDS. These teams, in coordination with the community facilitators mobilized at local level, will be responsible for collecting data based on the results framework outlined in the M & E plan. They will transmit data on a regular basis, contributing to the periodic reports. Data to track key performance indicators will be collected from various sources: ( a ) project-specific surveys and questionnaires; ( b ) service providers; ( c ) local governments; ( d ) consultant reports; and ( e ) construction progress reports from supervising engineers / engineering firm. 63. A comprehensive midterm review of the project \u2019 s implementation and results will be conducted by the government and the World Bank in 2027, during which the target values will be assessed and any necessary adjustments to the project design will be made if needed. The project will provide targeted support to strengthen M & E capacity within MASS and ADDS. Where feasible, the project will finance consultants to assist MASS and ADDS in developing a detailed M & E and reporting system plan.", + "ner_text": [ + [ + 693, + 722, + "named" + ] + ], + "validated": false, + "empirical_context": "They will transmit data on a regular basis, contributing to the periodic reports. Data to track key performance indicators will be collected from various sources: ( a ) project-specific surveys and questionnaires; ( b ) service providers; ( c ) local governments; ( d ) consultant reports; and ( e ) construction progress reports from supervising engineers / engineering firm. 63.", + "type": "document", + "explanation": "However, it is not a dataset as it is mentioned as a type of report rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'reports' which can imply structured data.", + "contextual_reason_agent": "However, it is not a dataset as it is mentioned as a type of report rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "mentioned only as a project, not as a data source", + "tags": [] + }, + { + "filename": "084_Lebanon-PAD-final-06082018", + "page": 100, + "text": "90 2009 social accounting matrix. One of its closures is a wage-employment curve that captures the high reservation wages in Lebanon. 42. The economic benefits of the Program are assessed independently for each action but are interlinked and will reinforce on another. Each of the main activities of the Program are assessed for job creation and, subsequently, economic impact \u2013 organized by Results Area below. However, it is expected that many of the interventions will raise the economic returns of other interventions. For example, the integrated demand and supply side interventions in Results Areas 2 and 3 are expected to reinforce one another and raise the returns of each individual initiative. Similarly, the telecom investments and reforms as well as trade facilitation reforms from Results Area 1 should augment economic benefits arising from value chain initiatives in ICT and agribusiness from Results Area 2. These Program-level additionalities are not captured in the economic analysis presented below. 43. The Program is expected to create more than 52, 000 permanent jobs and close to 12, 000 temporary construction jobs over a fifteen-year time period. 46 This aggregate number is based on combining CGE results with econometric results for the DLIs where no CGE estimates are available ( SME lending and TSEZ ).", + "ner_text": [ + [ + 59, + 80, + "named" + ] + ], + "validated": false, + "empirical_context": "90 2009 social accounting matrix. One of its closures is a wage-employment curve that captures the high reservation wages in Lebanon. 42.", + "type": "concept", + "explanation": "However, it is not a dataset as it refers to a conceptual model rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because it includes the term 'curve' which can imply a graphical representation of data.", + "contextual_reason_agent": "However, it is not a dataset as it refers to a conceptual model rather than a structured collection of data.", + "contextual_signal": "mentioned only as a concept, not as a data source", + "tags": [] + }, + { + "filename": "152_468240PAD0P105101Official0Use0Only1", + "page": 12, + "text": "The sub - component will also have a range of specific interventions including: ( i ) the design and delivery of initial teacher training and accreditation; ( ii ) refinement of the teacher ranking system linked to TPD; ( iii ) the design and delivery of school leadership programs; ( iv ) teacher and principal appraisal; ( v ) a Trainer Development Program; and ( vi ) the development of a QA system for ITT and TPD. 24. Subcomponent 3. 2: Curriculum, Assessment and Learning Resources will: ( i ) where necessary review and revise the curriculum, learning materials and teacher guides developed under ERfKE I; ( ii ) develop a framework for long \u2013 term, continuing curriculum review; ( iii ) support the development of tools and training for assessment activities at the classroom and national levels ( NAfKE, TIMSS and PISA will be covered under Sub-component 2. 1 ); ( iv ) introduce benchmarking and item banking and related training; ( v ) update ICT equipment for schools and support existing ICT infrastructure; ( vi ) ensure ICT connectivity available in all schools to permit blended e-learning; ( vii ) ensure equity in distribution of learning resources across Jordan ( Financing for the goods associated with this activity is reflected in Component 5. ); and finally ( viii ) conduct Tawjihi-ERfKE alignment activities.", + "ner_text": [ + [ + 823, + 827, + "named" + ] + ], + "validated": false, + "empirical_context": "Subcomponent 3. 2: Curriculum, Assessment and Learning Resources will: ( i ) where necessary review and revise the curriculum, learning materials and teacher guides developed under ERfKE I; ( ii ) develop a framework for long \u2013 term, continuing curriculum review; ( iii ) support the development of tools and training for assessment activities at the classroom and national levels ( NAfKE, TIMSS and PISA will be covered under Sub-component 2. 1 ); ( iv ) introduce benchmarking and item banking and related training; ( v ) update ICT equipment for schools and support existing ICT infrastructure; ( vi ) ensure ICT connectivity available in all schools to permit blended e-learning; ( vii ) ensure equity in distribution of learning resources across Jordan ( Financing for the goods associated with this activity is reflected in Component 5. )", + "type": "assessment", + "explanation": "However, in this context, PISA is mentioned as an assessment framework rather than a structured collection of data.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed PISA is a dataset because it is often associated with educational assessments and data collection.", + "contextual_reason_agent": "However, in this context, PISA is mentioned as an assessment framework rather than a structured collection of data.", + "contextual_signal": "mentioned only as an assessment, not as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "172_multi0page", + "page": 47, + "text": "Procedures in place. 2. 2 Strengthening of US $ 1. 5M Quarterly project Information and reporting MEST ' s planning and implementation progress needs clearly defined. management capacity. report for the first year and the Staff trained in collecting and annual EMIS reports from end recording information for 2002. EMIS. Hardware and software purchased and EMIS set up accordingly. 2. 3 Support to education US $ 1. 6M Inspectors duly trained to service delivery. provide pedagogical support and monitor implementation of the education curriculum. Public information campaign to mobilize communities in setting up SMC. SMC members trained - 42 -", + "ner_text": [ + [ + 357, + 361, + "named" + ] + ], + "validated": false, + "empirical_context": "EMIS. Hardware and software purchased and EMIS set up accordingly. 2.", + "type": "system", + "explanation": "However, EMIS is mentioned as hardware and software setup, indicating it is a system rather than a data source.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is an acronym that could imply a structured system for managing data.", + "contextual_reason_agent": "However, EMIS is mentioned as hardware and software setup, indicating it is a system rather than a data source.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "092_CAMEROON-PAD-03232018", + "page": 32, + "text": "The project aims to increase the number of children benefitting from access to CPCs meeting pre-agreed quality standards in rural areas from 8, 360 to at least 32, 360 pupils. 23 To this end, the project will support the establishment, upgrading, and functioning of at least 1, 000 CPCs in line with agreed upon standards. The project will achieve these standards at the school level by providing an integrated package of capacity-building, sensitization activities, and school grants to beneficiary CPCs. To maximize the impact of the intervention in support of the Early Years Initiative, local councils benefitting from the Social Safety Net Project intervention will be prioritized. The release of IDA funds under this results area will be linked to the following DLI: Increased access to pre-school in rural areas through community pre-school according to standards ( DLI 4 ). Priority Area 2: Strengthening Education System Management 49. Two results areas will be supported under this priority area: the establishment of a standardized student learning assessment system in primary and secondary education and the establishment of an integrated, operational, and functional EMIS platform. Each of these results areas are described in further detail in the following paragraphs. 50. Results Area 5: The establishment of a standardized student learning assessment system for primary and secondary education.", + "ner_text": [ + [ + 1181, + 1194, + "named" + ] + ], + "validated": false, + "empirical_context": "Priority Area 2: Strengthening Education System Management 49. Two results areas will be supported under this priority area: the establishment of a standardized student learning assessment system in primary and secondary education and the establishment of an integrated, operational, and functional EMIS platform. Each of these results areas are described in further detail in the following paragraphs.", + "type": "system", + "explanation": "However, the context describes it as a platform rather than a structured collection of data used for empirical analysis.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed it is a dataset because 'EMIS platform' suggests a system that could store or manage data.", + "contextual_reason_agent": "However, the context describes it as a platform rather than a structured collection of data used for empirical analysis.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + } + }, + { + "filename": "095_Jordan-Educ-Reform-121282-JO-PAD-11142017", + "page": 15, + "text": "6 function of secondary education graduation and competitive screening for university admission. The Tawjihi is administered twice a year to grade 12 students, although any Jordanian having completed grade 12 is entitled to take the examination. In addition, Jordan has participated in the Trends in International Mathematics and Science Study ( TIMSS ) for grade 8 in mathematics and science since 1999, and PISA for 15 \u2010 year \u2010 olds in reading, mathematics, and science since 200616. Jordan also recently started administering the Early Grade Reading and Math Assessments ( EGRA and EGMA ) 17 for grades 2 and 3 students, although this assessment is run by external donors and has not been institutionalized. 19. The general Secondary Certificate Examination ( Tawjihi ) is the single most influential and decisive high \u2010 stakes exam in Jordan \u2019 s education system. This examination has been used for many decades with the dual purpose of a gateway to high school ( upon achieving a passing score ), and to determine the admissions track to higher education. Results from the exam split students into either technical education and vocational training ( TVET ) tertiary colleges or universities ( with extremely demanding cut \u2010 off scores to access the most sought \u2010 after faculties and programs ).", + "ner_text": [ + [ + 409, + 413, + "named" + ], + [ + 259, + 265, + "PISA <> data geography" + ], + [ + 418, + 434, + "PISA <> reference population" + ], + [ + 486, + 492, + "PISA <> data geography" + ], + [ + 839, + 845, + "PISA <> data geography" + ] + ], + "validated": true, + "empirical_context": "The Tawjihi is administered twice a year to grade 12 students, although any Jordanian having completed grade 12 is entitled to take the examination. In addition, Jordan has participated in the Trends in International Mathematics and Science Study ( TIMSS ) for grade 8 in mathematics and science since 1999, and PISA for 15 \u2010 year \u2010 olds in reading, mathematics, and science since 200616. Jordan also recently started administering the Early Grade Reading and Math Assessments ( EGRA and EGMA ) 17 for grades 2 and 3 students, although this assessment is run by external donors and has not been institutionalized.", + "type": "assessment", + "explanation": "PISA is indeed a dataset as it provides structured data on educational outcomes used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed it is a dataset because PISA is a well-known international assessment that collects data on student performance.", + "contextual_reason_agent": "PISA is indeed a dataset as it provides structured data on educational outcomes used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 26, + "validated": 8, + "not_validated": 18 + }, + "term_stats": { + "total": 23, + "validated": 14, + "not_validated": 9 + } + }, + { + "filename": "071_Djibouti-Expanding-Opportunities-for-Learning-Project", + "page": 60, + "text": "The World Bank Expanding Opportunities for Learning Project ( P166059 ) Page 55 of 85 updated COT will incorporate the revised professional development programs and the use of ICT for learning. Percentage of school directors who participate in professional community of practice School directors will create professional communities of practice to exchange good practice, observe, and give feedback to improve school quality. Annually MENFOP Reported by MENFOP General inspection within MENFOP Improved methods for data collection MENFOP will develop a digital application to improve data quality for private and public primary schools and ensure the timely publication of the statistical yearbook. Annually MENFOP Reported by MENFOP ICT unit within MENFOP Modernization of the EMIS MENFOP will perform a diagnosis on the existing data platforms ( EMIS, Human Resource Management ) and will work on recommendations for the setup of a student \u2019 s information systems and an electronic procurement system.", + "ner_text": [ + [ + 848, + 852, + "named" + ] + ], + "validated": false, + "empirical_context": "Annually MENFOP Reported by MENFOP General inspection within MENFOP Improved methods for data collection MENFOP will develop a digital application to improve data quality for private and public primary schools and ensure the timely publication of the statistical yearbook. Annually MENFOP Reported by MENFOP ICT unit within MENFOP Modernization of the EMIS MENFOP will perform a diagnosis on the existing data platforms ( EMIS, Human Resource Management ) and will work on recommendations for the setup of a student \u2019 s information systems and an electronic procurement system.", + "type": "system", + "explanation": "However, EMIS is described as a system rather than a structured collection of data or a dataset.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is mentioned in the context of data collection and management.", + "contextual_reason_agent": "However, EMIS is described as a system rather than a structured collection of data or a dataset.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + }, + { + "filename": "018_BOSIB-624554c1-598f-4576-aa60-cca7d93b64e7", + "page": 63, + "text": "Modelled Age-Earnings Profile Variables Mincerian Earnings Function Estimates Mean ( individuals ages 20 to 29 ) Higher education 1. 027 0. 34 Vocational education 0. 454 0. 3 Secondary complete 0. 275 0. 13 Secondary incomplete 0. 03 0. 22 Experience 0. 024 Varies by age Experience squared \u2212 0. 0005 Varies be age Female \u2212 0. 248 0. 522 Constant 7. 46 1 Note: Mincerian earnings coefficients are from MCC ( 2014 ) using data from 2006. The constant has been adjusted to convert into annual earnings in 2023 US $. Proportions of individuals in each educational category and female are based on the LFS 2020. 5. Modelled effect sizes for ( a ) increasing teacher effectiveness for student learning ( Subcomponent 1. 1 ) and ( b ) supporting innovative instructional practices ( Subcomponent 1. 3 ) range from 0. 05 SD to 1. 3 SD based on effect sizes of similar programs worldwide. Professional development programs have been shown to have positive effects on student learning outcomes ( Popova et al. 2018 ). Table 2. 2 presents a summary of effect sizes found in evaluations of including randomized trials of interventions that reflect the elements of Subcomponents 1. 1 and 1. 3. These evaluated interventions generally include the", + "ner_text": [ + [ + 599, + 602, + "named" + ], + [ + 85, + 110, + "LFS <> reference population" + ], + [ + 409, + 413, + "LFS <> publication year" + ], + [ + 432, + 436, + "LFS <> reference year" + ], + [ + 603, + 610, + "LFS <> reference year" + ], + [ + 1250, + 1268, + "LFS <> usage context" + ] + ], + "validated": true, + "empirical_context": "The constant has been adjusted to convert into annual earnings in 2023 US $. Proportions of individuals in each educational category and female are based on the LFS 2020. 5.", + "type": "survey", + "explanation": "In this context, 'LFS' refers to the Labour Force Survey, which is a structured collection of data used for empirical analysis.", + "is_dataset": true, + "contextual_reason_model": "The extractor may have believed 'LFS' is a dataset because it is referenced in relation to proportions of individuals in educational categories.", + "contextual_reason_agent": "In this context, 'LFS' refers to the Labour Force Survey, which is a structured collection of data used for empirical analysis.", + "contextual_signal": "enumerated alongside known datasets", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 588, + "validated": 572, + "not_validated": 16 + }, + "term_stats": { + "total": 4, + "validated": 2, + "not_validated": 2 + } + }, + { + "filename": "017_BOSIB00611b2300f20930a055190479c46f", + "page": 80, + "text": "The World Bank Ethiopia Human Capital Operation ( P172284 ) 68. Verification Protocol Table: Disbursement Linked Indicators DLI_TBL_VERIFICATION DLI 1 Girls \u2019 promotion rate from grade 11 to 12, nationwide Description The improvement in grils enrollment is measured by the proportion of girls enrolled in grade 11 at a given school year who study in the grade 12 in the following school year. The indicator is calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year t. Data source / Agency EMIS Verification Entity ESS Procedure Data collected by MOE through the annual EMIS process and verified by ESS using a representative sample. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review. DLI_TBL_VERIFICATION DLI 2 Children 0-59 months with diarrhea treated with ORS and Zinc in low performing regions Description The indicator will be measured by the percentage of children under 5 who went to the health facility with diarrhea and were treated with ORS and zinc Data source / Agency DHIS2 Verification Entity ESS Procedure Data collected by MOH through the annual DHIS2 and verified by ESS using a representative sample.", + "ner_text": [ + [ + 600, + 604, + "named" + ] + ], + "validated": false, + "empirical_context": "The indicator is calculated by dividing the number of new enrolments of girls in grade 12 in school year t + 1 by the number of girls enrolled in grade 11 in the previous school year t. Data source / Agency EMIS Verification Entity ESS Procedure Data collected by MOE through the annual EMIS process and verified by ESS using a representative sample. The verified information is then submitted by MOF to the World Bank in a consolidated DLI verification report as part of each semiannual Government / World Bank review.", + "type": "system", + "explanation": "However, EMIS is described as a system for collecting data, not a structured collection of data itself.", + "is_dataset": false, + "contextual_reason_model": "The extractor may have believed EMIS is a dataset because it is associated with data collection processes.", + "contextual_reason_agent": "However, EMIS is described as a system for collecting data, not a structured collection of data itself.", + "contextual_signal": "system but not mentioned as a data source", + "tags": [ + "mixed_type", + "confusing_term", + "borderline" + ], + "type_stats": { + "total": 611, + "validated": 98, + "not_validated": 513 + }, + "term_stats": { + "total": 105, + "validated": 15, + "not_validated": 90 + } + } +] \ No newline at end of file