Ryoya Awano
deploy: fix MedLFQA Marginal mode sample matching
19fc84f
raw
history blame contribute delete
921 Bytes
from abc import ABC, abstractmethod
from src.rag.retrieval import DocDB
class IRawDataProcessor(ABC):
    """Abstract interface for converting between raw data and query files.

    Both methods are abstract, so the class cannot be instantiated until a
    subclass overrides ``get_queries`` and ``get_documents``. The base
    implementations do nothing and return ``None``.
    """

    @abstractmethod
    def get_queries(
        self,
        input_file: str,
        output_file: str,
    ):
        """Turn raw data into structured query data.

        Args:
            input_file: Path of the raw data file to read.
            output_file: Path handed to the implementation for its output;
                presumably where the structured queries are written --
                confirm against concrete subclasses.
        """

    @abstractmethod
    def get_documents(
        self,
        query_file: str,
        output_file: str,
    ):
        """Produce output data from a structured query file.

        Args:
            query_file: Path of the structured query data file to read.
            output_file: Path handed to the implementation for its output.
                NOTE(review): the original comment calls the output "raw
                data"; given the method name this is presumably the
                documents for those queries -- confirm with implementers.
        """
class DatasetProcessor(ABC):
    """Abstract base that every dataset processor derives from.

    Subclasses must override both ``process_queries`` and
    ``process_documents`` before they can be instantiated.
    """

    @abstractmethod
    def process_queries(
        self,
        input_file: str,
        **kwargs,
    ) -> list:
        """Read queries from ``input_file`` and return them as a list.

        Args:
            input_file: Path of the file holding the queries.
            **kwargs: Extra options; their meaning is left to each subclass.

        Returns:
            A list of formatted queries.
        """

    @abstractmethod
    def process_documents(
        self,
        query_file: str,
        db: DocDB,
        **kwargs,
    ) -> dict:
        """Process the documents for the queries and return them as a dict.

        Args:
            query_file: Path of the query file.
            db: ``DocDB`` instance supplied by the caller; presumably the
                store the documents are looked up in -- confirm against
                concrete subclasses.
            **kwargs: Extra options; their meaning is left to each subclass.

        Returns:
            A dictionary of documents.
        """