File size: 921 Bytes
19fc84f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from abc import ABC, abstractmethod
from src.rag.retrieval import DocDB


class IRawDataProcessor(ABC):
    """Abstract interface for converting between raw data and query data.

    Both methods are abstract, so a subclass must override each of them
    before it can be instantiated.
    """

    @abstractmethod
    def get_queries(self, input_file: str, output_file: str):
        """Take the path of a raw data file and output structured query data.

        Args:
            input_file: Path of the raw data file.
            output_file: Presumably the path the structured query data is
                written to -- confirm against concrete implementations.
        """

    @abstractmethod
    def get_documents(self, query_file: str, output_file: str):
        """Take the path of a structured query file and output raw data.

        Args:
            query_file: Path of the structured query data file.
            output_file: Presumably the path the raw data is written to --
                confirm against concrete implementations.
        """


class DatasetProcessor(ABC):
    """Abstract base class that every dataset processor derives from.

    Subclasses must override both ``process_queries`` and
    ``process_documents`` before they can be instantiated.
    """

    @abstractmethod
    def process_queries(self, input_file: str, **kwargs) -> list:
        """Process the queries found in an input file.

        Args:
            input_file: Path of the file the queries are read from.
            **kwargs: Extra options; their meaning is left to the concrete
                subclass.

        Returns:
            A list of formatted queries.
        """

    @abstractmethod
    def process_documents(self, query_file: str, db: DocDB, **kwargs) -> dict:
        """Process the documents that belong to a set of queries.

        Args:
            query_file: Path of the file holding the queries.
            db: A ``DocDB`` instance; presumably the store the documents are
                looked up in -- confirm against concrete implementations.
            **kwargs: Extra options; their meaning is left to the concrete
                subclass.

        Returns:
            A dictionary of documents.
        """