Spaces:
Sleeping
Sleeping
| from abc import ABC, abstractmethod | |
| import pandas as pd | |
| from src.constants import TRAIN_SPLIT, TEST_SPLIT, VALIDATE_SPLIT, DATA_BASE_URL | |
| from src.exception import MyException | |
| from typing import Optional, Literal | |
| import sys | |
| import logging | |
class DataFetcher(ABC):
    """Abstract base for objects that load a dataset into a DataFrame.

    Concrete fetchers must override :meth:`export_data_as_df`.
    """

    def __init__(self) -> None:
        """Initialise the fetcher; the base class itself keeps no state."""

    @abstractmethod
    async def export_data_as_df(self) -> pd.DataFrame:
        """Load the dataset and return it as a ``pandas.DataFrame``.

        Declared abstract so that a subclass which forgets to override it
        fails at instantiation time instead of silently returning ``None``
        from an inherited no-op coroutine.
        """
class SentenceDataFetcher(DataFetcher):
    """Fetch a split of the sentence dataset from parquet and flatten it.

    The parquet location is built by appending one of the split constants
    (``TRAIN_SPLIT``, ``VALIDATE_SPLIT``, ``TEST_SPLIT``) to ``url``.
    """

    def __init__(self, url: str = DATA_BASE_URL):
        """Store the base location the split paths are appended to.

        Args:
            url: Prefix of the parquet files; defaults to ``DATA_BASE_URL``.
        """
        super().__init__()
        self.url = url

    @staticmethod
    async def recompile_data(data: pd.DataFrame) -> pd.DataFrame:
        """Split the ``translation`` column into ``English`` and ``Hindi``.

        Each ``translation`` value is indexed with the keys ``'en'`` and
        ``'hi'``; the results become the ``English`` and ``Hindi`` columns
        and the ``translation`` column is dropped.

        Declared as a static method because it takes no ``self``; this keeps
        the class-level call working and also makes instance-level calls
        valid. The input frame is left untouched: a new frame is returned.

        Args:
            data: Frame containing a ``translation`` column.

        Returns:
            A new frame with ``English`` and ``Hindi`` in place of
            ``translation``.

        Raises:
            MyException: If the column is missing or a value cannot be
                indexed with ``'en'`` / ``'hi'``.
        """
        try:
            translations = data['translation']
            # assign() builds a new frame, so the caller's frame is not mutated.
            return data.assign(
                English=translations.apply(lambda pair: pair['en']),
                Hindi=translations.apply(lambda pair: pair['hi']),
            ).drop('translation', axis=1)
        except Exception as e:
            raise MyException(e, sys) from e

    async def export_data_as_df(self, split: Literal['train', 'validation', 'test'] = "train") -> pd.DataFrame:
        """Read the requested split and return it with flattened columns.

        Note that ``pd.read_parquet`` is a synchronous call, so this
        coroutine blocks while the file is being read.

        Args:
            split: Which split to load: ``'train'``, ``'validation'`` or
                ``'test'``.

        Returns:
            The split as a frame with ``English`` and ``Hindi`` columns.

        Raises:
            MyException: If ``split`` is not a known key, the parquet file
                cannot be read, or the frame cannot be recompiled.
        """
        try:
            logging.info("Exporting data from export_data_as_df method")
            splits = {
                "train": TRAIN_SPLIT,
                "validation": VALIDATE_SPLIT,
                "test": TEST_SPLIT,
            }
            data: pd.DataFrame = pd.read_parquet(self.url + splits[split])
            return await self.recompile_data(data=data)
        except MyException:
            # Already wrapped by recompile_data; do not wrap it a second time.
            raise
        except Exception as e:
            raise MyException(e, sys) from e