Sentence-Translator / src /data_access /data_fetcher.py
VashuTheGreat2's picture
Upload folder using huggingface_hub
b758d48 verified
Raw
History Blame Contribute Delete
1.57 kB
from abc import ABC, abstractmethod
import pandas as pd
from src.constants import TRAIN_SPLIT, TEST_SPLIT, VALIDATE_SPLIT, DATA_BASE_URL
from src.exception import MyException
from typing import Optional, Literal
import sys
import logging
class DataFetcher(ABC):
    """Abstract interface for objects that export a dataset as a DataFrame.

    Concrete fetchers must implement the ``export_data_as_df`` coroutine;
    the class cannot be instantiated until they do.
    """

    def __init__(self) -> None:
        """Initialise the fetcher; the base class keeps no state."""
        return None

    @abstractmethod
    async def export_data_as_df(self) -> pd.DataFrame:
        """Produce the dataset as a ``pandas.DataFrame``.

        The base implementation does nothing and yields ``None``; the
        loading logic is supplied by subclasses.
        """
        ...
class SentenceDataFetcher(DataFetcher):
    """Load a split of the English/Hindi sentence data from a parquet file.

    The parquet location is formed by concatenating ``url`` with one of the
    ``TRAIN_SPLIT`` / ``VALIDATE_SPLIT`` / ``TEST_SPLIT`` constants.
    """

    def __init__(self, url: str = DATA_BASE_URL):
        """Remember the base location of the dataset.

        Args:
            url: Prefix that the per-split constant is appended to (plain
                string concatenation, no separator is inserted).
        """
        super().__init__()
        self.url = url

    @staticmethod
    async def recompile_data(data: pd.DataFrame) -> pd.DataFrame:
        """Flatten the ``translation`` column into ``English`` and ``Hindi``.

        Each value of ``data['translation']`` is indexed with ``'en'`` and
        ``'hi'``; the results become the ``English`` and ``Hindi`` columns
        and the ``translation`` column is removed.

        Args:
            data: Frame containing a ``translation`` column.

        Returns:
            A new frame; ``data`` itself is left unmodified.

        Raises:
            MyException: Wraps any error raised while transforming, e.g. a
                missing ``translation`` column or a missing language key.
        """
        try:
            translations = data['translation']
            # Build the result on the dropped copy so the caller's frame is
            # not mutated as a side effect of the conversion.
            return data.drop(columns='translation').assign(
                English=translations.apply(lambda pair: pair['en']),
                Hindi=translations.apply(lambda pair: pair['hi']),
            )
        except Exception as e:
            raise MyException(e, sys) from e

    async def export_data_as_df(self, split: Literal['train', 'validation', 'test'] = "train") -> pd.DataFrame:
        """Read the requested split and return it with flattened columns.

        Args:
            split: Which split to load; one of ``'train'``, ``'validation'``
                or ``'test'``.

        Returns:
            The split as a frame with ``English`` and ``Hindi`` columns in
            place of ``translation``.

        Raises:
            MyException: Wraps any failure, including an unknown ``split``
                or an error while reading the parquet file.
        """
        try:
            logging.info("Exporting data from export_data_as_df method")
            splits = {
                "train": TRAIN_SPLIT,
                "validation": VALIDATE_SPLIT,
                "test": TEST_SPLIT,
            }
            if split not in splits:
                # A bare KeyError would only carry the offending key; say
                # what was expected instead.
                raise ValueError(
                    f"Unknown split {split!r}; expected one of {sorted(splits)}"
                )
            # NOTE: pd.read_parquet is a synchronous call, so it runs to
            # completion before this coroutine yields control.
            data: pd.DataFrame = pd.read_parquet(self.url + splits[split])
            return await SentenceDataFetcher.recompile_data(data=data)
        except MyException:
            # Already wrapped by recompile_data; do not wrap a second time.
            raise
        except Exception as e:
            raise MyException(e, sys) from e