Spaces:
No application file
No application file
# Preprocess an option-chain CSV: fill numeric gaps with column means, scale
# the price/volume columns with MinMaxScaler, replace the categorical columns
# with tokenizer ids, and split the timestamp into date and time columns.
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoTokenizer

# Load data
df = pd.read_csv('NIFTY_OPTION_CHAIN_data.csv')

# Handle missing values
# Only numeric columns have a mean. Without numeric_only=True, pandas >= 2.0
# raises TypeError as soon as the frame contains a non-numeric column (this
# script later treats Index, Expiry, OptionType and datetime as text).
# Gaps in non-numeric columns are left untouched by this step.
df = df.fillna(df.mean(numeric_only=True))

# Normalize numerical columns
numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'oi']
scaler = MinMaxScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Tokenize categorical columns
# NOTE(review): 'llama-3.1' is kept as written; it must resolve to a local
# directory or a Hub repo id for from_pretrained to succeed -- confirm.
tokenizer = AutoTokenizer.from_pretrained('llama-3.1')
categorical_columns = ['Index', 'Expiry', 'OptionType']
for column in categorical_columns:
    # encode() takes one string, not a whole Series, so each distinct label
    # is encoded once and every row is mapped to its list of token ids.
    # astype(str) turns any remaining missing value into the text 'nan'.
    labels = df[column].astype(str)
    token_ids = {label: tokenizer.encode(label) for label in labels.unique()}
    df[column] = labels.map(token_ids.get)

# Convert datetime columns
df['datetime'] = pd.to_datetime(df['datetime'])
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time