Spaces:
Sleeping
Sleeping
data scraping without training function
#1
by
khhamid - opened
- data_collection/__pycache__/exception.cpython-36.pyc +0 -0
- data_collection/__pycache__/logger.cpython-36.pyc +0 -0
- data_collection/components/__pycache__/data_fetcher.cpython-36.pyc +0 -0
- data_collection/components/__pycache__/data_preprocessor.cpython-36.pyc +0 -0
- data_collection/components/data_fetcher.py +79 -0
- data_collection/components/data_preprocessor.py +35 -0
- data_collection/exception.py +6 -0
- data_collection/logger.py +33 -0
- data_collection/logs/pipeline_2025-04-01.log +38 -0
- data_collection/pipeline +2 -0
- data_collection/scraper.py +69 -0
data_collection/__pycache__/exception.cpython-36.pyc
ADDED
|
Binary file (588 Bytes). View file
|
|
|
data_collection/__pycache__/logger.cpython-36.pyc
ADDED
|
Binary file (842 Bytes). View file
|
|
|
data_collection/components/__pycache__/data_fetcher.cpython-36.pyc
ADDED
|
Binary file (2.14 kB). View file
|
|
|
data_collection/components/__pycache__/data_preprocessor.cpython-36.pyc
ADDED
|
Binary file (1.52 kB). View file
|
|
|
data_collection/components/data_fetcher.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import time
|
| 4 |
+
from logger import get_logger
|
| 5 |
+
from exception import CustomException
|
| 6 |
+
|
| 7 |
+
logger = get_logger()
|
| 8 |
+
|
| 9 |
+
|
# Map a human-readable coin name (as accepted on the CLI) to its Binance
# USDT trading-pair ticker symbol.
_COIN_PAIRS = [
    ("bitcoin", "BTCUSDT"),
    ("ethereum", "ETHUSDT"),
    ("binance-coin", "BNBUSDT"),
    ("ripple", "XRPUSDT"),
    ("cardano", "ADAUSDT"),
    ("solana", "SOLUSDT"),
    ("polkadot", "DOTUSDT"),
    ("dogecoin", "DOGEUSDT"),
    ("shiba-inu", "SHIBUSDT"),
    ("litecoin", "LTCUSDT"),
    ("chainlink", "LINKUSDT"),
    ("polygon", "MATICUSDT"),
    ("avalanche", "AVAXUSDT"),
    ("uniswap", "UNIUSDT"),
    ("cosmos", "ATOMUSDT"),
    ("stellar", "XLMUSDT"),
    ("vechain", "VETUSDT"),
    ("filecoin", "FILUSDT"),
    ("algorand", "ALGOUSDT"),
    ("monero", "XMRUSDT"),
    ("bitcoin-cash", "BCHUSDT"),
    ("eos", "EOSUSDT"),
    ("tezos", "XTZUSDT"),
    ("aave", "AAVEUSDT"),
    ("compound", "COMPUSDT"),
    ("maker", "MKRUSDT"),
]
symbol_map = dict(_COIN_PAIRS)
class DataFetcher:
    """Fetches historical candlestick (kline) data from the Binance public API."""

    def __init__(self, coin_name="ethereum", interval="1d", limit=365):
        # coin_name: human-readable key into ``symbol_map`` (e.g. "ethereum").
        # interval: Binance kline interval string (e.g. "1m", "1h", "1d").
        # limit: total number of rows to fetch (may exceed the 1000-row API cap).
        self.coin = coin_name
        self.interval = interval
        self.limit = limit
        self.url = "https://api.binance.com/api/v3/klines"

    def fetch_klines(self):
        """Fetch up to ``self.limit`` klines, paginating past the 1000-row API cap.

        Returns:
            A list of raw kline rows in chronological (oldest-first) order,
            matching the ordering of a single un-paginated Binance response.

        Raises:
            CustomException: if the coin name is unknown or an HTTP request fails.
        """
        # Fail early with a clear message instead of a bare KeyError inside
        # the request loop when the coin name is not in the mapping.
        if self.coin not in symbol_map:
            raise CustomException(
                f"Unknown coin '{self.coin}'. Supported: {', '.join(sorted(symbol_map))}"
            )

        all_data = []
        end_time = None  # None = start from the most recent candle

        while len(all_data) < self.limit:
            params = {
                "symbol": symbol_map[self.coin],
                "interval": self.interval,
                # Binance caps a single request at 1000 rows.
                "limit": min(self.limit - len(all_data), 1000),
            }
            if end_time:
                params["endTime"] = end_time  # page backwards into older data

            try:
                response = requests.get(self.url, params=params)
                response.raise_for_status()
                data = response.json()

                if not data:
                    break  # no older data available

                # Each batch arrives oldest-first; PREPEND it so the combined
                # list stays chronological across batches. (Appending, as the
                # original did, interleaved newer batches before older ones
                # whenever limit > 1000.)
                all_data = data + all_data
                end_time = data[0][0] - 1  # open time of oldest row, minus 1 ms

                time.sleep(1)  # Binance API rate limiting

            except requests.exceptions.RequestException as e:
                logger.error(f"Error fetching Binance data: {e}")
                raise CustomException(f"API Request Failed: {e}")

        # Keep exactly the most recent `limit` rows (drop excess oldest rows).
        return all_data[-self.limit:]
data_collection/components/data_preprocessor.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
from logger import get_logger
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
logger = get_logger()
|
| 6 |
+
|
class DataPreprocessor:
    """Converts raw Binance kline rows into a typed DataFrame and saves it as CSV."""

    @staticmethod
    def process_klines(data):
        """Convert Binance API kline rows to a cleaned DataFrame.

        Args:
            data: list of 12-element kline rows as returned by the Binance
                /api/v3/klines endpoint.

        Returns:
            pandas.DataFrame with datetime timestamps and float price/volume columns.
        """
        columns = ["Open_Time", "Open", "High", "Low", "Close", "Volume",
                   "Close_Time", "Quote_Asset_Volume", "Number_of_Trades",
                   "Taker_Buy_Base_Volume", "Taker_Buy_Quote_Volume", "Ignore"]

        df = pd.DataFrame(data, columns=columns)

        # Convert millisecond-epoch timestamps to datetime
        df["Open_Time"] = pd.to_datetime(df["Open_Time"], unit="ms")
        df["Close_Time"] = pd.to_datetime(df["Close_Time"], unit="ms")

        # Ensure the numeric columns are floats
        num_cols = ["Open", "High", "Low", "Close", "Volume",
                    "Quote_Asset_Volume", "Taker_Buy_Base_Volume", "Taker_Buy_Quote_Volume"]
        df[num_cols] = df[num_cols].astype(float)

        logger.info("Data successfully processed and cleaned.")
        return df

    @staticmethod
    def save_to_csv(df, file_path=None):
        """Save the DataFrame to a CSV file, creating the directory if needed.

        Fixes vs. the original:
        - The default path is computed at *call* time; a default-argument
          expression is evaluated once at class definition, so every call
          reused the same stale timestamp.
        - The timestamp uses strftime("%Y_%m_%d_%H"); str(datetime.now())
          embeds ':' characters, which are invalid in Windows filenames
          (this failed with [Errno 22] in earlier pipeline runs).
        - "raw_datasetes" typo corrected to "raw_datasets" to match scraper.py.
        """
        if file_path is None:
            stamp = datetime.now().strftime("%Y_%m_%d_%H")
            file_path = f"data/raw_datasets/crypto_data_{stamp}.csv"
        # os.makedirs("") raises; only create a directory when the path has one.
        directory = os.path.dirname(file_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        df.to_csv(file_path, index=False)
        logger.info(f"Data successfully saved to {file_path}")
data_collection/exception.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
|
class CustomException(Exception):
    """Application-wide error type for the data-collection pipeline.

    Wraps an error message and keeps a reference to ``error_details``
    (the ``sys`` module by default).
    """

    def __init__(self, message, error_details=sys):
        # Delegate message storage/formatting to the base Exception.
        super().__init__(message)
        # NOTE(review): the default is the sys module itself — presumably so
        # handlers can reach sys.exc_info(); confirm intended usage.
        self.error_details = error_details
data_collection/logger.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
def get_logger():
    """Return the shared "CryptoPipeline" logger, configuring it on first use.

    Logs at INFO level to both the console and a per-day file
    ``logs/pipeline_YYYY-MM-DD.log``. Subsequent calls return the same logger
    without attaching duplicate handlers.

    Fix vs. the original: the FileHandler (which opens the log file on
    construction) was built on *every* call, even when ``hasHandlers()``
    prevented it from being attached — leaking one open file descriptor per
    call. All handler setup now happens only inside the first-use guard.
    """
    logger = logging.getLogger("CryptoPipeline")
    logger.setLevel(logging.INFO)

    # Configure handlers (and touch the filesystem) only on first use.
    if not logger.hasHandlers():
        # Ensure logs directory exists
        log_dir = "logs"
        os.makedirs(log_dir, exist_ok=True)

        # Log file name is based on the current date
        log_file = os.path.join(log_dir, f"pipeline_{datetime.now().strftime('%Y-%m-%d')}.log")

        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

        # File handler: logs/pipeline_YYYY-MM-DD.log
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(formatter)

        # Stream handler: mirror logs to the console
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        logger.addHandler(stream_handler)

    return logger
data_collection/logs/pipeline_2025-04-01.log
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-01 00:15:25,314 - INFO - Starting the cryptocurrency data pipeline...
|
| 2 |
+
2025-04-01 00:15:27,128 - INFO - Data successfully processed and cleaned.
|
| 3 |
+
2025-04-01 00:15:27,162 - INFO - Data successfully saved to artifacts/crypto_data.csv
|
| 4 |
+
2025-04-01 00:15:27,220 - INFO - Pipeline executed successfully!
|
| 5 |
+
2025-04-01 00:22:05,701 - INFO - Starting the cryptocurrency data pipeline...
|
| 6 |
+
2025-04-01 00:22:07,406 - INFO - Data successfully processed and cleaned.
|
| 7 |
+
2025-04-01 00:22:07,406 - ERROR - An error occurred in the pipeline: name 'datetime' is not defined
|
| 8 |
+
2025-04-01 00:22:53,568 - INFO - Starting the cryptocurrency data pipeline...
|
| 9 |
+
2025-04-01 00:22:55,350 - INFO - Data successfully processed and cleaned.
|
| 10 |
+
2025-04-01 00:22:55,355 - ERROR - An error occurred in the pipeline: [Errno 22] Invalid argument: 'data/raw_datasetes/crypto_data_2025-04-01 00:22:55.352917.csv'
|
| 11 |
+
2025-04-01 00:28:53,891 - INFO - Starting the cryptocurrency data pipeline...
|
| 12 |
+
2025-04-01 00:28:55,583 - INFO - Data successfully processed and cleaned.
|
| 13 |
+
2025-04-01 00:28:55,592 - INFO - Data successfully saved to data/raw_datasetes/crypto_data_2025_04_01_00.csv
|
| 14 |
+
2025-04-01 00:28:55,626 - INFO - Pipeline executed successfully!
|
| 15 |
+
2025-04-01 00:30:44,830 - INFO - Starting the cryptocurrency data pipeline...
|
| 16 |
+
2025-04-01 00:30:46,542 - INFO - Data successfully processed and cleaned.
|
| 17 |
+
2025-04-01 00:30:46,552 - INFO - Data successfully saved to ../../data/raw_datasetes/crypto_data_2025_04_01_00.csv
|
| 18 |
+
2025-04-01 00:30:46,601 - INFO - Pipeline executed successfully!
|
| 19 |
+
2025-04-01 00:32:20,299 - INFO - Starting the cryptocurrency data pipeline...
|
| 20 |
+
2025-04-01 00:32:22,076 - INFO - Data successfully processed and cleaned.
|
| 21 |
+
2025-04-01 00:32:22,089 - INFO - Data successfully saved to ../../data/raw_datasetes/crypto_data_2025_04_01_00.csv
|
| 22 |
+
2025-04-01 00:32:22,133 - INFO - Pipeline executed successfully!
|
| 23 |
+
2025-04-01 00:33:02,058 - INFO - Starting the cryptocurrency data pipeline...
|
| 24 |
+
2025-04-01 00:33:03,808 - INFO - Data successfully processed and cleaned.
|
| 25 |
+
2025-04-01 00:33:03,820 - INFO - Data successfully saved to ../../data/raw_datasets/crypto_data_2025_04_01_00.csv
|
| 26 |
+
2025-04-01 00:33:03,876 - INFO - Pipeline executed successfully!
|
| 27 |
+
2025-04-01 00:35:19,340 - INFO - Starting the cryptocurrency data pipeline...
|
| 28 |
+
2025-04-01 00:35:21,049 - INFO - Data successfully processed and cleaned.
|
| 29 |
+
2025-04-01 00:35:21,056 - INFO - Data successfully saved to ../../data/raw_datasets/crypto_data_2025_04_01_00.csv
|
| 30 |
+
2025-04-01 00:35:21,088 - INFO - Pipeline executed successfully!
|
| 31 |
+
2025-04-01 00:35:31,100 - INFO - Starting the cryptocurrency data pipeline...
|
| 32 |
+
2025-04-01 00:35:32,404 - INFO - Data successfully processed and cleaned.
|
| 33 |
+
2025-04-01 00:35:32,420 - INFO - Data successfully saved to ../../data/raw_datasets/crypto_data_2025_04_01_00.csv
|
| 34 |
+
2025-04-01 00:35:32,464 - INFO - Pipeline executed successfully!
|
| 35 |
+
2025-04-01 00:35:42,482 - INFO - Starting the cryptocurrency data pipeline...
|
| 36 |
+
2025-04-01 00:35:43,899 - INFO - Data successfully processed and cleaned.
|
| 37 |
+
2025-04-01 00:35:43,906 - INFO - Data successfully saved to ../../data/raw_datasets/crypto_data_2025_04_01_00.csv
|
| 38 |
+
2025-04-01 00:35:43,946 - INFO - Pipeline executed successfully!
|
data_collection/pipeline
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
# Scratch/debug script: prints the timestamp format used to name dataset
# CSV files elsewhere in the pipeline (e.g. crypto_data_YYYY_MM_DD_HH.csv).
from datetime import datetime
print(datetime.now().strftime("%Y_%m_%d_%H"))
data_collection/scraper.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
from logger import get_logger
|
| 3 |
+
from exception import CustomException
|
| 4 |
+
from components.data_fetcher import DataFetcher
|
| 5 |
+
from components.data_preprocessor import DataPreprocessor
|
| 6 |
+
import argparse
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import time
|
| 9 |
+
|
| 10 |
+
logger = get_logger()
|
| 11 |
+
|
def scrape():
    """Run one scrape cycle: parse CLI args, fetch, preprocess, and save data.

    Reads ``--coin_name`` (required), ``--interval`` and ``--limit`` from
    sys.argv on every call. Logs progress, and re-raises any failure wrapped
    in CustomException (now chained with ``from e`` so the original traceback
    is preserved — the original raise discarded the causal link).
    """
    try:
        logger.info("Starting the cryptocurrency data pipeline...")
        print("Starting the cryptocurrency data pipeline...")

        # Step 1: Fetch data from Binance
        print("Step 1: Fetching data from Binance...")
        # Argument parser setup
        parser = argparse.ArgumentParser(description="Fetch cryptocurrency data from Binance")
        parser.add_argument("--coin_name", type=str, help="Name of the cryptocurrency (e.g., ethereum, bitcoin)", required=True)
        parser.add_argument("--interval", type=str, default="1d", help="Time interval (e.g., 1m, 1h, 1d)")
        parser.add_argument("--limit", type=int, default=365, help="Number of data points to fetch")

        args = parser.parse_args()
        # Use arguments to fetch data
        fetcher = DataFetcher(coin_name=args.coin_name, interval=args.interval, limit=args.limit)
        raw_data = fetcher.fetch_klines()

        print("Step 1 completed.")

        # Step 2: Preprocess the data
        print("Step 2: Processing the data...")
        preprocessor = DataPreprocessor()
        df = preprocessor.process_klines(raw_data)
        print("Step 2 completed.")

        # Step 3: Save to CSV, stamped to the hour so hourly runs get distinct files
        print("Step 3: Saving data to CSV...")
        stamp = datetime.now().strftime("%Y_%m_%d_%H")
        preprocessor.save_to_csv(df, file_path=f"../../data/raw_datasets/crypto_data_{stamp}.csv")
        print("Step 3 completed.")

        # Step 4: Display first few rows
        print("Step 4: Displaying processed data sample...")
        print(df.head())

        logger.info("Pipeline executed successfully!")
        print("Pipeline executed successfully!")

    except Exception as e:
        logger.error(f"An error occurred in the pipeline: {e}")
        print(f"An error occurred in the pipeline: {e}")
        # Chain the original exception so the full traceback survives the wrap.
        raise CustomException(e, sys) from e
def train():
    """Placeholder for the model-training step.

    NOTE(review): training is not implemented — this pipeline currently only
    scrapes data (the gap raised in the accompanying discussion thread).
    """
    print("Training the model...")
if __name__ == "__main__":
    # Run scrape cycles forever, pausing 10 s between cycles; after every
    # 163 scrapes (counter runs 1 -> 164) run a training pass and restart
    # the count.
    cycle = 1
    while True:
        scrape()
        cycle += 1
        if cycle == 164:
            train()
            cycle = 1
        time.sleep(10)