Spaces:

Bhuvi20
/

forecast

Paused

App Files Files Community

Bhuvanesh24 committed on Nov 13, 2024

Commit

4e188a6

1 Parent(s): 8abf9b3

Added app.py

Browse files

Files changed (4) hide show

app.py +40 -0
requirements.txt +7 -0
src/data.py +99 -0
src/model.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import os
+import torch
+from fastapi import FastAPI
+from pydantic import BaseModel
+import numpy as np
+from src.model import LSTM
# Initialize FastAPI app
app = FastAPI()

# Device setup: run on the GPU when CUDA is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Ensure the model file is available in the Hugging Face Space's environment.
# Failing at import time gives a clear startup error instead of a failure on
# the first request.
model_path = './water_forecast_2.pth'
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file '{model_path}' not found.")

# Build the network with the hyper-parameters the checkpoint is loaded into:
# 8 input features, three LSTM layers of 128 units, 3 outputs.
model = LSTM(input_size=8, lstm_layer_sizes=[128,128,128], output_size=3).to(device)
print("Loading model...")
# The checkpoint is passed straight to load_state_dict, so only a mapping of
# tensors is needed. weights_only=True makes torch.load refuse to unpickle
# arbitrary Python objects, which avoids code execution from a tampered file.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
print("Model loaded successfully")
# Inference only: switch the module to evaluation mode.
model.eval()
class ForecastRequest(BaseModel):
    """Request body accepted by the ``/predict`` endpoint.

    Only ``structured_data`` is read by the prediction handler in this file;
    ``state_idx`` and ``target_year`` are validated as integers but are not
    otherwise consumed here.
    """

    # Integer index identifying a state.
    # NOTE(review): the index-to-state mapping is not defined in this file.
    state_idx: int
    # Year the caller wants a forecast for.
    target_year: int
    # Mapping whose values (in insertion order) are stacked into the model
    # input; the keys themselves are ignored by the handler.
    structured_data: dict
@app.post("/predict")
async def predict_usage(data: ForecastRequest):
    """Run the loaded LSTM on the request's feature rows and return its output.

    The values of ``data.structured_data`` are stacked, in insertion order,
    into a float32 array and fed to the model. ``state_idx`` and
    ``target_year`` are not used by this handler.

    Args:
        data: Parsed request body.

    Returns:
        dict: ``{"prediction": <nested list>}`` holding the model output.

    Raises:
        HTTPException: 422 when ``structured_data`` is empty, is ragged or
            non-numeric, or does not have ``model.input_size`` features in its
            last dimension.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    rows = list(data.structured_data.values())
    if not rows:
        raise HTTPException(status_code=422, detail="structured_data must not be empty.")

    try:
        features = np.array(rows, dtype=np.float32)
    except (TypeError, ValueError) as exc:
        # Ragged rows or non-numeric entries cannot form a rectangular array.
        raise HTTPException(
            status_code=422,
            detail="structured_data values must be numeric rows of equal length.",
        ) from exc

    # nn.LSTM accepts 2-D (unbatched) or 3-D (batched) input whose last
    # dimension equals the configured input size; reject anything else here
    # rather than letting it fail inside the model with an opaque 500.
    if features.ndim not in (2, 3) or features.shape[-1] != model.input_size:
        raise HTTPException(
            status_code=422,
            detail=(
                f"Expected rows of {model.input_size} features, "
                f"got array of shape {list(features.shape)}."
            ),
        )

    # NOTE(review): the training dataset standardises features with a
    # StandardScaler; no scaling is applied here — confirm whether the
    # caller is expected to send already-scaled values.
    tensor_data = torch.tensor(features, dtype=torch.float32).to(device)
    with torch.no_grad():
        outputs = model(tensor_data)
    return {"prediction": outputs.tolist()}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+torch
+fastapi
+pydantic
+numpy
+pandas
+scikit-learn
+uvicorn

src/data.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import os
+import pandas as pd
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import StandardScaler
class WaterDataset(Dataset):
    """Sliding-window dataset pairing population/land-use history with water usage.

    Each sample is ``sequence_length`` consecutive merged rows for one state
    (8 features per row) and the usage record (Domestic, Industrial,
    Irrigation) of the year following the last row of the window.
    """

    # Feature columns of the merged frame, in the order fed to the model.
    _FEATURE_COLUMNS = ['population', 'urban_population', 'rural_population',
                        'forest', 'barren', 'others', 'fallow', 'cropped']
    # Usage columns used as the prediction target.
    _LABEL_COLUMNS = ['Domestic', 'Industrial', 'Irrigation']
    # Full column layout of the merged frame (kept even when it is empty).
    _MERGED_COLUMNS = ['year', 'state', *_FEATURE_COLUMNS]

    def __init__(self, sequence_length=5, transform=None, data_dir='data'):
        """
        Loads LUC, population, and usage data, merges them, builds the
        training sequences and standardises the features.
        Args:
            sequence_length (int): The length of each data sequence for time series forecasting.
            transform (callable, optional): Optional transform applied to each
                ``(features, label)`` sample returned by ``__getitem__``.
            data_dir (str): Directory containing ``luc.csv``, ``population.csv``
                and ``usage.csv``. Defaults to ``'data'``.
        Raises:
            ValueError: If no sequence with a matching usage label can be built.
        """
        self.sequence_length = sequence_length
        self.luc = pd.read_csv(os.path.join(data_dir, 'luc.csv'))
        self.population = pd.read_csv(os.path.join(data_dir, 'population.csv'))
        self.usage = pd.read_csv(os.path.join(data_dir, 'usage.csv'))
        self.transform = transform
        self.years = sorted(set(self.usage['Year']))
        self.states = sorted(set(self.usage['State']))
        self.all_years = sorted(set(self.population['Year']))
        self.df = self.merge_data()
        self.x, self.y = self.create_sequence()
        if len(self.x) == 0:
            # Fail with a clear message instead of an obscure reshape error.
            raise ValueError(
                "No training sequences could be built: check that the CSV files "
                "overlap in year/state and that sequence_length is not too large."
            )
        # Standardise per feature: flatten (samples, steps, features) to
        # (samples * steps, features), fit/transform, then restore the shape.
        self.scaler = StandardScaler()
        n_features = self.x.shape[-1]
        flat = self.x.reshape(-1, n_features)
        self.x = self.scaler.fit_transform(flat).reshape(self.x.shape)

    def merge_data(self):
        """
        Merges land use classification (LUC) and population data.
        Returns:
            pd.DataFrame: One row per (year, state) for which both a population
                          record for the year and a LUC record for the year and
                          state exist. Always has the columns ``year``,
                          ``state`` and the 8 feature columns, even when empty.
        """
        merged_data = []
        for year in self.all_years:
            # NOTE(review): population is matched on year only, so every state
            # receives the first population row of that year — confirm that
            # population.csv holds a single record per year.
            population_data = self.population[self.population['Year'] == year]
            if population_data.empty:
                continue
            luc_for_year = self.luc[self.luc['Year'] == year]
            for state in self.states:
                luc_data = luc_for_year[luc_for_year['State'] == state]
                if luc_data.empty:
                    continue
                merged_data.append({
                    'year': year,
                    'state': state,
                    'population': population_data['Population'].values[0],
                    'urban_population': population_data['Urban Population'].values[0],
                    'rural_population': population_data['Rural Population'].values[0],
                    'forest': luc_data['Forest'].values[0],
                    'barren': luc_data['Barren'].values[0],
                    'others': luc_data['Others'].values[0],
                    'fallow': luc_data['Fallow'].values[0],
                    'cropped': luc_data['Cropped'].values[0],
                })
        # Passing the columns explicitly keeps the schema when nothing merged,
        # so create_sequence() can still filter on 'state'.
        return pd.DataFrame(merged_data, columns=self._MERGED_COLUMNS)

    def create_sequence(self):
        """
        Creates sequences of input data and their corresponding labels for training.
        Windows whose following year has no usage record for the state are skipped.
        Returns:
            tuple: Two numpy arrays, one for data sequences and one for label sequences.
        """
        data_sequences, label_sequences = [], []
        for state in self.states:
            state_data = self.df[self.df['state'] == state].sort_values('year')
            usage_state_data = self.usage[self.usage['State'] == state]
            # NOTE(review): the final possible window (start = len - sequence_length)
            # is not visited; confirm whether usage data can exist for the year
            # right after the last merged year.
            for start in range(len(state_data) - self.sequence_length):
                sequence = state_data.iloc[start:start + self.sequence_length]
                # The label is the usage of the year right after the window.
                year = sequence['year'].values[-1] + 1
                usage_label = usage_state_data[usage_state_data['Year'] == year]
                if usage_label.empty:
                    continue
                data_sequences.append(
                    sequence[self._FEATURE_COLUMNS].values.astype(np.float32))
                label_sequences.append(
                    usage_label[self._LABEL_COLUMNS].values[0].astype(np.float32))
        return np.array(data_sequences), np.array(label_sequences)

    def __len__(self):
        """Return the number of (sequence, label) samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return sample ``index`` as a ``(features, label)`` pair of float32 tensors.

        When a ``transform`` was supplied, it is called with the pair and its
        return value is returned instead.
        """
        sample = (torch.tensor(self.x[index], dtype=torch.float32),
                  torch.tensor(self.y[index], dtype=torch.float32))
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

src/model.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import torch
+import torch.nn as nn
+import math
+#from transformers import AutoModelForCausalLM, AutoTokenizer
class LSTM(nn.Module):
    """Three stacked single-layer LSTMs followed by a linear output head.

    The layers are kept as separate ``lstm_layer_1/2/3`` attributes (rather
    than one multi-layer ``nn.LSTM``) so each can have its own hidden size.
    """

    def __init__(self, input_size, lstm_layer_sizes, output_size):
        """
        Args:
            input_size: Number of features per time step.
            lstm_layer_sizes: Sequence whose first three entries are the hidden
                sizes of the three LSTM layers.
            output_size: Number of values produced by the final linear layer.
        """
        super().__init__()
        self.input_size = input_size
        hidden_1 = lstm_layer_sizes[0]
        hidden_2 = lstm_layer_sizes[1]
        hidden_3 = lstm_layer_sizes[2]
        # All layers use batch_first=True, i.e. batched input is
        # (batch, seq_len, features).
        self.lstm_layer_1 = nn.LSTM(input_size, hidden_1, batch_first=True)
        self.lstm_layer_2 = nn.LSTM(hidden_1, hidden_2, batch_first=True)
        self.lstm_layer_3 = nn.LSTM(hidden_2, hidden_3, batch_first=True)
        self.fc = nn.Linear(hidden_3, output_size)

    def forward(self, x):
        """Feed ``x`` through the three LSTMs and project the last hidden state.

        Args:
            x: Input tensor whose last dimension is ``input_size``.

        Returns:
            The linear head applied to the final hidden state of the third LSTM.
        """
        final_hidden = None
        stack = (self.lstm_layer_1, self.lstm_layer_2, self.lstm_layer_3)
        for layer in stack:
            # Each layer consumes the full output sequence of the previous one;
            # only the last layer's final hidden state is kept.
            x, (final_hidden, _) = layer(x)
        return self.fc(final_hidden[-1])
class Linear(nn.Module):
    """Fully connected network (input -> 1024 -> 256 -> output) with ReLU activations.

    The network is applied to every time step and the result for the last
    step along dimension 1 is returned.
    """

    def __init__(self, input_size, output_size):
        """
        Args:
            input_size: Number of features in the last dimension of the input.
            output_size: Number of values produced per time step.
        """
        super(Linear, self).__init__()
        # The activation module is `nn.ReLU`; `nn.relu` does not exist in
        # torch.nn and made the constructor raise AttributeError.
        self.relu = nn.ReLU()
        self.input = nn.Linear(input_size, 1024)
        self.fc = nn.Linear(1024, 256)
        self.output = nn.Linear(256, output_size)

    def forward(self, x):
        """Apply the three layers and return the output at the last step.

        Args:
            x: 3-D tensor whose last dimension is ``input_size``; the final
                indexing ``[:, -1, :]`` requires exactly three dimensions.

        Returns:
            Tensor of shape ``(x.shape[0], output_size)``. Values are
            non-negative because ReLU is also applied after the output layer.
        """
        out = self.relu(self.input(x))
        out = self.relu(self.fc(out))
        out = self.relu(self.output(out))
        # Keep only the last position along dimension 1.
        return out[:, -1, :]
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a sequence-first tensor.

    Even feature indices hold sines and odd indices hold cosines of
    ``position / 10000 ** (2i / dim)``. The table is stored as the
    non-trainable buffer ``pe`` of shape ``(max_len, 1, dim)``.
    """

    def __init__(self, dim, max_len=300):
        """
        Args:
            dim: Size of the feature dimension (even or odd).
            max_len: Number of positions precomputed in the table.
        """
        super(PositionalEncoding, self).__init__()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, dim, 2).float() * (-math.log(10000.0) / dim))
        angles = position * div_term
        pe = torch.zeros(max_len, dim)
        pe[:, 0::2] = torch.sin(angles)
        # For odd `dim` there is one fewer odd column than even columns, so the
        # cosine block is trimmed to dim // 2 frequencies; without this the
        # assignment fails with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, :dim // 2])
        # (max_len, dim) -> (max_len, 1, dim) so it broadcasts over dimension 1.
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding for the first ``x.size(0)`` positions to ``x``.

        Args:
            x: Tensor indexed by position along dimension 0 and with ``dim``
                features in its last dimension, e.g. ``(seq_len, batch, dim)``.
                ``x.size(0)`` must not exceed ``max_len``.

        Returns:
            Tensor of the same shape as ``x``.
        """
        return x + self.pe[:x.size(0), :]
+class Transformer(nn.Module):
+    def __init__(self):
+        super(Transformer,self).__init__()