ShadowGard3n commited on
Commit
c2fb4c6
·
1 Parent(s): a868f29

First commit

Browse files
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.12 based on your pycache version
FROM python:3.12

# Set the working directory to /app
WORKDIR /app

# Copy the requirements file and install dependencies
# (copied before the app code so this layer is cached across code-only changes)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create a non-root user for security (Required by HF Spaces)
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Copy the rest of the application code
COPY --chown=user . .

# Run the setup script to download models from your HF Repo
# This happens during the build/start phase
RUN python setup_models.py

# Expose the port Hugging Face Spaces expects (7860)
EXPOSE 7860

# Command to run the application
# 0.0.0.0 is required for Docker containers to be accessible
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from routes import predictions
from fastapi.middleware.cors import CORSMiddleware


app = FastAPI()

# Local development front-ends (Vite on 5173, Live Server on 5500/5501)
# that are allowed to call this API from the browser.
_DEV_PORTS = (5173, 5501, 5500)
origins = [
    f"http://{host}:{port}"
    for port in _DEV_PORTS
    for host in ("127.0.0.1", "localhost")
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,      # only the whitelisted dev origins
    allow_credentials=True,
    allow_methods=["*"],        # all HTTP verbs
    allow_headers=["*"],        # all request headers
)

app.include_router(predictions.router)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ scikit-learn
4
+ numpy
5
+ xgboost
6
+ joblib
7
+ pandas
8
+ matplotlib
9
+ httpx
10
+ python-dotenv
11
+ huggingface-hub
routes/__pycache__/predictions.cpython-312.pyc ADDED
Binary file (9.81 kB). View file
 
routes/predictions.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, HTTPException, Query
from typing import List
from schemas.prediction_schemas import CropPredictionRequest, FertilizerPredictionRequest
from services.prediction_services import get_crop_prediction, get_fertilizer_prediction
from services.market_services import get_market_prediction, _create_features
from services.marketTracking_services import fetch_market_data
from schemas.marketTracker_schemas import MarketPriceRequest, MarketPriceData
from schemas.weather_schemas import WeatherResponse, ForecastResponse, DayForecast
from services.weather_service import get_weather_data_for_city, AIR_QUALITY_MAP, get_weather_forecast_for_city
import os
import joblib
import pandas as pd
router = APIRouter()


# Directory holding one pickled price model per commodity.
MODELS_DIR = 'models'
# Commodity name (derived from the file name, '/' restored from '_') -> fitted model.
models = {}

# Ensure models dir exists; the app still starts without market models.
# NOTE(review): services/market_services.py loads the same models and CSV
# again at import time — consider sharing one copy.
if os.path.exists(MODELS_DIR):
    for model_file in os.listdir(MODELS_DIR):
        if model_file.endswith('.pkl'):
            # File names encode '/' as '_' (e.g. "A_B.pkl" -> "A/B").
            commodity_name = model_file.replace('.pkl', '').replace('_', '/')
            models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
            print(f"✅ Model loaded for: {commodity_name}")

try:
    # Ensure your CSV is accessible; history is indexed by 'created_at'.
    DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
    print("✅ Dataset loaded.")
except FileNotFoundError:
    # Keep the module importable; endpoints guard against DF_FULL being None.
    print("❌ 'final_output.csv' not found. Predictions will fail.")
    DF_FULL = None
34
+
35
@router.post("/api/predict_crop")
def predict_crop(request: CropPredictionRequest):
    """Recommend a crop from soil nutrients and weather readings."""
    return get_crop_prediction(request)
38
+
39
@router.post("/api/predict_fertilizer")
def predict_fertilizer(request: FertilizerPredictionRequest):
    """Recommend a fertilizer from soil readings and crop/soil types."""
    return get_fertilizer_prediction(request)
42
+
43
@router.get("/api/predict/{commodity}")
def predict_commodity_price(commodity: str):
    """Return recent actual-vs-predicted prices plus a 180-day forecast.

    Path parameter:
        commodity: commodity name (matched case-insensitively against the
            loaded model keys).

    Raises:
        HTTPException 500 if the dataset failed to load or forecasting fails.
        HTTPException 404 if no model or no history exists for the commodity.
    """
    if DF_FULL is None:
        raise HTTPException(status_code=500, detail="Server Error: Dataset not loaded.")

    # Model keys are stored upper-case; normalise the path parameter.
    target_commodity = commodity.upper()
    if target_commodity not in models:
        raise HTTPException(status_code=404, detail=f"Model for '{commodity}' not found.")
    model = models[target_commodity]

    # History for this commodity, averaged to one price per day.
    df_commodity = DF_FULL[DF_FULL['commodity'].str.upper() == target_commodity]
    if df_commodity.empty:
        raise HTTPException(status_code=404, detail="No historical data found for commodity")
    df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
    last_known_date = df_daily.index.max()

    # Validation window: last 90 days of actual prices vs model predictions.
    start_context_date = last_known_date - pd.Timedelta(days=90)
    df_featured = _create_features(df_daily)
    test_df = df_featured.loc[df_featured.index >= start_context_date]

    recent_data = []
    if not test_df.empty:
        feature_cols = [col for col in test_df.columns if col != 'modal_price']
        try:
            predictions = model.predict(test_df[feature_cols])
            recent_data = [
                {
                    "date": date.strftime('%Y-%m-%d'),
                    "actual_price": float(actual),
                    "predicted_price": float(pred),
                }
                for date, actual, pred in zip(test_df.index, test_df['modal_price'], predictions)
            ]
        except Exception as e:
            # Best-effort: the forecast below is still useful without this chart.
            print(f"Warning: Could not generate history validation: {e}")

    # Iterative future forecast starting the day after the last known price.
    # NOTE(review): get_market_prediction filters on an exact string match
    # (no .str.upper()); confirm the dataset stores commodity names upper-case.
    try:
        daily_forecast_df = get_market_prediction(model, DF_FULL, target_commodity, last_known_date)
        future_data = [
            {"date": date.strftime('%Y-%m-%d'), "forecast_price": float(row['forecast'])}
            for date, row in daily_forecast_df.iterrows()
        ]
    except Exception as e:
        print(f"Forecast Error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

    return {
        "commodity": commodity,
        "recent_data": recent_data,
        "forecast_data": future_data,
    }
111
+
112
@router.post(
    "/api/marketPrice",
    response_model=List[MarketPriceData],
    summary="Fetch Agricultural Market Prices",
    description="Retrieves daily market price data for a specific commodity, state, and APMC over the last 7 days."
)
async def get_market_price(request: MarketPriceRequest):
    """Proxy endpoint: delegates to the eNAM fetcher and returns its rows."""
    market_data = await fetch_market_data(request)
    return market_data
121
+
122
@router.get("/weather/{city}", response_model=WeatherResponse)
async def get_current_weather(city: str):
    """Return current conditions for a city, with a readable air-quality label."""
    try:
        weather_data = await get_weather_data_for_city(city)

        # The upstream payload nests values under 'current' and 'location'.
        current_data = weather_data.get("current", {})
        location_data = weather_data.get("location", {})
        air_quality_data = current_data.get("air_quality", {})

        # Translate the numeric US-EPA index into a human-readable label.
        aqi_index = air_quality_data.get("us-epa-index")
        air_quality_description = AIR_QUALITY_MAP.get(aqi_index, "Unknown")

        response_data = WeatherResponse(
            location_name=location_data.get("name", "N/A"),
            temperature_c=current_data.get("temp_c"),
            condition=current_data.get("condition", {}).get("text", "N/A"),
            humidity=current_data.get("humidity"),
            wind_kph=current_data.get("wind_kph"),
            cloud=current_data.get("cloud"),
            is_day=current_data.get("is_day"),
            air_quality=air_quality_description
        )

        return response_data

    except HTTPException as e:
        # Propagate errors already translated by the weather service (404/503/...).
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
151
+
152
+
153
@router.get("/weather/forecast/{city}", response_model=ForecastResponse, summary="Get Weather Forecast")
async def get_weather_forecast(city: str, days: int = Query(default=1, ge=1, le=14, description="Number of days to forecast (between 1 and 14).")):
    """
    Retrieves the weather forecast for a specific city for a given number of days.
    """
    try:
        forecast_data = await get_weather_forecast_for_city(city, days)

        location_data = forecast_data.get("location", {})
        # Upstream nests the per-day list under forecast.forecastday.
        forecast_days_raw = forecast_data.get("forecast", {}).get("forecastday", [])

        # Map each raw day into the typed response model, defaulting
        # missing optional fields rather than failing validation.
        processed_forecast_days = []
        for day_data in forecast_days_raw:
            day_details = day_data.get("day", {})
            processed_day = DayForecast(
                date=day_data.get("date"),
                maxtemp_c=day_details.get("maxtemp_c"),
                mintemp_c=day_details.get("mintemp_c"),
                avgtemp_c=day_details.get("avgtemp_c"),
                condition=day_details.get("condition", {}).get("text", "N/A"),
                daily_chance_of_rain=day_details.get("daily_chance_of_rain", 0),
                avghumidity=day_details.get("avghumidity", 0),
                maxwind_kph=day_details.get("maxwind_kph", 0.0),
            )
            processed_forecast_days.append(processed_day)

        response_data = ForecastResponse(
            location_name=location_data.get("name", "N/A"),
            forecast_days=processed_forecast_days
        )

        return response_data

    except HTTPException as e:
        # Propagate errors already translated by the weather service.
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
schemas/__pycache__/marketTracker_schemas.cpython-312.pyc ADDED
Binary file (1.8 kB). View file
 
schemas/__pycache__/prediction_schemas.cpython-312.pyc ADDED
Binary file (990 Bytes). View file
 
schemas/__pycache__/weather_schemas.cpython-312.pyc ADDED
Binary file (2.05 kB). View file
 
schemas/marketTracker_schemas.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Union
3
+
4
class MarketPriceRequest(BaseModel):
    """Identifies one commodity at one APMC market for a price lookup."""
    commodity_name: str = Field(..., example="Potato")
    state_name: str = Field(..., example="UTTAR PRADESH")
    apmc_name: str = Field(..., example="AGRA")
8
+
9
class MarketPriceData(BaseModel):
    """One daily trade row from the eNAM portal, renamed to our field names."""
    # We map our desired field 'date' to the API's 'created_at' field.
    date: str = Field(..., alias="created_at", example="2025-09-16")

    # The API sends these as strings, so we accept them as Union[str, float] for safety.
    modal_price: Union[str, float] = Field(..., alias="modal_price")
    min_price: Union[str, float] = Field(..., alias="min_price")
    max_price: Union[str, float] = Field(..., alias="max_price")

    # Map our fields to the API's fields
    total_arrival: str = Field(..., alias="commodity_arrivals")
    total_trade: str = Field(..., alias="commodity_traded")

    commodity: str = Field(..., alias="commodity")
    apmc: str = Field(..., alias="apmc")

    class Config:
        # This allows Pydantic to create the model from a dictionary
        from_attributes = True
        # This is CRITICAL: it tells Pydantic to use the 'alias' names when reading the data
        populate_by_name = True
30
+
schemas/market_schemas.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Dict
3
+
4
class MarketPriceInput(BaseModel):
    """Request body naming the commodity to forecast."""
    commodity: str
6
+
7
class MarketPriceOutput(BaseModel):
    """Forecast response: date string -> forecast price."""
    commodity: str
    forecast: Dict[str, float]
schemas/prediction_schemas.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class CropPredictionRequest(BaseModel):
    """Soil nutrient and weather readings for crop recommendation."""
    Nitrogen: float
    Phosphorus: float
    Potassium: float
    temperature: float
    humidity: float
    ph: float
    rainfall: float
11
+
12
class FertilizerPredictionRequest(BaseModel):
    """Soil/weather readings plus categorical crop and soil types."""
    Nitrogen: float
    Phosphorus: float
    Potassium: float
    temperature: float
    humidity: float
    moisture: float
    crop_type: str
    soil_type: str
21
+
schemas/weather_schemas.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
class WeatherResponse(BaseModel):
    """
    Defines the updated structure for the weather data response.
    """
    location_name: str
    temperature_c: float
    condition: str
    humidity: int
    wind_kph: float
    # New fields added below
    cloud: int
    is_day: int  # Will be 1 for day, 0 for night
    air_quality: str  # human-readable label mapped from the US-EPA index

    class Config:
        from_attributes = True
20
+
21
+
22
class DayForecast(BaseModel):
    """
    Defines the structure for a single day's forecast.
    """
    date: str
    maxtemp_c: float
    mintemp_c: float
    avgtemp_c: float
    condition: str
    daily_chance_of_rain: int  # percentage chance as reported upstream
    avghumidity: int
    maxwind_kph: float

    class Config:
        from_attributes = True
37
+
38
class ForecastResponse(BaseModel):
    """
    Defines the overall structure for the weather forecast response.
    """
    location_name: str
    forecast_days: List[DayForecast]

    class Config:
        from_attributes = True
services/__pycache__/marketTracking_services.cpython-312.pyc ADDED
Binary file (2.93 kB). View file
 
services/__pycache__/market_services.cpython-312.pyc ADDED
Binary file (4.66 kB). View file
 
services/__pycache__/prediction_services.cpython-312.pyc ADDED
Binary file (3.62 kB). View file
 
services/__pycache__/weather_service.cpython-312.pyc ADDED
Binary file (3.54 kB). View file
 
services/marketTracking_services.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ from datetime import date, timedelta
3
+ from fastapi import HTTPException
4
+ from schemas.marketTracker_schemas import MarketPriceRequest, MarketPriceData
5
+
6
+ # The external API endpoint we are fetching data from
7
+ ENAM_API_URL = "https://enam.gov.in/web/Ajax_ctrl/trade_data_list"
8
+
9
async def fetch_market_data(request: MarketPriceRequest) -> list[MarketPriceData]:
    """Fetch the last 7 days of trade data for one commodity/state/APMC.

    Posts a form-encoded query to the eNAM trade-data endpoint and validates
    each returned row into a MarketPriceData. Returns an empty list when the
    portal has no rows for the query.

    Raises:
        HTTPException 502 if the portal answers with an error status or is
            unreachable.
        HTTPException 500 for any other unexpected failure (e.g. malformed rows).
    """
    today = date.today()
    start_date = today - timedelta(days=6)  # 7-day window, inclusive of today

    payload = {
        "language": "en",
        "stateName": request.state_name,
        "apmcName": request.apmc_name,
        "commodityName": request.commodity_name,
        "fromDate": start_date.strftime("%Y-%m-%d"),
        "toDate": today.strftime("%Y-%m-%d"),
    }

    # eNAM expects an AJAX-style form post.
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest"
    }

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(ENAM_API_URL, data=payload, headers=headers, timeout=10.0)
            response.raise_for_status()

            json_data = response.json()
            api_rows = json_data.get("data", [])
            if not api_rows:
                return []

            return [MarketPriceData.model_validate(row) for row in api_rows]

        except httpx.HTTPStatusError as exc:
            # Fix: raise_for_status() raises HTTPStatusError, which is NOT a
            # RequestError; previously this fell into the generic handler and
            # was misreported as an internal 500. An upstream error status is
            # a bad-gateway condition.
            print(f"eNAM portal returned status {exc.response.status_code}.")
            raise HTTPException(status_code=502, detail=f"eNAM portal returned an error status: {exc.response.status_code}")
        except httpx.RequestError as exc:
            print(f"An error occurred while requesting {exc.request.url!r}.")
            raise HTTPException(status_code=502, detail=f"Failed to communicate with eNAM portal: {exc}")
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            raise HTTPException(status_code=500, detail="An internal server error occurred.")
51
+
52
+
services/market_services.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import pandas as pd
2
+ # import numpy as np
3
+ # import matplotlib.pyplot as plt
4
+ # import joblib
5
+ # import os
6
+ # import io
7
+ # import base64
8
+
9
+ # MODELS_DIR = 'models'
10
+ # models = {}
11
+ # for model_file in os.listdir(MODELS_DIR):
12
+ # if model_file.endswith('.pkl'):
13
+ # commodity_name = model_file.replace('.pkl', '').replace('_', '/')
14
+ # models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
15
+ # print(f"✅ Model loaded for: {commodity_name}")
16
+
17
+
18
+ # try:
19
+ # DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
20
+ # print("✅ Dataset loaded and indexed by 'created_at'.")
21
+ # except FileNotFoundError:
22
+ # print("'your_dataset.csv' not found. The application cannot start.")
23
+ # DF_FULL = None
24
+
25
+
26
+ # def _create_features(df):
27
+ # df = df.copy()
28
+ # df['dayofweek'] = df.index.dayofweek
29
+ # df['dayofyear'] = df.index.dayofyear
30
+ # df['month'] = df.index.month
31
+ # df['year'] = df.index.year
32
+ # df['quarter'] = df.index.quarter
33
+ # df['weekofyear'] = df.index.isocalendar().week.astype(int)
34
+ # df['price_lag_7'] = df['modal_price'].shift(7)
35
+ # df['price_lag_14'] = df['modal_price'].shift(14)
36
+ # df['price_lag_30'] = df['modal_price'].shift(30)
37
+ # df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
38
+ # df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()
39
+ # return df.dropna()
40
+
41
+ # def _forecast_six_months(model, df_full, commodity, last_known_date):
42
+ # df_commodity = df_full[df_full['commodity'] == commodity]
43
+ # df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
44
+
45
+ # future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
46
+ # future_df = pd.DataFrame(index=future_dates)
47
+ # future_df['modal_price'] = np.nan
48
+ # df_extended = pd.concat([df_daily, future_df])
49
+
50
+ # for date in future_dates:
51
+ # featured_row = _create_features(df_extended.loc[:date]).iloc[-1:]
52
+ # FEATURES = [col for col in featured_row.columns if col != 'modal_price']
53
+ # prediction = model.predict(featured_row[FEATURES])[0]
54
+ # df_extended.loc[date, 'modal_price'] = prediction
55
+
56
+ # daily_forecast_df = df_extended.loc[future_dates].copy()
57
+ # daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
58
+ # return daily_forecast_df
59
+
60
+ # def get_market_prediction(commodity: str):
61
+
62
+ # if DF_FULL is None:
63
+ # return {"error": "Dataset not found. Please check server configuration."}
64
+
65
+ # if commodity not in models:
66
+ # return {"error": f"Model for commodity '{commodity}' not found."}
67
+
68
+ # model = models[commodity]
69
+
70
+ # # Prepare data for the specific commodity
71
+ # df_commodity = DF_FULL[DF_FULL['commodity'] == commodity]
72
+ # df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})
73
+ # df_featured = _create_features(df_daily)
74
+
75
+ # test_df = df_featured.loc[df_featured.index >= '2024-01-01']
76
+ # if test_df.empty:
77
+ # return {"error": f"Not enough recent data to make a prediction for '{commodity}'."}
78
+
79
+ # FEATURES = [col for col in test_df.columns if col != 'modal_price']
80
+ # TARGET = 'modal_price'
81
+ # X_test, y_test = test_df[FEATURES], test_df[TARGET]
82
+
83
+ # predictions = model.predict(X_test)
84
+
85
+ # last_known_date = test_df.index.max()
86
+ # daily_forecast_df = _forecast_six_months(model, DF_FULL, commodity, last_known_date)
87
+ # monthly_forecast_df = daily_forecast_df.resample('ME').last().head(6)
88
+
89
+ # plt.figure(figsize=(12, 6))
90
+ # plt.style.use('seaborn-v0_8-whitegrid')
91
+ # plt.plot(y_test.index, y_test, label='Actual Price (Recent History)', color='green', linewidth=2)
92
+ # plt.plot(y_test.index, predictions, label='Model Prediction (on Recent History)', color='red', linestyle='--')
93
+ # plt.plot(daily_forecast_df.index, daily_forecast_df['forecast'], label='6-Month Forecast', color='purple', linestyle=':')
94
+ # plt.title(f'{commodity} Price: History, Prediction & Forecast', fontsize=16)
95
+ # plt.xlabel('Date')
96
+ # plt.ylabel('Modal Price')
97
+ # plt.legend()
98
+ # plt.tight_layout()
99
+
100
+ # buf = io.BytesIO()
101
+ # plt.savefig(buf, format='png')
102
+ # buf.seek(0)
103
+ # plot_base64 = base64.b64encode(buf.read()).decode('utf-8')
104
+ # plt.close()
105
+
106
+ # formatted_forecast = monthly_forecast_df.reset_index().rename(columns={'index': 'date'}).to_dict('records')
107
+
108
+ # return {
109
+ # "commodity": commodity,
110
+ # "monthly_forecast": formatted_forecast,
111
+ # "plot_base64": plot_base64
112
+ # }
113
+
114
+
115
import pandas as pd
import numpy as np
import joblib
import os
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Optional

# --- Load Models & Data ---
MODELS_DIR = 'models'
# Commodity name (derived from the file name, '/' restored from '_') -> fitted model.
models = {}

# Ensure models dir exists
# NOTE(review): routes/predictions.py loads the same models and CSV again at
# import time — consider sharing one copy between the two modules.
if os.path.exists(MODELS_DIR):
    for model_file in os.listdir(MODELS_DIR):
        if model_file.endswith('.pkl'):
            # File names encode '/' as '_' (e.g. "A_B.pkl" -> "A/B").
            commodity_name = model_file.replace('.pkl', '').replace('_', '/')
            models[commodity_name] = joblib.load(os.path.join(MODELS_DIR, model_file))
            print(f"✅ Model loaded for: {commodity_name}")

try:
    # Ensure your CSV is accessible; history is indexed by 'created_at'.
    DF_FULL = pd.read_csv('final_output.csv', parse_dates=['created_at'], index_col='created_at')
    print("✅ Dataset loaded.")
except FileNotFoundError:
    # Keep the module importable; callers must handle DF_FULL being None.
    print("❌ 'final_output.csv' not found. Predictions will fail.")
    DF_FULL = None
142
+
143
+ # --- Helper Functions ---
144
+
145
def _create_features(df):
    """Derive calendar, lag and rolling features from a daily price frame.

    Args:
        df: DataFrame with a DatetimeIndex and a 'modal_price' column.

    Returns:
        A new DataFrame with the original column plus the engineered
        features, with rows containing any NaN (from lags/rolling
        windows) dropped. The input frame is not modified.
    """
    frame = df.copy()
    idx = frame.index
    price = frame['modal_price']
    frame = frame.assign(
        # Calendar features derived from the date index.
        dayofweek=idx.dayofweek,
        dayofyear=idx.dayofyear,
        month=idx.month,
        year=idx.year,
        quarter=idx.quarter,
        weekofyear=idx.isocalendar().week.astype(int),
        # Lagged prices and trailing 30-day statistics (shifted by one
        # day so a row never sees its own price).
        price_lag_7=price.shift(7),
        price_lag_14=price.shift(14),
        price_lag_30=price.shift(30),
        rolling_mean_30=price.shift(1).rolling(window=30).mean(),
        rolling_std_30=price.shift(1).rolling(window=30).std(),
    )
    return frame.dropna()
160
+
161
def get_market_prediction(model, df_full, commodity, last_known_date):
    """
    Iteratively predicts the next 180 days.

    Each day's prediction is written back into the working frame so the
    following day's lag/rolling features can be computed from it.

    Args:
        model: fitted regressor exposing .predict(features_frame).
        df_full: full price history indexed by date, with 'commodity' and
            'modal_price' columns.
        commodity: exact commodity name to filter df_full on (case-sensitive).
        last_known_date: last date with real data; forecasting starts the
            following day.

    Returns:
        DataFrame indexed by the 180 future dates with a 'forecast' column.
    """
    df_commodity = df_full[df_full['commodity'] == commodity]
    df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})

    future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')

    # Create a container for future data
    future_df = pd.DataFrame(index=future_dates)
    future_df['modal_price'] = np.nan

    # Append future placeholder to history so we can calculate lags on the fly
    df_extended = pd.concat([df_daily, future_df])

    for date in future_dates:
        # Create features for the specific day (uses previous days' data for lags)
        # Note: We take a slice up to 'date' to generate features dynamically
        subset = df_extended.loc[:date]
        if len(subset) < 30: continue  # Safety check for rolling windows

        featured_row = _create_features(subset).iloc[-1:]

        if featured_row.empty: continue

        FEATURES = [col for col in featured_row.columns if col != 'modal_price']

        # Predict
        prediction = model.predict(featured_row[FEATURES])[0]

        # Update the dataframe so the next loop can use this prediction for its lag features
        df_extended.loc[date, 'modal_price'] = prediction

    # Extract just the future part
    daily_forecast_df = df_extended.loc[future_dates].copy()
    daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)

    return daily_forecast_df
200
+
services/prediction_services.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import joblib
import numpy as np
import os

# Resolve the models directory relative to this file so the process's
# working directory does not matter.
current_dir = os.path.dirname(os.path.abspath(__file__))

models_dir = os.path.join(current_dir, '..', 'models')

# Crop recommendation: feature scaler + classifier (see get_crop_prediction
# for the expected feature order).
crop_prediction_model = joblib.load(os.path.join(models_dir, 'crop_prediction_model.pkl'))
crop_prediction_scaler = joblib.load(os.path.join(models_dir, 'crop_prediction_scaler.pkl'))

# Fertilizer recommendation: scaler + model, plus encoders for the
# categorical crop/soil inputs and for decoding the predicted class.
fertilizer_prediction_model = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_model.pkl'))
fertilizer_prediction_scaler = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_scaler.pkl'))
crop_encoder = joblib.load(os.path.join(models_dir, 'crop_encoder.pkl'))
soil_encoder = joblib.load(os.path.join(models_dir, 'soil_encoder.pkl'))
fertilizer_encoder = joblib.load(os.path.join(models_dir, 'fertilizer_encoder.pkl'))
17
+
18
+
19
def get_crop_prediction(data):
    """Recommend a crop for the supplied soil and weather readings.

    Args:
        data: object exposing Nitrogen, Phosphorus, Potassium, temperature,
            humidity, ph and rainfall attributes.

    Returns:
        dict with a single 'predicted_crop' entry.
    """
    readings = [
        data.Nitrogen,
        data.Phosphorus,
        data.Potassium,
        data.temperature,
        data.humidity,
        data.ph,
        data.rainfall,
    ]
    # Scale with the scaler loaded alongside the model, then classify.
    scaled = crop_prediction_scaler.transform(np.array([readings]))
    predicted = crop_prediction_model.predict(scaled)
    return {"predicted_crop": predicted[0]}
32
+
33
+
34
def get_fertilizer_prediction(data):
    """Recommend a fertilizer for the supplied readings and crop/soil types.

    Returns:
        dict with a single 'predicted_fertilizer' entry.
    """
    # Encode the categorical inputs with the encoders loaded at import time.
    # NOTE(review): transform() is given a 2-D [[value]] array; confirm these
    # encoders expect that shape (LabelEncoder expects 1-D).
    crop_encoded = crop_encoder.transform(np.array([[data.crop_type]]))
    soil_encoded = soil_encoder.transform(np.array([[data.soil_type]]))
    # Numeric features ordered: temperature, humidity, moisture, N, K, P.
    # NOTE(review): the N/K/P ordering differs from the crop model above —
    # confirm it matches what the fertilizer scaler/model were trained on.
    features = np.array([[
        data.temperature,
        data.humidity,
        data.moisture,
        data.Nitrogen,
        data.Potassium,
        data.Phosphorus
    ]])
    scaled_features = fertilizer_prediction_scaler.transform(features)
    # Append the encoded soil and crop columns after the scaled numerics.
    # NOTE(review): wrapping as [soil_encoded] adds an extra dimension if the
    # encoder already returns a 2-D array — verify the concatenated shape.
    final_features = np.concatenate([scaled_features, [soil_encoded], [crop_encoded]], axis=1)
    prediction_encoded = fertilizer_prediction_model.predict(final_features)
    # Map the numeric class back to the fertilizer name.
    prediction = fertilizer_encoder.inverse_transform(prediction_encoded)

    return {"predicted_fertilizer": prediction[0]}
51
+
52
+
53
+
54
+ # import joblib
55
+ # import numpy as np
56
+ # import os
57
+
58
+ # current_dir = os.path.dirname(os.path.abspath(__file__))
59
+
60
+ # models_dir = os.path.join(current_dir, '..', 'models')
61
+
62
+ # crop_prediction_model = joblib.load(os.path.join(models_dir, 'crop_prediction_model.pkl'))
63
+ # crop_prediction_scaler = joblib.load(os.path.join(models_dir, 'crop_prediction_scaler.pkl'))
64
+
65
+ # fertilizer_prediction_model = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_model.pkl'))
66
+ # fertilizer_prediction_scaler = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_scaler.pkl'))
67
+ # crop_encoder = joblib.load(os.path.join(models_dir, 'crop_encoder.pkl'))
68
+ # soil_encoder = joblib.load(os.path.join(models_dir, 'soil_encoder.pkl'))
69
+ # fertilizer_encoder = joblib.load(os.path.join(models_dir, 'fertilizer_encoder.pkl'))
70
+
71
+
72
+ # def get_crop_prediction(data):
73
+ # features = np.array([[
74
+ # data.Nitrogen,
75
+ # data.Phosphorous,
76
+ # data.Potassium,
77
+ # data.temperature,
78
+ # data.humidity,
79
+ # data.ph,
80
+ # data.rainfall
81
+ # ]])
82
+ # scaled_features = crop_prediction_scaler.transform(features)
83
+ # prediction = crop_prediction_model.predict(scaled_features)
84
+ # return {"predicted_crop": prediction[0]}
85
+
86
+
87
+ # def get_fertilizer_prediction(data):
88
+ # crop_encoded = crop_encoder.transform(np.array([[data.crop_type]]))
89
+ # soil_encoded = soil_encoder.transform(np.array([[data.soil_type]]))
90
+ # features = np.array([[
91
+ # data.Temperature,
92
+ # data.Humidity,
93
+ # data.Moisture,
94
+ # data.Nitrogen,
95
+ # data.Potassium,
96
+ # data.Phosphorus
97
+ # ]])
98
+ # scaled_features = fertilizer_prediction_scaler.transform(features)
99
+ # final_features = np.concatenate([scaled_features, [soil_encoded], [crop_encoded]], axis=1)
100
+ # prediction_encoded = fertilizer_prediction_model.predict(final_features)
101
+ # prediction = fertilizer_encoder.inverse_transform(prediction_encoded)
102
+
103
+ # return {"predicted_fertilizer": prediction[0]}
104
+
105
+
106
+
107
+
108
+ # # import joblib
109
+ # # import numpy as np
110
+ # # import os
111
+
112
+ # # # Get the absolute path to the current file's directory
113
+ # # current_dir = os.path.dirname(os.path.abspath(__file__))
114
+
115
+ # # # Construct the absolute path to the models directory
116
+ # # models_dir = os.path.join(current_dir, '..', 'models')
117
+
118
+ # # # Load crop prediction model and scaler
119
+ # # crop_prediction_model = joblib.load(os.path.join(models_dir, 'crop_prediction_model.pkl'))
120
+ # # crop_prediction_scaler = joblib.load(os.path.join(models_dir, 'crop_prediction_scaler.pkl'))
121
+
122
+ # # # Load fertilizer prediction model and encoders/scalers
123
+ # # fertilizer_prediction_model = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_model.pkl'))
124
+ # # fertilizer_prediction_scaler = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_scaler.pkl'))
125
+ # # crop_encoder = joblib.load(os.path.join(models_dir, 'crop_encoder.pkl'))
126
+ # # soil_encoder = joblib.load(os.path.join(models_dir, 'soil_encoder.pkl'))
127
+ # # fertilizer_encoder = joblib.load(os.path.join(models_dir, 'fertilizer_encoder.pkl'))
128
+
129
+
130
+ # # def get_crop_prediction(data):
131
+ # # """
132
+ # # Predicts the recommended crop based on soil and weather conditions.
133
+ # # """
134
+ # # features = np.array([[
135
+ # # data.Nitrogen,
136
+ # # data.Phosphorous,
137
+ # # data.Potassium,
138
+ # # data.temperature,
139
+ # # data.humidity,
140
+ # # data.ph,
141
+ # # data.rainfall
142
+ # # ]])
143
+ # # scaled_features = crop_prediction_scaler.transform(features)
144
+ # # prediction = crop_prediction_model.predict(scaled_features)
145
+ # # return {"predicted_crop": prediction[0]}
146
+
147
+
148
+ # # def get_fertilizer_prediction(data):
149
+ # # """
150
+ # # Predicts the recommended fertilizer based on soil, weather, and crop type.
151
+ # # """
152
+ # # soil_encoded = soil_encoder.transform(np.array([[data.soil_type]]))
153
+ # # crop_encoded = crop_encoder.transform(np.array([[data.crop_type]]))
154
+
155
+ # # features = np.array([[
156
+ # # data.Nitrogen,
157
+ # # data.Phosphorus,
158
+ # # data.Potassium,
159
+ # # data.Temperature,
160
+ # # data.Humidity,
161
+ # # data.Moisture
162
+ # # ]])
163
+
164
+ # # scaled_features = fertilizer_prediction_scaler.transform(features)
165
+
166
+ # # final_features = np.concatenate([scaled_features, [soil_encoded], [crop_encoded]], axis=1)
167
+
168
+ # # prediction_encoded = fertilizer_prediction_model.predict(final_features)
169
+ # # prediction = fertilizer_encoder.inverse_transform(prediction_encoded)
170
+
171
+ # # return {"predicted_fertilizer": prediction[0]}
172
+
173
+
174
+ # import joblib
175
+ # import numpy as np
176
+ # import os
177
+
178
+ # # Get the absolute path to the current file's directory
179
+ # current_dir = os.path.dirname(os.path.abspath(__file__))
180
+
181
+ # # Construct the absolute path to the models directory
182
+ # models_dir = os.path.join(current_dir, '..', 'models')
183
+
184
+ # # Load crop prediction model and scaler
185
+ # crop_prediction_model = joblib.load(os.path.join(models_dir, 'crop_prediction_model.pkl'))
186
+ # crop_prediction_scaler = joblib.load(os.path.join(models_dir, 'crop_prediction_scaler.pkl'))
187
+
188
+ # # Load fertilizer prediction model and encoders/scalers
189
+ # fertilizer_prediction_model = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_model.pkl'))
190
+ # fertilizer_prediction_scaler = joblib.load(os.path.join(models_dir, 'fertilizer_prediction_scaler.pkl'))
191
+ # crop_encoder = joblib.load(os.path.join(models_dir, 'crop_encoder.pkl'))
192
+ # soil_encoder = joblib.load(os.path.join(models_dir, 'soil_encoder.pkl'))
193
+ # fertilizer_encoder = joblib.load(os.path.join(models_dir, 'fertilizer_encoder.pkl'))
194
+
195
+
196
+ # def get_crop_prediction(data):
197
+ # """
198
+ # Predicts the recommended crop based on soil and weather conditions.
199
+ # """
200
+ # features = np.array([[
201
+ # data.Nitrogen,
202
+ # data.Phosphorous,
203
+ # data.Potassium,
204
+ # data.temperature,
205
+ # data.humidity,
206
+ # data.ph,
207
+ # data.rainfall
208
+ # ]])
209
+ # scaled_features = crop_prediction_scaler.transform(features)
210
+ # prediction = crop_prediction_model.predict(scaled_features)
211
+ # return {"predicted_crop": prediction[0]}
212
+
213
+
214
+ # def get_fertilizer_prediction(data):
215
+ # """
216
+ # Predicts the recommended fertilizer based on soil, weather, and crop type.
217
+ # """
218
+ # soil_encoded = soil_encoder.transform(np.array([[data.soil_type]]))
219
+ # crop_encoded = crop_encoder.transform(np.array([[data.crop_type]]))
220
+
221
+ # features = np.array([[
222
+ # data.Nitrogen,
223
+ # data.Phosphorus,
224
+ # data.Potassium,
225
+ # data.Temperature,
226
+ # data.Humidity,
227
+ # data.Moisture
228
+ # ]])
229
+
230
+ # scaled_features = fertilizer_prediction_scaler.transform(features)
231
+
232
+ # final_features = np.concatenate([scaled_features, [soil_encoded], [crop_encoded]], axis=1)
233
+
234
+ # prediction_encoded = fertilizer_prediction_model.predict(final_features)
235
+ # prediction = fertilizer_encoder.inverse_transform(prediction_encoded)
236
+
237
+ # return {"predicted_fertilizer": prediction[0]}
238
+
239
+
240
+
services/weather_service.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import httpx
from fastapi import HTTPException
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get the API key from environment variables. Failing fast here means a
# missing key is caught at application startup (import time) rather than on
# the first incoming request.
API_KEY = os.getenv("WEATHER_API_KEY")
if not API_KEY:
    raise ValueError("No WEATHER_API_KEY found in environment variables")

# WeatherAPI.com "current conditions" endpoint (other endpoints such as
# forecast.json live under the same /v1 root).
BASE_URL = "http://api.weatherapi.com/v1/current.json"

# Human-readable labels for the US EPA air-quality index values (1-6)
# returned by WeatherAPI when "aqi" is requested.
# NOTE(review): not referenced within this module — presumably consumed by a
# route/caller; confirm before removing.
AIR_QUALITY_MAP = {
    1: "Good",
    2: "Moderate",
    3: "Unhealthy for sensitive groups",
    4: "Unhealthy",
    5: "Very Unhealthy",
    6: "Hazardous",
}
24
+
25
async def get_weather_data_for_city(city: str) -> dict:
    """Fetch current weather (including air quality) for *city*.

    Queries the WeatherAPI current-conditions endpoint and returns the raw
    JSON payload as a dict.

    Raises:
        HTTPException: 404 when the city is unknown (upstream answers 400),
            503 when the upstream service is unreachable, or the upstream
            status code for any other API error.
    """
    query = {"key": API_KEY, "q": city, "aqi": "yes"}

    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(BASE_URL, params=query)
            # Turn 4xx/5xx responses into an exception we can translate.
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            # WeatherAPI reports unknown locations as HTTP 400; surface that
            # to our clients as a conventional 404.
            upstream_status = exc.response.status_code
            if upstream_status == 400:
                raise HTTPException(status_code=404, detail=f"City '{city}' not found.")
            raise HTTPException(status_code=upstream_status, detail="Error fetching weather data.")
        except httpx.RequestError:
            # Network-level failure (DNS, connection, timeout).
            raise HTTPException(status_code=503, detail="Service is unavailable.")
        return resp.json()
47
+
48
async def get_weather_forecast_for_city(city: str, days: int) -> dict:
    """Fetch the weather forecast for *city* covering *days* days.

    Returns the raw WeatherAPI forecast JSON payload as a dict.

    Raises:
        HTTPException: 404 when the city is unknown (upstream answers 400),
            503 when the upstream service is unreachable, or the upstream
            status code for any other API error.
    """
    # BUG FIX: BASE_URL already points at ".../v1/current.json", so the old
    # f"{BASE_URL}/forecast.json" produced ".../current.json/forecast.json",
    # which is not a valid WeatherAPI endpoint. Derive the forecast endpoint
    # from the /v1 API root instead.
    url = BASE_URL.rsplit("/", 1)[0] + "/forecast.json"
    params = {"key": API_KEY, "q": city, "days": days, "aqi": "no", "alerts": "no"}

    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, params=params)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPStatusError as e:
            # WeatherAPI reports unknown locations as HTTP 400.
            if e.response.status_code == 400:
                raise HTTPException(status_code=404, detail=f"City '{city}' not found.")
            else:
                raise HTTPException(status_code=e.response.status_code, detail="Error fetching weather forecast data.")
        except httpx.RequestError:
            # Network-level failure (DNS, connection, timeout).
            raise HTTPException(status_code=503, detail="Service is unavailable.")
setup_models.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# setup_models.py
"""Download model artifacts from the Hugging Face Hub into the local tree.

Run once at Docker image build time (see Dockerfile). Fetches:
  * final_output.csv                  -> project root
  * market_models/*, other_models/*   -> models/
"""
import os
import shutil

from huggingface_hub import hf_hub_download, snapshot_download

# Configuration
REPO_ID = "ShadowGard3n/AgroVision-Models"
DEST_MODELS_DIR = "models"


def _download_folder_to_models(subfolder: str, temp_dir: str) -> None:
    """Snapshot *subfolder* from REPO_ID into *temp_dir*, then move its files
    into DEST_MODELS_DIR (overwriting any stale copies)."""
    snapshot_path = snapshot_download(
        repo_id=REPO_ID,
        allow_patterns=f"{subfolder}/*",
        local_dir=temp_dir,
        local_dir_use_symlinks=False
    )
    source_dir = os.path.join(snapshot_path, subfolder)
    if os.path.exists(source_dir):
        for file_name in os.listdir(source_dir):
            full_file_name = os.path.join(source_dir, file_name)
            if os.path.isfile(full_file_name):
                # Move to an explicit destination path: plain
                # shutil.move(file, directory) raises if the target file
                # already exists, which would break image rebuilds.
                shutil.move(full_file_name, os.path.join(DEST_MODELS_DIR, file_name))


def main() -> None:
    """Download the dataset and both model folders; best-effort per item."""
    # Ensure the destination models directory exists
    os.makedirs(DEST_MODELS_DIR, exist_ok=True)

    print(f"--- Starting Download from {REPO_ID} ---")

    # 1. Download final_output.csv to the root directory
    try:
        print("Downloading final_output.csv...")
        hf_hub_download(
            repo_id=REPO_ID,
            filename="final_output.csv",
            local_dir=".",
            local_dir_use_symlinks=False
        )
        print("✅ final_output.csv downloaded.")
    except Exception as e:
        # Best-effort: log and continue so one failure doesn't block the rest.
        print(f"❌ Error downloading csv: {e}")

    # 2./3. Download model folders and move their files to 'models/'.
    # Both folders follow identical logic, so share one helper.
    for subfolder, temp_dir, label in (
        ("market_models", "temp_download", "Market models"),
        ("other_models", "temp_download_other", "Other models"),
    ):
        try:
            print(f"Downloading {subfolder}...")
            _download_folder_to_models(subfolder, temp_dir)
            print(f"✅ {label} moved to /models.")
        except Exception as e:
            print(f"❌ Error downloading {label.lower()}: {e}")

    # Cleanup temp folders left behind by snapshot_download.
    shutil.rmtree("temp_download", ignore_errors=True)
    shutil.rmtree("temp_download_other", ignore_errors=True)

    print("--- Setup Complete ---")


if __name__ == "__main__":
    main()
train_market_model.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # needs to run only once
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ import xgboost as xgb
7
+ from sklearn.metrics import mean_absolute_error, r2_score
8
+ import joblib
9
+ import os
10
+
11
class CommodityPricePredictor:
    """Per-commodity daily price forecaster built on XGBoost.

    Holds the full market DataFrame and trains one regressor per commodity
    (stored in ``self.models``) from calendar, lag, and rolling-window
    features engineered over the daily mean ``modal_price``.
    """

    def __init__(self, df):
        # Accept either a ready DatetimeIndex or a 'created_at' column that
        # can be converted into one.
        if not isinstance(df.index, pd.DatetimeIndex):
            try:
                df['created_at'] = pd.to_datetime(df['created_at'])
                df = df.set_index('created_at')
            except (KeyError, TypeError):
                raise TypeError("DataFrame must have a DatetimeIndex or a 'created_at' column to convert.")
        # Sorted copy so time-based slicing and rolling windows behave.
        self.df_full = df.copy().sort_index()
        self.models = {}  # to store a trained model for each commodity
        print("✅ Predictor initialized.")

    def _create_features(self, df):
        """Return *df* plus calendar/lag/rolling features; drops warm-up rows.

        Lag and rolling features use only past prices (via ``shift``), so the
        result is safe for walk-forward forecasting. Rows lacking 30 days of
        history are removed by the trailing ``dropna()``.
        """
        df = df.copy()
        # Calendar features derived from the DatetimeIndex.
        df['dayofweek'] = df.index.dayofweek
        df['dayofyear'] = df.index.dayofyear
        df['month'] = df.index.month
        df['year'] = df.index.year
        df['quarter'] = df.index.quarter
        df['weekofyear'] = df.index.isocalendar().week.astype(int)

        # Lag features (price from previous periods)
        df['price_lag_7'] = df['modal_price'].shift(7)
        df['price_lag_14'] = df['modal_price'].shift(14)
        df['price_lag_30'] = df['modal_price'].shift(30)

        # Rolling window features (trend over the last month); shift(1) keeps
        # the current day's price out of its own window.
        df['rolling_mean_30'] = df['modal_price'].shift(1).rolling(window=30).mean()
        df['rolling_std_30'] = df['modal_price'].shift(1).rolling(window=30).std()

        return df.dropna()

    def train(self, commodity):
        """
        Trains a new XGBoost model for a specific commodity.

        Skips silently (with a warning) when there is no data for the
        commodity or not enough to split before/after 2024-01-01. On success
        the model is stored in ``self.models`` and evaluated immediately.
        """
        print(f"--- Training model for: {commodity} ---")

        df_commodity = self.df_full[self.df_full['commodity'] == commodity]
        if df_commodity.empty:
            print(f" Warning: No data found for {commodity}. Skipping training.")
            return

        # Collapse possibly multiple records per day into a single daily row.
        df_daily = df_commodity.groupby(df_commodity.index).agg({
            'modal_price': 'mean'  # Use the average price for that day
        })

        df_featured = self._create_features(df_daily)

        # Time-based split: everything before 2024 trains, 2024+ tests.
        train_df = df_featured.loc[df_featured.index < '2024-01-01']
        test_df = df_featured.loc[df_featured.index >= '2024-01-01']

        if test_df.empty or train_df.empty:
            print(f"⚠️ Warning: Not enough data to perform train/test split for {commodity}.")
            return

        print(f"Training data from {train_df.index.min().date()} to {train_df.index.max().date()}")
        print(f"Testing data from {test_df.index.min().date()} to {test_df.index.max().date()}")

        FEATURES = [col for col in df_featured.columns if col != 'modal_price']
        TARGET = 'modal_price'

        X_train, y_train = train_df[FEATURES], train_df[TARGET]
        X_test, y_test = test_df[FEATURES], test_df[TARGET]

        model = xgb.XGBRegressor(
            n_estimators=1000,
            learning_rate=0.01,
            eval_metric='mae',
            early_stopping_rounds=20
        )

        # NOTE(review): the held-out test set doubles as the early-stopping
        # eval set, so the reported test metrics are mildly optimistic —
        # confirm this is acceptable or carve out a separate validation split.
        model.fit(X_train, y_train,
                  eval_set=[(X_test, y_test)],
                  verbose=False)

        self.models[commodity] = model
        print(f"✅ Model for {commodity} trained and stored.")
        self.evaluate(commodity, test_df)  # Evaluate right after training

    def evaluate(self, commodity, test_df):
        """Print MAE/R² on *test_df* and a 6-month forecast summary."""
        if commodity not in self.models:
            print(f"❌ Error: Model for {commodity} not found. Please train it first.")
            return

        model = self.models[commodity]
        FEATURES = [col for col in test_df.columns if col != 'modal_price']
        TARGET = 'modal_price'

        X_test, y_test = test_df[FEATURES], test_df[TARGET]
        predictions = model.predict(X_test)

        mae = mean_absolute_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)

        print(f"\n--- Evaluation Results for {commodity} ---")
        print(f"R-squared (R²): {r2:.3f}")
        print(f"Mean Absolute Error (MAE): {mae:.2f}")
        print("--------------------------------------")

        # Roll the model forward 180 days past the last observed test date.
        daily_forecast_df, monthly_forecast_df = self.forecast_six_months(commodity, test_df.index.max())

        # Plotting intentionally disabled (headless/server environment).
        # plt.figure(figsize=(15, 6))
        # plt.style.use('seaborn-v0_8-whitegrid')
        # plt.plot(y_test.index, y_test, label='Actual Price', color='green')
        # plt.plot(y_test.index, predictions, label='Predicted Price (on test data)', color='red', linestyle='--')

        # if daily_forecast_df is not None and not daily_forecast_df.empty:
        #     plt.plot(daily_forecast_df.index, daily_forecast_df['forecast'],
        #              label='6-Month Forecast (Daily)', color='purple', linestyle=':')

        # plt.title(f'{commodity} Price: Actual vs. Predicted & 6-Month Forecast', fontsize=16)
        # plt.xlabel('Date')
        # plt.ylabel('Modal Price')
        # plt.legend()
        # plt.show()

        if monthly_forecast_df is not None and not monthly_forecast_df.empty:
            print(f"\n--- 6-Month Forecast for {commodity} (End of Month Price) ---")
            print(monthly_forecast_df.to_string(float_format="%.2f"))
            print("---------------------------------------------------------")

    def forecast_six_months(self, commodity, last_known_date):
        """Autoregressively forecast 180 days beyond *last_known_date*.

        Returns:
            tuple: ``(daily_forecast_df, monthly_forecast_df)`` — daily
            predictions and the last prediction of each of the first six
            calendar months, or ``(None, None)`` if no model exists for
            *commodity*.
        """
        if commodity not in self.models:
            print(f"❌ Error: Model for {commodity} not found.")
            return None, None

        model = self.models[commodity]
        df_commodity = self.df_full[self.df_full['commodity'] == commodity]
        df_daily = df_commodity.groupby(df_commodity.index).agg({'modal_price': 'mean'})

        future_dates = pd.date_range(start=last_known_date + pd.Timedelta(days=1), periods=180, freq='D')
        future_df = pd.DataFrame(index=future_dates, columns=['modal_price'])
        df_extended = pd.concat([df_daily, future_df])

        # Walk forward one day at a time so each prediction feeds the lag and
        # rolling features of the next day (autoregressive loop).
        for date in future_dates:
            featured_row = self._create_features(df_extended.loc[:date]).iloc[-1:]
            FEATURES = [col for col in featured_row.columns if col != 'modal_price']
            prediction = model.predict(featured_row[FEATURES])[0]
            df_extended.loc[date, 'modal_price'] = prediction

        daily_forecast_df = df_extended.loc[future_dates].copy()
        daily_forecast_df.rename(columns={'modal_price': 'forecast'}, inplace=True)
        # 'ME' = month-end frequency: keep the last daily value per month.
        monthly_forecast_df = daily_forecast_df.resample('ME').last().head(6)
        return daily_forecast_df, monthly_forecast_df
158
+
159
def train_and_save_models(df):
    """
    Trains a model for each commodity and saves it to a .pkl file.

    Args:
        df: Market records containing 'commodity' and 'modal_price' columns,
            indexed by (or convertible to) a DatetimeIndex via 'created_at'.
    """
    predictor = CommodityPricePredictor(df)
    commodities = df['commodity'].unique()

    # exist_ok replaces the explicit os.path.exists check — idempotent and
    # race-free.
    os.makedirs('models', exist_ok=True)

    for commodity in commodities:
        predictor.train(commodity)
        # train() skips commodities with insufficient data, so re-check
        # before attempting to persist.
        if commodity in predictor.models:
            model = predictor.models[commodity]
            # Replace path separators so names like 'Paddy/Dhan' don't create
            # (nonexistent) subdirectories.
            safe_commodity_name = commodity.replace('/', '_')
            joblib.dump(model, f'models/{safe_commodity_name}.pkl')
            print(f"✅ Model for {commodity} saved to models/{safe_commodity_name}.pkl")
179
+
180
+
181
if __name__ == '__main__':
    # Script entry point: load the merged market dataset and train/persist
    # one model per commodity (intended to run once; the API later loads the
    # resulting .pkl files from models/).
    market_df = pd.read_csv('final_output.csv')
    train_and_save_models(market_df)