Spaces:
Sleeping
Sleeping
π Initial upload of my app
Browse files- README.md +36 -28
- __pycache__/ui.cpython-311.pyc +0 -0
- __pycache__/utils.cpython-311.pyc +0 -0
- app.py +5 -23
- demo/demo.mp4 +2 -2
- demo/demo.png +0 -0
- requirements.txt +4 -5
- ui.py +22 -20
- utils.py +47 -135
README.md
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
-
#
|
| 2 |
-
Detecting Personally Identifiable Information (PII) using BiLSTM-CRF model
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
|
| 6 |

|
| 7 |
|
|
@@ -9,20 +10,24 @@ Detecting Personally Identifiable Information (PII) using BiLSTM-CRF model
|
|
| 9 |
|
| 10 |
## ✨ Features
|
| 11 |
|
| 12 |
-
-
|
| 13 |
-
-
|
| 14 |
-
-
|
| 15 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
##
|
| 18 |
|
| 19 |
-
1.
|
| 20 |
```bash
|
| 21 |
-
git clone https://github.com/yourusername/
|
| 22 |
-
cd
|
| 23 |
```
|
| 24 |
|
| 25 |
-
2.
|
| 26 |
```bash
|
| 27 |
# Create a virtual environment
|
| 28 |
python -m venv .venv
|
|
@@ -34,31 +39,34 @@ Detecting Personally Identifiable Information (PII) using BiLSTM-CRF model
|
|
| 34 |
.venv\Scripts\activate
|
| 35 |
```
|
| 36 |
|
| 37 |
-
3.
|
| 38 |
```bash
|
| 39 |
pip install -r requirements.txt
|
| 40 |
```
|
| 41 |
|
| 42 |
-
##
|
| 43 |
|
| 44 |
-
1.
|
| 45 |
```bash
|
| 46 |
-
|
| 47 |
```
|
| 48 |
|
| 49 |
-
2.
|
| 50 |
|
| 51 |
-
3.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
|
| 55 |
-
|
| 56 |
|
| 57 |
-
-
|
| 58 |
-
-
|
| 59 |
-
-
|
| 60 |
-
-
|
| 61 |
-
- And many more...
|
| 62 |
|
| 63 |
## 🤝 Contributing
|
| 64 |
|
|
@@ -76,6 +84,6 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
| 76 |
|
| 77 |
## 🙏 Acknowledgements
|
| 78 |
|
| 79 |
-
- [
|
| 80 |
-
- [
|
| 81 |
-
- [
|
|
|
|
| 1 |
+
# Sales Forecasting with LightGBM
|
|
|
|
| 2 |
|
| 3 |
+
A retail sales prediction application built with LightGBM and Gradio for interactive forecasting.
|
| 4 |
+
|
| 5 |
+
## π Demo
|
| 6 |
|
| 7 |

|
| 8 |
|
|
|
|
| 10 |
|
| 11 |
## ✨ Features
|
| 12 |
|
| 13 |
+
- Interactive web interface for sales prediction
|
| 14 |
+
- Takes into account various features including:
|
| 15 |
+
- Promotional events
|
| 16 |
+
- Holiday status
|
| 17 |
+
- Historical sales data (various lags and rolling means)
|
| 18 |
+
- Temporal features (day, month, year, day of week)
|
| 19 |
+
- Built with LightGBM for fast and accurate predictions
|
| 20 |
+
- Simple and intuitive user interface
|
| 21 |
|
| 22 |
+
## π Installation
|
| 23 |
|
| 24 |
+
1. Clone the repository:
|
| 25 |
```bash
|
| 26 |
+
git clone https://github.com/yourusername/sales-forecasting.git
|
| 27 |
+
cd sales-forecasting
|
| 28 |
```
|
| 29 |
|
| 30 |
+
2. Create and activate a virtual environment:
|
| 31 |
```bash
|
| 32 |
# Create a virtual environment
|
| 33 |
python -m venv .venv
|
|
|
|
| 39 |
.venv\Scripts\activate
|
| 40 |
```
|
| 41 |
|
| 42 |
+
3. Install the required dependencies:
|
| 43 |
```bash
|
| 44 |
pip install -r requirements.txt
|
| 45 |
```
|
| 46 |
|
| 47 |
+
## 🛠️ Usage
|
| 48 |
|
| 49 |
+
1. Run the application:
|
| 50 |
```bash
|
| 51 |
+
python app.py
|
| 52 |
```
|
| 53 |
|
| 54 |
+
2. Open your web browser and navigate to the URL shown in the terminal (typically http://localhost:7860)
|
| 55 |
|
| 56 |
+
3. Input the required information:
|
| 57 |
+
- Promo status (0 or 1)
|
| 58 |
+
- Holiday status (0 or 1)
|
| 59 |
+
- Date in YYYY-MM-DD format
|
| 60 |
+
- Sales lags and rolling means
|
| 61 |
|
| 62 |
+
4. Click "Predict Sales" to see the prediction
|
| 63 |
|
| 64 |
+
## 📦 Dependencies
|
| 65 |
|
| 66 |
+
- gradio >= 3.50.0
|
| 67 |
+
- joblib >= 1.3.0
|
| 68 |
+
- lightgbm >= 4.0.0
|
| 69 |
+
- pandas >= 2.0.0
|
|
|
|
| 70 |
|
| 71 |
## 🤝 Contributing
|
| 72 |
|
|
|
|
| 84 |
|
| 85 |
## 🙏 Acknowledgements
|
| 86 |
|
| 87 |
+
- [LightGBM](https://github.com/microsoft/LightGBM) - The gradient boosting framework used for predictions
|
| 88 |
+
- [Gradio](https://gradio.app/) - For the simple web interface
|
| 89 |
+
- [Pandas](https://pandas.pydata.org/) - For data manipulation and analysis
|
__pycache__/ui.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/ui.cpython-311.pyc and b/__pycache__/ui.cpython-311.pyc differ
|
|
|
__pycache__/utils.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -1,25 +1,7 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
| 3 |
-
from ui import render_ui
|
| 4 |
-
from model import BiLSTMCRF
|
| 5 |
-
|
| 6 |
-
# Cache model and tokenizer
|
| 7 |
-
@st.cache_resource
|
| 8 |
-
def get_model_and_tokenizer():
|
| 9 |
-
return load_full_model_and_tokenizer("models/best_bilstm_crf_model.pt")
|
| 10 |
-
|
| 11 |
-
model, tokenizer, idx2tag = get_model_and_tokenizer()
|
| 12 |
-
|
| 13 |
-
def main():
|
| 14 |
-
st.title("π Detecting PII with BiLSTM-CRF")
|
| 15 |
-
|
| 16 |
-
text = st.text_area("Enter text to analyze:", height=200)
|
| 17 |
-
|
| 18 |
-
if st.button("Analyze"):
|
| 19 |
-
if text.strip():
|
| 20 |
-
render_ui(text, model, tokenizer, idx2tag)
|
| 21 |
-
else:
|
| 22 |
-
st.warning("⚠️ Please enter some text.")
|
| 23 |
|
| 24 |
if __name__ == "__main__":
|
| 25 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
from utils import load_artifacts, predict_sales
|
| 2 |
+
from ui import build_ui
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
if __name__ == "__main__":
|
| 5 |
+
model, feature_cols = load_artifacts()
|
| 6 |
+
iface = build_ui(model, feature_cols, predict_sales)
|
| 7 |
+
iface.launch()
|
demo/demo.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:709f027723ef11b7699671bfb67b904580a63b70330dbef4069ffed351f4af8f
|
| 3 |
+
size 896228
|
demo/demo.png
CHANGED
|
|
requirements.txt
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
pandas
|
| 5 |
-
pytorch-crf==0.7.2
|
|
|
|
| 1 |
+
gradio>=3.50.0
|
| 2 |
+
joblib>=1.3.0
|
| 3 |
+
lightgbm>=4.0.0
|
| 4 |
+
pandas>=2.0.0
|
|
|
ui.py
CHANGED
|
@@ -1,26 +1,28 @@
|
|
| 1 |
-
import
|
| 2 |
-
from utils import prepare_inputs
|
| 3 |
-
import torch
|
| 4 |
-
import pandas as pd
|
| 5 |
|
| 6 |
-
def
|
| 7 |
-
|
| 8 |
-
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
|
| 14 |
-
|
| 15 |
-
labels = [idx2tag.get(tag, "O") for tag in predictions[0]]
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
-
|
|
|
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
def build_ui(model, feature_cols, predict_fn):
|
| 4 |
+
with gr.Blocks() as demo:
|
| 5 |
+
gr.Markdown("## π Retail Sales Prediction App")
|
| 6 |
|
| 7 |
+
with gr.Row():
|
| 8 |
+
promo = gr.Radio([0, 1], label="Promo", value=0)
|
| 9 |
+
holiday = gr.Radio([0, 1], label="Holiday", value=0)
|
| 10 |
|
| 11 |
+
date = gr.Textbox(label="Date (YYYY-MM-DD)", value="2023-11-01")
|
|
|
|
| 12 |
|
| 13 |
+
with gr.Row():
|
| 14 |
+
lag_1 = gr.Number(label="Sales Lag 1 Day", value=100)
|
| 15 |
+
lag_7 = gr.Number(label="Sales Lag 7 Days", value=120)
|
| 16 |
+
mean_3 = gr.Number(label="Rolling Mean (3 Days)", value=110)
|
| 17 |
+
mean_7 = gr.Number(label="Rolling Mean (7 Days)", value=115)
|
| 18 |
|
| 19 |
+
predict_btn = gr.Button("Predict Sales")
|
| 20 |
+
output = gr.Number(label="Predicted Sales", precision=2)
|
| 21 |
|
| 22 |
+
predict_btn.click(
|
| 23 |
+
fn=lambda p, h, d, l1, l7, m3, m7: predict_fn(model, feature_cols, p, h, d, l1, l7, m3, m7),
|
| 24 |
+
inputs=[promo, holiday, date, lag_1, lag_7, mean_3, mean_7],
|
| 25 |
+
outputs=output
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
return demo
|
utils.py
CHANGED
|
@@ -1,141 +1,53 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
12: 'B-CREDITCARDNUMBER',
|
| 29 |
-
13: 'B-CURRENCY',
|
| 30 |
-
14: 'B-CURRENCYCODE',
|
| 31 |
-
15: 'B-CURRENCYNAME',
|
| 32 |
-
16: 'B-CURRENCYSYMBOL',
|
| 33 |
-
17: 'B-DATE',
|
| 34 |
-
18: 'B-DOB',
|
| 35 |
-
19: 'B-EMAIL',
|
| 36 |
-
20: 'B-ETHEREUMADDRESS',
|
| 37 |
-
21: 'B-EYECOLOR',
|
| 38 |
-
22: 'B-FIRSTNAME',
|
| 39 |
-
23: 'B-GENDER',
|
| 40 |
-
24: 'B-HEIGHT',
|
| 41 |
-
25: 'B-IBAN',
|
| 42 |
-
26: 'B-IP',
|
| 43 |
-
27: 'B-IPV4',
|
| 44 |
-
28: 'B-IPV6',
|
| 45 |
-
29: 'B-JOBAREA',
|
| 46 |
-
30: 'B-JOBTITLE',
|
| 47 |
-
31: 'B-JOBTYPE',
|
| 48 |
-
32: 'B-LASTNAME',
|
| 49 |
-
33: 'B-LITECOINADDRESS',
|
| 50 |
-
34: 'B-MAC',
|
| 51 |
-
35: 'B-MASKEDNUMBER',
|
| 52 |
-
36: 'B-MIDDLENAME',
|
| 53 |
-
37: 'B-NEARBYGPSCOORDINATE',
|
| 54 |
-
38: 'B-ORDINALDIRECTION',
|
| 55 |
-
39: 'B-PASSWORD',
|
| 56 |
-
40: 'B-PHONEIMEI',
|
| 57 |
-
41: 'B-PHONENUMBER',
|
| 58 |
-
42: 'B-PIN',
|
| 59 |
-
43: 'B-PREFIX',
|
| 60 |
-
44: 'B-SECONDARYADDRESS',
|
| 61 |
-
45: 'B-SEX',
|
| 62 |
-
46: 'B-SSN',
|
| 63 |
-
47: 'B-STATE',
|
| 64 |
-
48: 'B-STREET',
|
| 65 |
-
49: 'B-TIME',
|
| 66 |
-
50: 'B-URL',
|
| 67 |
-
51: 'B-USERAGENT',
|
| 68 |
-
52: 'B-USERNAME',
|
| 69 |
-
53: 'B-VEHICLEVIN',
|
| 70 |
-
54: 'B-VEHICLEVRM',
|
| 71 |
-
55: 'B-ZIPCODE',
|
| 72 |
-
56: 'I-ACCOUNTNAME',
|
| 73 |
-
57: 'I-ACCOUNTNUMBER',
|
| 74 |
-
58: 'I-AGE',
|
| 75 |
-
59: 'I-AMOUNT',
|
| 76 |
-
60: 'I-BIC',
|
| 77 |
-
61: 'I-BITCOINADDRESS',
|
| 78 |
-
62: 'I-BUILDINGNUMBER',
|
| 79 |
-
63: 'I-CITY',
|
| 80 |
-
64: 'I-COMPANYNAME',
|
| 81 |
-
65: 'I-COUNTY',
|
| 82 |
-
66: 'I-CREDITCARDCVV',
|
| 83 |
-
67: 'I-CREDITCARDISSUER',
|
| 84 |
-
68: 'I-CREDITCARDNUMBER',
|
| 85 |
-
69: 'I-CURRENCY',
|
| 86 |
-
70: 'I-CURRENCYCODE',
|
| 87 |
-
71: 'I-CURRENCYNAME',
|
| 88 |
-
72: 'I-CURRENCYSYMBOL',
|
| 89 |
-
73: 'I-DATE',
|
| 90 |
-
74: 'I-DOB',
|
| 91 |
-
75: 'I-EMAIL',
|
| 92 |
-
76: 'I-ETHEREUMADDRESS',
|
| 93 |
-
77: 'I-EYECOLOR',
|
| 94 |
-
78: 'I-FIRSTNAME',
|
| 95 |
-
79: 'I-GENDER',
|
| 96 |
-
80: 'I-HEIGHT',
|
| 97 |
-
81: 'I-IBAN',
|
| 98 |
-
82: 'I-IP',
|
| 99 |
-
83: 'I-IPV4',
|
| 100 |
-
84: 'I-IPV6',
|
| 101 |
-
85: 'I-JOBAREA',
|
| 102 |
-
86: 'I-JOBTITLE',
|
| 103 |
-
87: 'I-JOBTYPE',
|
| 104 |
-
88: 'I-LASTNAME',
|
| 105 |
-
89: 'I-LITECOINADDRESS',
|
| 106 |
-
90: 'I-MAC',
|
| 107 |
-
91: 'I-MASKEDNUMBER',
|
| 108 |
-
92: 'I-MIDDLENAME',
|
| 109 |
-
93: 'I-NEARBYGPSCOORDINATE',
|
| 110 |
-
94: 'I-PASSWORD',
|
| 111 |
-
95: 'I-PHONEIMEI',
|
| 112 |
-
96: 'I-PHONENUMBER',
|
| 113 |
-
97: 'I-PIN',
|
| 114 |
-
98: 'I-PREFIX',
|
| 115 |
-
99: 'I-SECONDARYADDRESS',
|
| 116 |
-
100: 'I-SSN',
|
| 117 |
-
101: 'I-STATE',
|
| 118 |
-
102: 'I-STREET',
|
| 119 |
-
103: 'I-TIME',
|
| 120 |
-
104: 'I-URL',
|
| 121 |
-
105: 'I-USERAGENT',
|
| 122 |
-
106: 'I-USERNAME',
|
| 123 |
-
107: 'I-VEHICLEVIN',
|
| 124 |
-
108: 'I-VEHICLEVRM',
|
| 125 |
-
109: 'I-ZIPCODE',
|
| 126 |
-
110: 'O'}
|
| 127 |
|
| 128 |
-
return
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
)
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
return
|
|
|
|
| 1 |
+
import joblib
|
| 2 |
+
import lightgbm as lgb
|
| 3 |
+
import pandas as pd
|
| 4 |
|
| 5 |
+
# Load artifacts
|
| 6 |
+
def load_artifacts():
|
| 7 |
+
model = lgb.Booster(model_file="models/lgb_sales_model.txt")
|
| 8 |
+
feature_cols = joblib.load("models/feature_cols.pkl")
|
| 9 |
+
return model, feature_cols
|
| 10 |
+
|
| 11 |
+
# Preprocess new input row into model-ready features
|
| 12 |
+
def preprocess_input(promo, holiday, date, past_sales):
|
| 13 |
"""
|
| 14 |
+
Args:
|
| 15 |
|
| 16 |
+
promo: int (0/1)
|
| 17 |
+
holiday: int (0/1)
|
| 18 |
+
date: datetime-like
|
| 19 |
+
past_sales: dict with keys ['lag_1','lag_7','mean_3','mean_7']
|
| 20 |
+
|
| 21 |
+
Returns:
|
| 22 |
+
pd.DataFrame with a single row ready for prediction
|
| 23 |
+
"""
|
| 24 |
+
date = pd.to_datetime(date)
|
| 25 |
|
| 26 |
+
features = {
|
| 27 |
+
"promo": promo,
|
| 28 |
+
"holiday": holiday,
|
| 29 |
+
"day": date.day,
|
| 30 |
+
"month": date.month,
|
| 31 |
+
"year": date.year,
|
| 32 |
+
"day_of_week": date.weekday(),
|
| 33 |
+
"is_weekend": 1 if date.weekday() >= 5 else 0,
|
| 34 |
+
"sales_lag_1": past_sales.get("lag_1", 0),
|
| 35 |
+
"sales_lag_7": past_sales.get("lag_7", 0),
|
| 36 |
+
"rolling_mean_3": past_sales.get("mean_3", 0),
|
| 37 |
+
"rolling_mean_7": past_sales.get("mean_7", 0),
|
| 38 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
return pd.DataFrame([features])
|
| 41 |
|
| 42 |
+
# Prediction
|
| 43 |
+
def predict_sales(model, feature_cols, promo, holiday, date, lag_1, lag_7, mean_3, mean_7):
|
| 44 |
+
past_sales = {
|
| 45 |
+
"lag_1": lag_1,
|
| 46 |
+
"lag_7": lag_7,
|
| 47 |
+
"mean_3": mean_3,
|
| 48 |
+
"mean_7": mean_7,
|
| 49 |
+
}
|
| 50 |
+
X = preprocess_input(promo, holiday, date, past_sales)
|
| 51 |
+
X = X[feature_cols] # ensure correct column order
|
| 52 |
+
prediction = model.predict(X)[0]
|
| 53 |
+
return round(prediction, 2)
|