clementBE committed on
Commit
ea1fb77
·
verified ·
0 Parent(s):

initial commit

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +82 -0
  4. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Trainer4Xlsx
3
+ emoji: 🔥
4
+ colorFrom: yellow
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.36.2
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.pipeline import make_pipeline
5
+ from sklearn.linear_model import LogisticRegression
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.metrics import classification_report
8
+
9
# Module-level state shared by the callbacks below: train_model rebinds all
# three names, and predict_text reads model. Each stays None until a training
# run has completed.
model = X_test = y_test = None
12
+
13
def load_excel(file):
    """Return the first sheet's column names and all sheet names of a workbook.

    Args:
        file: Either a path string, an object exposing the path as ``.name``,
            or ``None`` when no file is available.

    Returns:
        A ``(columns, sheet_names)`` tuple of lists. Both lists are empty
        when ``file`` is ``None``.
    """
    if file is None:
        # Nothing to inspect (e.g. the upload was cleared); avoid crashing on
        # ``None.name``.
        return [], []

    path = file if isinstance(file, str) else file.name

    # The context manager closes the workbook handle once we are done with it.
    with pd.ExcelFile(path) as xls:
        sheet_names = list(xls.sheet_names)
        # nrows=0 parses only the header row; the data rows are not needed
        # just to list the column names.
        header = pd.read_excel(xls, sheet_names[0], nrows=0)

    return list(header.columns), sheet_names
20
+
21
def load_sheet(file, sheet_name):
    """Return a preview of one sheet together with its column names.

    Args:
        file: Either a path string, an object exposing the path as ``.name``,
            or ``None`` when no file is available.
        sheet_name: Name of the sheet to read; ``None`` or ``""`` means no
            sheet has been chosen yet.

    Returns:
        A ``(preview, columns)`` tuple where ``preview`` is
        ``DataFrame.head().to_dict()`` for the sheet and ``columns`` is the
        list of its column labels. Returns ``({}, [])`` when the file or the
        sheet name is missing.
    """
    if file is None or sheet_name is None or sheet_name == "":
        # pandas treats sheet_name=None as "read every sheet" and returns a
        # dict, which has no .head(); bail out before that happens.
        return {}, []

    path = file if isinstance(file, str) else file.name

    # The context manager closes the workbook handle once the sheet is read.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, sheet_name)

    return df.head().to_dict(), list(df.columns)
25
+
26
def train_model(file, sheet_name, text_col, target_col):
    """Train a TF-IDF + logistic-regression classifier from one Excel sheet.

    On success the fitted pipeline and the held-out split are stored in the
    module-level ``model``, ``X_test`` and ``y_test``. On any failure those
    globals are left untouched, so a previously trained model stays usable.

    Args:
        file: Either a path string, an object exposing the path as ``.name``,
            or ``None`` when no file is available.
        sheet_name: Name of the sheet holding the training data.
        text_col: Column containing the text to classify.
        target_col: Column containing the category labels.

    Returns:
        The ``classification_report`` text for the 20% held-out split, or a
        human-readable message explaining why training could not run.
    """
    global model, X_test, y_test

    if file is None:
        return "Please upload an Excel file first."
    selections = (sheet_name, text_col, target_col)
    if any(choice is None or choice == "" for choice in selections):
        return "Please select a sheet, a text column and a target column."
    if text_col == target_col:
        # Selecting the same column twice yields a frame with duplicate
        # column labels, which cannot be used as both features and labels.
        return "The text column and the target column must be different."

    path = file if isinstance(file, str) else file.name
    # The context manager closes the workbook handle once the sheet is read.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, sheet_name)

    missing = [col for col in (text_col, target_col) if col not in df.columns]
    if missing:
        return f"Column(s) not found in sheet {sheet_name!r}: {missing}"

    # Drop rows with a missing value in either selected column.
    df = df[[text_col, target_col]].dropna()
    X = df[text_col].astype(str)
    y = df[target_col].astype(str)

    try:
        # Split train/test for evaluation.
        X_train, X_holdout, y_train, y_holdout = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        # Fit a local pipeline first so the shared ``model`` is never left
        # pointing at an unfitted estimator if fitting fails.
        pipeline = make_pipeline(TfidfVectorizer(), LogisticRegression(max_iter=200))
        pipeline.fit(X_train, y_train)
    except ValueError as err:
        # scikit-learn signals unusable data (too few rows, a single class,
        # an empty vocabulary) with ValueError.
        return f"Training failed: {err}"

    model, X_test, y_test = pipeline, X_holdout, y_holdout

    y_pred = pipeline.predict(X_holdout)
    return classification_report(y_holdout, y_pred)
48
+
49
def predict_text(text):
    """Classify a single text with the module-level ``model``.

    Args:
        text: The text to classify.

    Returns:
        The first element of ``model.predict([text])``, or a prompt asking
        the user to train first when ``model`` is still ``None``.
    """
    if model is not None:
        # predict works on a batch, so wrap the text and unwrap the result.
        predictions = model.predict([text])
        return predictions[0]
    return "Please train the model first."
55
+
56
# UI layout and event wiring for the Excel text-classifier trainer.
with gr.Blocks() as demo:
    gr.Markdown("## Upload Excel training file")
    upload = gr.File(label="Upload XLSX file")

    sheet_dropdown = gr.Dropdown(label="Select Sheet", choices=[], interactive=True)
    # The text column and the category column need separate selectors;
    # train_model takes them as two distinct arguments.
    text_dropdown = gr.Dropdown(
        label="Select Text Column for Training", choices=[], interactive=True
    )
    cols_dropdown = gr.Dropdown(
        label="Select Category Column for Training", choices=[], interactive=True
    )

    train_btn = gr.Button("Train Model")
    output_train = gr.Textbox(label="Training Report", lines=10)

    text_input = gr.Textbox(label="Text to Classify")
    predict_btn = gr.Button("Predict")
    output_pred = gr.Textbox(label="Prediction")

    def _on_upload(file):
        """Fill the sheet and column dropdowns from the uploaded workbook."""
        if file is None:
            cleared = gr.update(choices=[], value=None)
            return cleared, cleared, cleared
        columns, sheets = load_excel(file)
        # Returning a bare list would set a dropdown's *value*; gr.update is
        # needed to replace its *choices*.
        sheet_choices = gr.update(choices=sheets, value=sheets[0] if sheets else None)
        column_choices = gr.update(choices=columns, value=None)
        return sheet_choices, column_choices, column_choices

    def _on_sheet(file, sheet_name):
        """Show a preview of the chosen sheet and refresh the column choices."""
        if file is None or not sheet_name:
            # Nothing selected: leave all three outputs unchanged.
            return gr.update(), gr.update(), gr.update()
        preview, columns = load_sheet(file, sheet_name)
        column_choices = gr.update(choices=columns, value=None)
        return str(preview), column_choices, column_choices

    # When file uploaded, populate the sheet and column dropdowns.
    upload.change(
        _on_upload,
        inputs=upload,
        outputs=[sheet_dropdown, text_dropdown, cols_dropdown],
    )

    # When sheet selected, load sheet to get columns for text + target.
    sheet_dropdown.change(
        _on_sheet,
        inputs=[upload, sheet_dropdown],
        outputs=[output_train, text_dropdown, cols_dropdown],
    )

    # When train clicked, train the model using the two selected columns.
    train_btn.click(
        train_model,
        inputs=[upload, sheet_dropdown, text_dropdown, cols_dropdown],
        outputs=output_train,
    )

    # Predict button.
    predict_btn.click(predict_text, inputs=text_input, outputs=output_pred)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=3.0
2
+ pandas>=1.3
3
+ scikit-learn>=1.0
4
+ openpyxl>=3.0
5
+ transformers>=4.0
6
+ datasets>=2.0
7
+ torch>=1.12