Thanut003 committed on
Commit
99ddd93
·
verified ·
1 Parent(s): f92f52e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -125
app.py CHANGED
@@ -1,128 +1,3 @@
1
- # import gradio as gr
2
- # import joblib
3
- # import pandas as pd
4
- # import re
5
- # import nltk
6
- # import numpy as np
7
- # import traceback
8
- # from khmernltk import word_tokenize
9
-
10
- # # --- 1. SETUP ---
11
- # try:
12
- # nltk.data.find('corpora/stopwords')
13
- # except LookupError:
14
- # nltk.download('stopwords')
15
-
16
- # from nltk.corpus import stopwords
17
- # english_stopwords = set(stopwords.words('english'))
18
-
19
- # # CRITICAL: This list MUST match the order of your LabelEncoder classes (0, 1, 2...)
20
- # LABELS = [
21
- # 'Culture', 'Economic', 'Education', 'Environment',
22
- # 'Health', 'Politics', 'Human Rights', 'Science'
23
- # ]
24
-
25
- # def clean_khmer_text(text):
26
- # if not isinstance(text, str): return ""
27
- # text = re.sub(r'<[^>]+>', '', text)
28
- # text = re.sub(r'[\u200B-\u200D\uFEFF]', '', text)
29
- # text = re.sub(r'[!"#$%&\'()*+,—./:;<=>?@[\]^_`{|}~។៕៖ៗ៘៙៚៛«»-]', '', text)
30
- # text = re.sub(r'\s+', ' ', text).strip()
31
- # return text
32
-
33
- # def khmer_tokenize(text):
34
- # cleaned = clean_khmer_text(text)
35
- # if not cleaned: return ""
36
- # tokens = word_tokenize(cleaned)
37
- # processed_tokens = []
38
- # for token in tokens:
39
- # if re.match(r'^[a-zA-Z0-9]+$', token):
40
- # token_lower = token.lower()
41
- # if token_lower in english_stopwords: continue
42
- # processed_tokens.append(token_lower)
43
- # else:
44
- # processed_tokens.append(token)
45
- # return " ".join(processed_tokens)
46
-
47
- # # # --- 2. LOAD MODELS ---
48
- # print("Loading processors...")
49
- # try:
50
- # vectorizer = joblib.load("tfidf_vectorizer.joblib")
51
- # svd = joblib.load("truncated_svd.joblib")
52
- # print("✅ Vectorizer & SVD loaded")
53
- # except Exception as e:
54
- # print(f"❌ CRITICAL LOAD ERROR: {e}")
55
-
56
- # models = {}
57
- # model_files = {
58
- # "XGBoost": "xgboost_model.joblib",
59
- # "LightGBM": "lightgbm_model.joblib",
60
- # "Random Forest": "random_forest_model.joblib",
61
- # "Logistic Regression": "logistic_regression_model.joblib",
62
- # "Linear SVM": "linear_svm_model.joblib"
63
- # }
64
-
65
- # for name, filename in model_files.items():
66
- # try:
67
- # models[name] = joblib.load(filename)
68
- # print(f"✅ Loaded {name}")
69
- # except:
70
- # print(f"⚠️ Skipping {name}")
71
-
72
- # # --- 3. PREDICTION FUNCTION ---
73
- # def predict(text, model_name):
74
- # if not text: return "Please enter text", {}, []
75
- # if model_name not in models: return "Model not found", {}, []
76
-
77
- # try:
78
- # # Pipeline
79
- # processed = khmer_tokenize(text)
80
- # vectors = vectorizer.transform([processed]) # TF-IDF Matrix (Sparse)
81
- # vectors_reduced = svd.transform(vectors) # SVD Matrix (Dense)
82
- # model = models[model_name]
83
-
84
- # # --- EXTRACT KEYWORDS ---
85
- # # We look at the TF-IDF vector to find the strongest words
86
- # feature_array = np.array(vectorizer.get_feature_names_out())
87
- # # Sort by score (descending)
88
- # tfidf_sorting = np.argsort(vectors.toarray()).flatten()[::-1]
89
-
90
- # # Get top 10 words that actually have a score > 0
91
- # top_n = 10
92
- # keywords = []
93
- # for idx in tfidf_sorting[:top_n]:
94
- # if vectors[0, idx] > 0:
95
- # keywords.append(feature_array[idx])
96
-
97
- # # --- PREDICTION ---
98
- # if hasattr(model, "predict_proba"):
99
- # probas = model.predict_proba(vectors_reduced)[0]
100
- # confidences = {LABELS[i]: float(probas[i]) for i in range(len(LABELS))}
101
- # top_label = max(confidences, key=confidences.get)
102
- # else:
103
- # raw_pred = model.predict(vectors_reduced)[0]
104
- # pred_idx = int(raw_pred) if isinstance(raw_pred, (int, np.integer)) else np.argmax(raw_pred)
105
- # top_label = LABELS[pred_idx]
106
- # confidences = {LABELS[pred_idx]: 1.0}
107
-
108
- # # Return 3 items: Label, Confidences, Keywords List
109
- # return top_label, confidences, keywords
110
-
111
- # except Exception as e:
112
- # return f"Error: {str(e)}", {}, []
113
-
114
-
115
-
116
- # # --- 4. LAUNCH ---
117
- # # IMPORTANT: allowed_origins="*" fixes the 405 error
118
- # demo = gr.Interface(
119
- # fn=predict,
120
- # inputs=[gr.Textbox(), gr.Dropdown(choices=list(models.keys()))],
121
- # outputs=[gr.Label(), gr.Label(), gr.JSON()]
122
- # )
123
- # demo.launch()
124
-
125
-
126
  import gradio as gr
127
  import joblib
128
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import joblib
3
  import pandas as pd