DKartsev committed on
Commit
d054384
·
verified ·
1 Parent(s): c549afe

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +37 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.linear_model import LogisticRegression
5
+ from sklearn.pipeline import Pipeline
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import classification_report
8
+
9
# Load the training split of the CEFR-labelled dataset.
dataset = load_dataset("UniversalCEFR/cefr_sp_en", split="train")

# Collect texts and their CEFR labels in one pass, skipping rows whose
# text is empty or missing so both lists stay aligned.
texts = []
labels = []
for row in dataset:
    if row['text']:
        texts.append(row['text'])
        labels.append(row['cefr_level'])

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# TF-IDF features (vocabulary capped at 5000 terms) fed into a logistic
# regression classifier.
tfidf_step = ("tfidf", TfidfVectorizer(max_features=5000))
clf_step = ("clf", LogisticRegression(max_iter=1000))
model = Pipeline([tfidf_step, clf_step])
model.fit(X_train, y_train)

# Print per-class precision/recall/F1 on the held-out split.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
28
+
29
# Gradio interface
def predict(text):
    """Estimate the CEFR level of *text* with the fitted module-level ``model``.

    Args:
        text: Raw text submitted through the Gradio text box.

    Returns:
        A user-facing string with the predicted level and the probability of
        the most likely class as a percentage rounded to two decimals. For
        ``None``, empty, or whitespace-only input a prompt asking for text is
        returned instead of a prediction.
    """
    # Blank input carries no tokens for the vectorizer, so any level and
    # confidence reported for it would be meaningless; ask for text instead.
    if not text or not text.strip():
        return "Введите текст для оценки уровня."

    sample = [text]
    pred = model.predict(sample)[0]
    proba = model.predict_proba(sample)[0]
    # Confidence is the highest class probability, expressed as a percentage.
    confidence = round(max(proba) * 100, 2)
    return f"Уровень: {pred} (уверенность: {confidence}%)"
35
+
36
# Web UI: one free-text input box, one text output, backed by predict().
interface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="CEFR Level Estimator",
)
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ scikit-learn
2
+ pandas
3
+ matplotlib
4
+ datasets
5
+ gradio