Mahmoud-Dev committed on
Commit
4744848
·
verified ·
1 Parent(s): 95b5c45

Test: Simple Gradio app to verify functionality

Browse files
Files changed (1) hide show
  1. app.py +4 -98
app.py CHANGED
@@ -1,103 +1,9 @@
1
  import gradio as gr
2
- import torch
3
- from datasets import load_dataset
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
5
 
6
- # Global variables for caching
7
- dataset = None
8
- tokenizer = None
9
- model = None
10
- tokenized_dataset = None
11
 
12
- def load_resources():
13
- global dataset, tokenizer, model, tokenized_dataset
14
-
15
- if dataset is not None:
16
- return
17
-
18
- try:
19
- dataset = load_dataset('arbml/Arabic_Sentiment_Twitter_Corpus')
20
- except:
21
- dataset = load_dataset('asas-ai/Arabic_Sentiment_Twitter_Corpus')
22
-
23
- tokenizer = AutoTokenizer.from_pretrained('distilbert-base-multilingual-cased')
24
- model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-multilingual-cased', num_labels=3)
25
-
26
- def preprocess_function(examples):
27
- text_column = 'tweet' if 'tweet' in examples else 'text'
28
- encoding = tokenizer(examples[text_column], truncation=True, padding='max_length', max_length=128)
29
-
30
- if 'label' in examples:
31
- encoding['labels'] = examples['label']
32
- elif 'sentiment' in examples:
33
- encoding['labels'] = examples['sentiment']
34
- return encoding
35
-
36
- def train_model(epochs, batch_size, learning_rate):
37
- try:
38
- load_resources()
39
-
40
- global tokenized_dataset
41
- if tokenized_dataset is None:
42
- tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset['train'].column_names)
43
-
44
- training_args = TrainingArguments(
45
- output_dir='./results',
46
- num_train_epochs=int(epochs),
47
- per_device_train_batch_size=int(batch_size),
48
- per_device_eval_batch_size=int(batch_size),
49
- learning_rate=float(learning_rate),
50
- weight_decay=0.01,
51
- save_strategy='epoch',
52
- logging_steps=50,
53
- )
54
-
55
- trainer = Trainer(
56
- model=model,
57
- args=training_args,
58
- train_dataset=tokenized_dataset['train'],
59
- eval_dataset=tokenized_dataset.get('validation', tokenized_dataset['train']),
60
- )
61
-
62
- trainer.train()
63
-
64
- return f"Training completed successfully! Model saved in ./results"
65
- except Exception as e:
66
- return f"Error during training: {str(e)}"
67
-
68
- with gr.Blocks(title="DistilBERT Arabic Sentiment Training") as demo:
69
- gr.Markdown("# DistilBERT Arabic Sentiment Training")
70
- gr.Markdown("Fine-tune DistilBERT on Arabic sentiment analysis (Saudi dialect)")
71
-
72
- gr.Markdown("### Model Information:")
73
- gr.Markdown("- **Base Model**: distilbert-base-multilingual-cased (67M parameters)")
74
- gr.Markdown("- **Task**: Text Classification (Multilingual)")
75
- gr.Markdown("- **Dataset**: arbml/Arabic_Sentiment_Twitter_Corpus (58.8k examples)")
76
- gr.Markdown("- **Language**: Arabic (Saudi & Gulf dialects)")
77
-
78
- with gr.Row():
79
- with gr.Column():
80
- gr.Markdown("### Training Settings")
81
- epochs = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Epochs")
82
- batch_size = gr.Slider(minimum=8, maximum=64, value=32, step=8, label="Batch Size")
83
- learning_rate = gr.Slider(minimum=1e-5, maximum=1e-3, value=2e-5, step=1e-5, label="Learning Rate")
84
-
85
- with gr.Column():
86
- gr.Markdown("### Training Status")
87
- output_text = gr.Textbox(label="Output", lines=10, interactive=False)
88
-
89
- train_button = gr.Button("Start Training", variant="primary")
90
- train_button.click(
91
- fn=train_model,
92
- inputs=[epochs, batch_size, learning_rate],
93
- outputs=output_text
94
- )
95
-
96
- gr.Markdown("### Training Details:")
97
- gr.Markdown("- **Hardware**: Free GPU (Hugging Face Spaces)")
98
- gr.Markdown("- **Expected Time**: 5-10 minutes (GPU) or 15-20 minutes (CPU)")
99
- gr.Markdown("- **Output Directory**: ./results")
100
- gr.Markdown("- **Usage**: Arabic text only")
101
 
102
  if __name__ == "__main__":
103
- demo.launch()
 
1
  import gradio as gr
 
 
 
2
 
3
+ def greet(name):
4
+ return f"Hello {name}!"
 
 
 
5
 
6
+ iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  if __name__ == "__main__":
9
+ iface.launch()