sedtha committed on
Commit
a11f9ec
·
verified ·
1 Parent(s): 469c844

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import torch
4
+
5
+ # ==========================
6
+ # 1. Load model from Hugging Face
7
+ # ==========================
8
+
9
# Hugging Face model id used for both the tokenizer and the seq2seq weights.
MODEL_NAME = "angkor96/khmer-news-summarization"  # e.g., "Sedtha-019/khmer-summarization"

print("Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Move to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

print(f"✅ Model loaded successfully on {device}!")
20
+
21
+ # ==========================
22
+ # 2. Summarization function
23
+ # ==========================
24
def summarize_khmer_text(text, max_length=150, min_length=40):
    """Summarize Khmer text with the module-level tokenizer and model.

    Args:
        text: Source text to summarize.
        max_length: Upper bound passed to ``model.generate`` for the summary.
        min_length: Lower bound passed to ``model.generate``; clamped so it
            never exceeds ``max_length``.

    Returns:
        The decoded summary string. When the input is empty, shorter than
        20 characters after stripping, or generation raises, a user-facing
        warning/error message is returned instead (the function does not
        raise, so the UI always has text to show).
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return "⚠️ សូមបញ្ចូលអត្ថបទ / Please enter text"

    if len(stripped) < 20:
        return "⚠️ អត្ថបទខ្លីពេក / Text is too short to summarize"

    try:
        # UI sliders may deliver floats; the length limits must be integers.
        max_length = int(max_length)
        # The two limits are chosen independently, so the minimum can end up
        # above the maximum; clamp it to keep the constraint satisfiable.
        min_length = max(0, min(int(min_length), max_length))

        # Tokenize input. A single sequence needs no padding, so the encoder
        # only processes the real tokens (truncated to 1024).
        inputs = tokenizer(
            text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        ).to(device)

        # Generate summary, forwarding the attention mask explicitly.
        with torch.no_grad():
            summary_ids = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs.get("attention_mask"),
                max_length=max_length,
                min_length=min_length,
                length_penalty=2.0,
                num_beams=4,
                early_stopping=True,
                no_repeat_ngram_size=3,
            )

        # Decode output
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"❌ Error: {e}"
63
+
64
+ # ==========================
65
+ # 3. Gradio UI
66
+ # ==========================
67
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header (Khmer + English).
    gr.Markdown(
        """
        # 🇰🇭 Khmer Text Summarization
        ### បញ្ចូលអត្ថបទខ្មែរ ហើយទទួលបានការសង្ខេបដោយស្វ័យប្រវត្តិ
        Enter Khmer text and get an automatic summary
        """
    )

    with gr.Row():
        # Left column: source text, length controls and the trigger button.
        with gr.Column():
            input_text = gr.Textbox(
                lines=10,
                placeholder="បញ្ចូលអត្ថបទខ្មែរនៅទីនេះ...\nEnter Khmer text here...",
                label="📝 អត្ថបទដើម / Original Text"
            )

            with gr.Row():
                max_len = gr.Slider(
                    minimum=50,
                    maximum=300,
                    value=150,
                    step=10,
                    label="Maximum Summary Length"
                )
                min_len = gr.Slider(
                    minimum=20,
                    maximum=100,
                    value=40,
                    step=10,
                    label="Minimum Summary Length"
                )

            submit_btn = gr.Button("🔄 Summarize / សង្ខេប", variant="primary")

        # Right column: generated summary.
        with gr.Column():
            output_text = gr.Textbox(
                lines=10,
                label="📋 សង្ខេប / Summary"
            )

    # Examples: each row is [text, max length, min length], matching `inputs`.
    gr.Examples(
        examples=[
            ["ប្រទេសកម្ពុជាមានប្រវត្តិសាស្រ្តយូរលង់និងសម្បូរបែបដោយវប្បធម៌។ អាណាចក្រខ្មែរបានរីកចម្រើនក្នុងសតវត្សទី៩ដល់ទី១៥។ អង្គរវត្តជាស្នាដៃស្ថាបត្យកម្មដ៏អស្ចារ្យមួយរបស់ពិភពលោក។", 100, 30],
            ["ការអប់រំជាមូលដ្ឋានគ្រឹះសំខាន់សម្រាប់ការអភិវឌ្ឍន៍ជាតិ។ សិស្សានុសិស្សគប្បីរៀនសូត្រយ៉ាងស្អិតរុំ។ គ្រូបង្រៀនមានតួនាទីសំខាន់ក្នុងការបង្កើតអនាគតកុមារ។", 80, 25],
        ],
        inputs=[input_text, max_len, min_len],
    )

    # Connect button: run the summarizer on click and show its return value.
    submit_btn.click(
        fn=summarize_khmer_text,
        inputs=[input_text, max_len, min_len],
        outputs=output_text
    )
123
+
124
+ # ==========================
125
+ # 4. Launch
126
+ # ==========================
127
if __name__ == "__main__":
    # Start the app only when executed as a script, not on import.
    # share=True is forwarded unchanged to Gradio's launch().
    launch_kwargs = {"share": True}
    demo.launch(**launch_kwargs)