Flippinjack committed on
Commit
3c86a47
·
1 Parent(s): f4b0528

Added the full app into the Space and also changed the maximum and minimum values

Browse files
Files changed (1) hide show
  1. app.py +216 -99
app.py CHANGED
@@ -1,57 +1,86 @@
1
  """
2
- Gradio Interface for Indonesian Court Document Summarization
3
-
4
- This is a conversion from Flask to Gradio for easier deployment on Hugging Face Spaces.
5
 
6
  LEARNING NOTES:
7
- - Gradio automatically creates a web UI from function definitions
8
- - No need for HTML templates or route decorators
9
- - Input/output types define the UI components
 
10
  """
11
 
12
  import gradio as gr
13
  import torch
 
14
  from pretrained_summarizer import create_summarizer
 
15
 
16
  # ============================================================================
17
- # Step 1: Initialize the model (same as Flask)
18
  # ============================================================================
19
- print("Loading summarization model...")
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
- print(f"Using device: {device}")
22
 
 
 
 
 
 
 
23
  try:
24
  summarizer = create_summarizer("balanced")
25
- print("โœ“ Summarization model loaded successfully!")
 
26
  except Exception as e:
27
- print(f"โœ— Failed to load model: {e}")
28
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # ============================================================================
31
- # Step 2: Define the main function (replaces Flask route)
32
  # ============================================================================
 
33
  def summarize_document(document, max_length, min_length, num_beams):
34
- """
35
- This function replaces your Flask /summarize endpoint.
36
 
37
- Parameters match your Flask API, but return values are simpler
38
- - No jsonify() needed
39
- - Gradio handles the response automatically
40
- """
41
 
42
- # Validation (same as Flask)
43
  if not document or not document.strip():
44
- return "โŒ Error: Please enter a document to summarize"
45
 
46
  if max_length < min_length:
47
- return "โŒ Error: Max length must be greater than min length"
48
 
49
- # Cap max_length (same as Flask)
50
  if max_length > 1024:
51
  max_length = 1024
52
 
53
  try:
54
- # Generate summary (same logic as Flask)
55
  summary = summarizer.summarize(
56
  document=document,
57
  max_length=int(max_length),
@@ -59,110 +88,198 @@ def summarize_document(document, max_length, min_length, num_beams):
59
  num_beams=int(num_beams)
60
  )
61
 
62
- # Calculate statistics
63
  doc_words = len(document.split())
64
  summary_words = len(summary.split())
65
- compression_ratio = round(summary_words / doc_words, 2) if doc_words > 0 else 0
66
 
67
- # Format output with statistics
68
  output = f"""๐Ÿ“ SUMMARY:
69
  {summary}
70
 
71
  ๐Ÿ“Š STATISTICS:
72
- โ€ข Document length: {doc_words} words
73
- โ€ข Summary length: {summary_words} words
74
- โ€ข Compression ratio: {compression_ratio}x
75
- โ€ข Device used: {device}
76
  """
77
  return output
78
 
79
  except Exception as e:
80
- return f"โŒ Error during summarization: {str(e)}"
81
 
82
 
83
- # ============================================================================
84
- # Step 3: Create Gradio Interface
85
- # ============================================================================
86
- # This replaces your HTML templates and Flask routes
87
- demo = gr.Interface(
88
- fn=summarize_document, # The function to call
89
-
90
- # Define inputs (replaces HTML form fields)
91
  inputs=[
92
  gr.Textbox(
93
  label="๐Ÿ“„ Indonesian Court Document",
94
- placeholder="Paste your court document text here...",
95
- lines=10,
96
- max_lines=20
97
- ),
98
- gr.Slider(
99
- minimum=50,
100
- maximum=1024,
101
- value=200,
102
- step=10,
103
- label="Max Summary Length (words)",
104
- info="Maximum length of the generated summary"
105
  ),
106
- gr.Slider(
107
- minimum=10,
108
- maximum=100,
109
- value=30,
110
- step=5,
111
- label="Min Summary Length (words)",
112
- info="Minimum length of the generated summary"
113
- ),
114
- gr.Slider(
115
- minimum=1,
116
- maximum=10,
117
- value=4,
118
- step=1,
119
- label="Num Beams",
120
- info="Higher = better quality but slower (recommended: 4)"
121
- )
122
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- # Define output (replaces JSON response)
125
- outputs=gr.Textbox(
126
- label="โœจ Generated Summary",
127
- lines=15,
128
- max_lines=25
129
- ),
130
 
131
- # UI Configuration
132
- title="๐Ÿ›๏ธ Indonesian Court Document Summarizer",
133
- description="""
134
- This tool uses a pre-trained AI model to summarize Indonesian court documents.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- **How to use:**
137
- 1. Paste your court document in the text box
138
- 2. Adjust the summary length parameters (optional)
139
- 3. Click "Submit" to generate summary
140
 
141
- **Note:** First run may take longer as the model loads.
142
- """,
143
 
144
- # Example inputs for users to try
 
 
 
 
 
 
 
 
 
 
145
  examples=[
146
- [
147
- "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah.",
148
- 200,
149
- 30,
150
- 4
151
- ]
152
  ]
153
  )
154
 
155
  # ============================================================================
156
- # Step 4: Launch the app
157
  # ============================================================================
158
- if __name__ == "__main__":
159
- # For local testing:
160
- # demo.launch(share=False)
161
 
162
- # For Hugging Face Spaces deployment:
163
- # Note: In Gradio 6.0+, theme is passed to launch() not Interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  demo.launch(
165
- server_name="0.0.0.0", # Allow external access
166
- server_port=7860, # Default HF Spaces port
167
- share=False # Don't create public link (HF does this)
168
  )
 
 
 
 
 
 
 
1
  """
2
+ Combined Gradio App - Indonesian AI Tools
3
+ This is the MAIN FILE for Hugging Face Spaces deployment
 
4
 
5
  LEARNING NOTES:
6
+ - Uses gr.TabbedInterface to combine multiple features
7
+ - Each tab is a separate gr.Interface
8
+ - This replaces your entire Flask app with one file
9
+ - Hugging Face Spaces will automatically run this file
10
  """
11
 
12
  import gradio as gr
13
  import torch
14
+ import tiktoken
15
  from pretrained_summarizer import create_summarizer
16
+ from ml_model import GPTModel, generate_text_better, text_token_ids, token_text_ids
17
 
18
# ============================================================================
# Initialize Device
# ============================================================================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🖥️ Using device: {device}")

# ============================================================================
# Load Models
# ============================================================================
# Each model is loaded best-effort: a failure disables the matching feature
# flag instead of crashing the whole app, so the remaining tabs still work.

# --- Summarization Model ---
print("\n[1/2] Loading summarization model...")
try:
    summarizer = create_summarizer("balanced")
    print("✓ Summarization model loaded!")
    summarizer_available = True
except Exception as e:
    print(f"✗ Summarization model failed: {e}")
    summarizer_available = False

# --- Text Generation Model (Optional - may not fit in free tier) ---
print("\n[2/2] Loading custom GPT model...")
try:
    # NOTE(review): torch.load unpickles the checkpoint — only load
    # checkpoints from a trusted source.
    checkpoint = torch.load('gpt_model_checkpoint.pth', map_location=device)
    model = GPTModel(checkpoint['config'])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()  # inference mode: disables dropout etc.
    tokenizer = tiktoken.get_encoding("gpt2")
    print("✓ Custom GPT model loaded!")
    gpt_available = True
except FileNotFoundError:
    # Missing checkpoint is expected on slim deployments; degrade gracefully.
    print("✗ GPT model not found (gpt_model_checkpoint.pth)")
    print("  Skipping text generation feature...")
    gpt_available = False
except Exception as e:
    print(f"✗ GPT model failed: {e}")
    gpt_available = False

# Startup banner summarizing which features are live.
_banner = "=" * 60
print("\n" + _banner)
print("🚀 Gradio App Ready!")
print(_banner)
print(f"✓ Summarization: {'Available' if summarizer_available else 'Unavailable'}")
print(f"✓ Text Generation: {'Available' if gpt_available else 'Unavailable'}")
print(_banner + "\n")
63
 
64
  # ============================================================================
65
+ # TAB 1: Court Document Summarization
66
  # ============================================================================
67
+
68
  def summarize_document(document, max_length, min_length, num_beams):
69
+ """Summarize Indonesian court documents"""
 
70
 
71
+ if not summarizer_available:
72
+ return "โŒ Summarization model is not available"
 
 
73
 
 
74
  if not document or not document.strip():
75
+ return "โŒ Please enter a document to summarize"
76
 
77
  if max_length < min_length:
78
+ return "โŒ Max length must be greater than min length"
79
 
 
80
  if max_length > 1024:
81
  max_length = 1024
82
 
83
  try:
 
84
  summary = summarizer.summarize(
85
  document=document,
86
  max_length=int(max_length),
 
88
  num_beams=int(num_beams)
89
  )
90
 
 
91
  doc_words = len(document.split())
92
  summary_words = len(summary.split())
93
+ compression = round(summary_words / doc_words, 2) if doc_words > 0 else 0
94
 
 
95
  output = f"""๐Ÿ“ SUMMARY:
96
  {summary}
97
 
98
  ๐Ÿ“Š STATISTICS:
99
+ โ€ข Original: {doc_words} words
100
+ โ€ข Summary: {summary_words} words
101
+ โ€ข Compression: {compression}x
102
+ โ€ข Device: {device}
103
  """
104
  return output
105
 
106
  except Exception as e:
107
+ return f"โŒ Error: {str(e)}"
108
 
109
 
110
# Example shown under the input box: an Indonesian corruption verdict.
_EXAMPLE_DOC = "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah."

# Gradio UI for the summarization tab: one text box plus three sliders,
# wired straight to summarize_document.
summarize_interface = gr.Interface(
    fn=summarize_document,
    inputs=[
        gr.Textbox(
            label="📄 Indonesian Court Document",
            placeholder="Paste your court document here...",
            lines=10
        ),
        gr.Slider(50, 1024, value=200, step=10, label="Max Length"),
        gr.Slider(10, 500, value=30, step=10, label="Min Length"),
        gr.Slider(1, 10, value=4, step=1, label="Num Beams")
    ],
    outputs=gr.Textbox(label="✨ Summary", lines=15),
    title="🏛️ Court Document Summarizer",
    description="Summarize Indonesian court documents using AI",
    examples=[[_EXAMPLE_DOC, 200, 30, 4]]
)
132
+
133
# ============================================================================
# TAB 2: Text Generation (if model available)
# ============================================================================

def generate_text(prompt, max_tokens, temperature, top_k):
    """Generate text using custom GPT model"""
    # Guard clauses first: model availability, then input validation.
    if not gpt_available:
        return "❌ Text generation model is not available. This feature requires the 1.5GB model checkpoint which may not be included in this deployment."

    if not prompt or not prompt.strip():
        return "❌ Please enter a prompt"

    try:
        # Tokenize the prompt and move it onto the active device.
        encoded = text_token_ids(prompt, tokenizer).to(device)

        # Inference only — no gradient bookkeeping needed.
        with torch.no_grad():
            token_ids = generate_text_better(
                model=model,
                idx=encoded,
                max_new_tokens=int(max_tokens),
                context_size=checkpoint['config']['context_length'],
                temperature=float(temperature),
                top_k=int(top_k)
            )

        # Decode token ids back into text.
        generated_text = token_text_ids(token_ids, tokenizer)

        output = f"""🤖 GENERATED TEXT:
{generated_text}

⚙️ PARAMETERS:
• Tokens: {max_tokens}
• Temperature: {temperature}
• Top-K: {top_k}
• Device: {device}
"""
        return output

    except Exception as e:
        # Report failures in the output box rather than raising.
        return f"❌ Error: {str(e)}"
 
 
174
 
 
 
175
 
176
# Gradio UI for the text-generation tab. The sampling controls mirror the
# generate_text signature: prompt, token budget, temperature, and top-k.
generate_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="💭 Prompt", lines=5, placeholder="Enter your prompt..."),
        gr.Slider(10, 500, value=100, step=10, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature"),
        gr.Slider(1, 100, value=50, step=1, label="Top-K")
    ],
    outputs=gr.Textbox(label="✨ Generated Text", lines=15),
    title="🚀 Text Generator",
    description="Generate text using custom GPT model",
    examples=[
        ["Once upon a time,", 150, 0.8, 50],
        ["The future of AI is", 100, 0.7, 40]
    ]
)
192
 
193
# ============================================================================
# TAB 3: About / Info
# ============================================================================

def get_system_info():
    """Build the About-tab Markdown: system, model, and usage information.

    Returns
    -------
    str
        Markdown describing the runtime (device, PyTorch/CUDA status),
        which models loaded, and usage tips. Reads the module-level
        ``device``, ``summarizer_available`` and ``gpt_available`` flags.
    """
    # Fixed: the "✅" now lives inside the conditional for BOTH model
    # lines, so an unavailable summarizer no longer shows a green check.
    info = f"""# 🤖 Indonesian AI Tools

## System Information
- **Device**: {device}
- **PyTorch Version**: {torch.__version__}
- **CUDA Available**: {torch.cuda.is_available()}

## Available Models
- **Summarization**: {'✅ Loaded' if summarizer_available else '❌ Not Available'}
- **Text Generation**: {'✅ Loaded' if gpt_available else '❌ Not Available'}

## Features
1. **Court Document Summarization**
   - Summarizes Indonesian legal documents
   - Uses pre-trained transformer model
   - Adjustable summary length

2. **Text Generation** (if available)
   - Custom GPT model
   - Trained on specific corpus
   - Creative text generation

## Usage Tips
- For summarization: Use 4-6 beams for best quality
- For generation: Temperature 0.7-0.9 for creative output
- Adjust parameters based on your needs

## Technical Details
- Framework: Gradio + PyTorch
- Deployment: Hugging Face Spaces compatible
- GPU Support: Automatic detection
"""
    return info
233
+
234
+
235
# Read-only About tab: no inputs, Markdown output generated on demand.
info_interface = gr.Interface(
    fn=get_system_info,
    inputs=[],
    outputs=gr.Markdown(),
    title="ℹ️ About",
    description="System information and usage guide"
)
242
+
243
# ============================================================================
# Create Combined Tabbed Interface
# ============================================================================
# One file combines all features: each tab is a separate gr.Interface and
# Gradio handles all routing — no Flask-style endpoints needed.
#
# Fixed: the tab list is now built conditionally. The previous version
# substituted info_interface as a filler second tab when the GPT model was
# missing, which showed the identical About page twice ("Info" + "About").
if gpt_available:
    _interfaces = [summarize_interface, generate_interface, info_interface]
    _tab_names = ["📄 Summarize", "🚀 Generate", "ℹ️ About"]
else:
    _interfaces = [summarize_interface, info_interface]
    _tab_names = ["📄 Summarize", "ℹ️ About"]

demo = gr.TabbedInterface(
    interface_list=_interfaces,
    tab_names=_tab_names,
    title="🇮🇩 Indonesian AI Tools"
)
269
+
270
# ============================================================================
# Launch Application
# ============================================================================
if __name__ == "__main__":
    # Hugging Face Spaces configuration: bind all interfaces on the
    # platform's default port; Spaces provides the public URL itself.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )

    # Local testing with a temporary public URL:
    # demo.launch(share=True)

    # Plain local testing:
    # demo.launch()