File size: 7,080 Bytes
6dcb6b4
9cfd753
 
 
6dcb6b4
 
9cfd753
 
 
6dcb6b4
2d98c81
6dcb6b4
 
 
2d98c81
 
6dcb6b4
 
9cfd753
6dcb6b4
9cfd753
6dcb6b4
9cfd753
6dcb6b4
 
 
9cfd753
6dcb6b4
9cfd753
 
6dcb6b4
2d98c81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6dcb6b4
9cfd753
6dcb6b4
2d98c81
9cfd753
 
6dcb6b4
9cfd753
 
 
 
6dcb6b4
2d98c81
 
 
 
 
 
 
 
 
 
 
9cfd753
6dcb6b4
2d98c81
6dcb6b4
 
9cfd753
6dcb6b4
9cfd753
f4b0528
 
6dcb6b4
 
9cfd753
6dcb6b4
 
 
 
 
 
 
9cfd753
6dcb6b4
 
9cfd753
6dcb6b4
9cfd753
6dcb6b4
 
 
 
9cfd753
 
 
 
6dcb6b4
 
 
 
9cfd753
6dcb6b4
 
9cfd753
 
 
 
 
 
 
 
6dcb6b4
 
 
9cfd753
 
 
 
2d98c81
 
 
 
9cfd753
2d98c81
 
 
9cfd753
 
6897a38
6dcb6b4
9cfd753
2d98c81
 
 
 
9cfd753
6897a38
9cfd753
 
 
 
6897a38
9cfd753
 
6897a38
9cfd753
6dcb6b4
 
9cfd753
 
 
 
 
 
6dcb6b4
9cfd753
 
 
 
6dcb6b4
9cfd753
 
 
 
6dcb6b4
9cfd753
 
6dcb6b4
9cfd753
6dcb6b4
9cfd753
 
2d98c81
6897a38
 
 
9cfd753
6dcb6b4
 
 
 
9cfd753
3c86a47
 
9cfd753
 
 
 
 
6dcb6b4
9cfd753
 
 
6dcb6b4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
"""
Gradio Interface for Indonesian Court Document Summarization

This is a conversion from Flask to Gradio for easier deployment on Hugging Face Spaces.

LEARNING NOTES:
- Gradio automatically creates a web UI from function definitions
- No need for HTML templates or route decorators
- Input/output types define the UI components
"""
import os
import gradio as gr
import torch
from pretrained_summarizer import create_summarizer
from pypdf import PdfReader
from docx import Document

# ============================================================================
# Step 1: Initialize the model (same as Flask)
# ============================================================================
# Announce startup, then choose the compute device. `device` is reported back
# to the user in the statistics section of each generated summary.
print("Loading summarization model...")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Build the summarizer once at import time. A failure here is fatal for the
# app, so it is reported and then re-raised rather than swallowed.
try:
    summarizer = create_summarizer("balanced")
    print("βœ“ Summarization model loaded successfully!")
except Exception as load_error:
    print(f"βœ— Failed to load model: {load_error}")
    raise



def read_file(file_path):
    """Extract the plain-text content of a .txt, .pdf or .docx file.

    The format is selected from the file extension, compared
    case-insensitively.

    Failures are raised instead of being returned as text: a returned error
    message is indistinguishable from real document content, so a caller
    would end up summarizing the error message itself.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text as one string. PDF pages are separated by a
        newline; every DOCX paragraph is followed by a newline.

    Raises:
        ValueError: If the extension is not .txt, .pdf or .docx.
        Exception: Errors from opening or parsing the file (for example
            FileNotFoundError for a missing .txt file) propagate unchanged.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext == '.txt':
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    if ext == '.pdf':
        reader = PdfReader(file_path)
        # `or ""` guards against a page yielding no text object at all, and
        # the newline keeps the last word of one page from fusing with the
        # first word of the next.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if ext == '.docx':
        doc = Document(file_path)
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    raise ValueError(
        f"Unsupported file extension '{ext}' (expected .txt, .pdf or .docx)"
    )

# ============================================================================
# Step 2: Define the main function (replaces Flask route)
# ============================================================================
def summarize_document(text_input, file_upload, max_length, min_length, num_beams):
    """Summarize pasted text or an uploaded file and report basic statistics.

    This is the callback wired into the Gradio interface. An uploaded file
    takes precedence over pasted text; the text box is only used when no
    file was uploaded.

    Args:
        text_input: Document text typed or pasted by the user (may be empty
            or None).
        file_upload: Path of the uploaded file as handed over by the UI, or
            None when nothing was uploaded.
        max_length: Upper bound for the summary length; values above 1024
            are capped to 1024.
        min_length: Lower bound for the summary length.
        num_beams: Beam count forwarded to the summarizer.

    Returns:
        A formatted string with the summary and word statistics, or an
        error message string when validation, file reading or summarization
        fails.
    """
    document = None

    if file_upload is not None:
        try:
            document = read_file(file_upload)
        except Exception as e:
            return f"Error reading file {str(e)}"
        # Log only the size: court documents may contain personal data, so
        # their full text must not be dumped to the server logs.
        print(f"Loaded {len(document or '')} characters from uploaded file")
    elif text_input and text_input.strip():
        document = text_input

    # Validation (same as Flask)
    if not document or not document.strip():
        return "❌ Error: Please enter a document or upload a file to summarize"

    if max_length < min_length:
        return "❌ Error: Max length must be greater than min length"

    # Cap max_length (same as Flask)
    max_length = min(max_length, 1024)

    try:
        # Generate summary (same logic as Flask)
        summary = summarizer.summarize(
            document=document,
            max_length=int(max_length),
            min_length=int(min_length),
            num_beams=int(num_beams)
        )

        # Word counts are whitespace-split approximations used for display only.
        doc_words = len(document.split())
        summary_words = len(summary.split())
        compression_ratio = round(summary_words / doc_words, 2) if doc_words > 0 else 0

        # Format output with statistics
        output = f"""πŸ“ SUMMARY:
{summary}

πŸ“Š STATISTICS:
β€’ Document length: {doc_words} words
β€’ Summary length: {summary_words} words
β€’ Compression ratio: {compression_ratio}x
β€’ Device used: {device}
"""
        return output

    except Exception as e:
        # Surface model failures in the UI instead of crashing the request.
        return f"❌ Error during summarization: {str(e)}"


# ============================================================================
# Step 3: Create Gradio Interface
# ============================================================================
# This replaces your HTML templates and Flask routes
# Build the Gradio UI. The order of `inputs` must match the positional
# parameters of summarize_document:
# (text_input, file_upload, max_length, min_length, num_beams).
demo = gr.Interface(
    fn=summarize_document,  # The function to call

    # Define inputs (replaces HTML form fields)
    inputs=[
        gr.Textbox(
            label="πŸ“„ Indonesian Court Document",
            placeholder="Paste your court document text here...",
            lines=10,
            max_lines=20
        ),
        gr.File(
            # Label lists every accepted type so it agrees with file_types.
            label="Upload PDF, Word (.docx) or text (.txt) document",
            file_types=[".pdf", ".docx", ".txt"]
        ),
        gr.Slider(
            minimum=50,
            maximum=300,
            value=120,
            step=10,
            label="Max Summary Length (words)",
            info="Maximum length of the generated summary (recommended: 100-150)"
        ),
        gr.Slider(
            minimum=30,
            maximum=150,
            value=50,
            step=5,
            label="Min Summary Length (words)",
            info="Minimum length of the generated summary (recommended: 40-60)"
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=6,
            step=1,
            label="Num Beams",
            info="Higher = better quality but slower (recommended: 6-8)"
        )
    ],

    # Define output (replaces JSON response)
    outputs=gr.Textbox(
        label="✨ Generated Summary",
        lines=15,
        max_lines=25
    ),

    # UI Configuration
    title="πŸ›οΈ Indonesian Court Document Summarizer",
    description="""
    This tool uses a pre-trained AI model to summarize Indonesian court documents.

    **How to use:**
    1. Paste your court document in the text box, or upload a .pdf, .docx or .txt file (an uploaded file takes precedence over pasted text)
    2. Adjust the summary length parameters (optional)
    3. Click "Submit" to generate summary

    **Note:** First run may take longer as the model loads.
    """,

    # Example inputs for users to try; one value per input component, with
    # None meaning "no file uploaded".
    examples=[
        [
            "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah.",
            None,
            120,
            50,
            6
        ]
    ]
)

# ============================================================================
# Step 4: Launch the app
# ============================================================================
if __name__ == "__main__":
    # Local testing alternative: demo.launch(share=False)
    #
    # Settings used for the Hugging Face Spaces deployment.
    # Note: In Gradio 6.0+, theme is passed to launch() not Interface()
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external access
        "server_port": 7860,       # Default HF Spaces port
        "share": False,            # Don't create public link (HF does this)
    }
    demo.launch(**launch_options)