Spaces:
Sleeping
Sleeping
File size: 7,080 Bytes
6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 2d98c81 6dcb6b4 2d98c81 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 2d98c81 6dcb6b4 9cfd753 6dcb6b4 2d98c81 9cfd753 6dcb6b4 9cfd753 6dcb6b4 2d98c81 9cfd753 6dcb6b4 2d98c81 6dcb6b4 9cfd753 6dcb6b4 9cfd753 f4b0528 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 2d98c81 9cfd753 2d98c81 9cfd753 6897a38 6dcb6b4 9cfd753 2d98c81 9cfd753 6897a38 9cfd753 6897a38 9cfd753 6897a38 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 6dcb6b4 9cfd753 2d98c81 6897a38 9cfd753 6dcb6b4 9cfd753 3c86a47 9cfd753 6dcb6b4 9cfd753 6dcb6b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
"""
Gradio Interface for Indonesian Court Document Summarization
This is a conversion from Flask to Gradio for easier deployment on Hugging Face Spaces.
LEARNING NOTES:
- Gradio automatically creates a web UI from function definitions
- No need for HTML templates or route decorators
- Input/output types define the UI components
"""
import os
import gradio as gr
import torch
from pretrained_summarizer import create_summarizer
from pypdf import PdfReader
from docx import Document
# ============================================================================
# Step 1: Initialize the model (same as Flask)
# ============================================================================
# Report start-up progress, pick the torch device, then build the summarizer.
print("Loading summarization model...")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# A failed model load is fatal: report it, then re-raise so the app stops
# instead of starting without a summarizer.
try:
    summarizer = create_summarizer("balanced")
    print("β Summarization model loaded successfully!")
except Exception as load_error:
    print(f"β Failed to load model: {load_error}")
    raise
def read_file(file_path):
    """Extract the plain text of a ``.txt``, ``.pdf`` or ``.docx`` file.

    The format is chosen from the file extension, compared
    case-insensitively.

    Failures are raised instead of being returned as a message string, so a
    caller can never mistake an error message for document content (the
    summarizer would otherwise happily summarize the error text).

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text as one string. PDF page texts are concatenated
        without a separator; each DOCX paragraph is followed by a newline.

    Raises:
        ValueError: If the extension is not ``.txt``, ``.pdf`` or ``.docx``.
        FileNotFoundError: If a ``.txt`` path does not exist.
        Exception: Errors raised by the PDF / DOCX parsers propagate
            unchanged.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext == ".txt":
        # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order
        # mark, which would otherwise end up as a stray character in the text.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            return f.read()

    if ext == ".pdf":
        reader = PdfReader(file_path)
        # Guard against pages for which no text is extracted (None / empty).
        return "".join(page.extract_text() or "" for page in reader.pages)

    if ext == ".docx":
        doc = Document(file_path)
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    raise ValueError(f"Unsupported file extension {ext}")
# ============================================================================
# Step 2: Define the main function (replaces Flask route)
# ============================================================================
def summarize_document(text_input, file_upload, max_length, min_length, num_beams):
    """Summarize an uploaded file or pasted text and report basic statistics.

    This is the callback wired into the Gradio interface. It always returns a
    string: either the formatted summary or a human-readable error message.

    Args:
        text_input: Document text pasted by the user; may be empty or None.
        file_upload: Value of the file input, passed unchanged to
            ``read_file`` (presumably a file path), or None when nothing was
            uploaded.
        max_length: Upper bound for the summary length; capped at 1024.
        min_length: Lower bound for the summary length.
        num_beams: Beam count forwarded to the summarizer.

    Returns:
        The summary followed by word counts, the summary/document word ratio
        and the device name, or an error message when input is missing,
        the length bounds are inconsistent, or reading/summarizing fails.
    """
    document = None
    if file_upload is not None:
        try:
            document = read_file(file_upload)
        except Exception as e:
            return f"Error reading file {str(e)}"
        # Log only the size: the uploaded document itself must not be dumped
        # into the application logs.
        print(f"Read {len(document or '')} characters from uploaded file")

    # An uploaded file takes priority; pasted text is used when no file was
    # given or when the file yielded no usable text.
    has_file_text = bool(document and document.strip())
    if not has_file_text and text_input and text_input.strip():
        document = text_input

    # Validation
    if not document or not document.strip():
        return "β Error: Please enter a document or upload a file to summarize"
    if max_length < min_length:
        return "β Error: Max length must be greater than min length"

    # Cap max_length
    max_length = min(max_length, 1024)

    try:
        summary = summarizer.summarize(
            document=document,
            max_length=int(max_length),
            min_length=int(min_length),
            num_beams=int(num_beams),
        )

        # Whitespace-delimited word counts for the statistics section.
        doc_words = len(document.split())
        summary_words = len(summary.split())
        compression_ratio = round(summary_words / doc_words, 2) if doc_words > 0 else 0

        # Joined with newlines; the trailing empty item keeps the final "\n".
        output_lines = [
            "π SUMMARY:",
            f"{summary}",
            "π STATISTICS:",
            f"β’ Document length: {doc_words} words",
            f"β’ Summary length: {summary_words} words",
            f"β’ Compression ratio: {compression_ratio}x",
            f"β’ Device used: {device}",
            "",
        ]
        return "\n".join(output_lines)
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"β Error during summarization: {str(e)}"
# ============================================================================
# Step 3: Create Gradio Interface
# ============================================================================
# This replaces your HTML templates and Flask routes
def _build_demo():
    """Assemble the Gradio interface around ``summarize_document``.

    The input components are listed in the same order as the callback's
    parameters: text, file, max length, min length, beam count.
    """
    # --- Inputs ------------------------------------------------------------
    document_box = gr.Textbox(
        label="π Indonesian Court Document",
        placeholder="Paste your court document text here...",
        lines=10,
        max_lines=20,
    )
    upload_field = gr.File(
        label="Upload pdf or Word document",
        file_types=[".pdf", ".docx", ".txt"],
    )
    max_length_slider = gr.Slider(
        minimum=50,
        maximum=300,
        value=120,
        step=10,
        label="Max Summary Length (words)",
        info="Maximum length of the generated summary (recommended: 100-150)",
    )
    min_length_slider = gr.Slider(
        minimum=30,
        maximum=150,
        value=50,
        step=5,
        label="Min Summary Length (words)",
        info="Minimum length of the generated summary (recommended: 40-60)",
    )
    beams_slider = gr.Slider(
        minimum=1,
        maximum=10,
        value=6,
        step=1,
        label="Num Beams",
        info="Higher = better quality but slower (recommended: 6-8)",
    )

    # --- Output ------------------------------------------------------------
    summary_box = gr.Textbox(
        label="β¨ Generated Summary",
        lines=15,
        max_lines=25,
    )

    # --- Page text -----------------------------------------------------------
    # Markdown shown under the title; built line by line so the text carries
    # no indentation from this function body.
    page_description = (
        "\n"
        "This tool uses a pre-trained AI model to summarize Indonesian court documents.\n"
        "**How to use:**\n"
        "1. Paste your court document in the text box\n"
        "2. Adjust the summary length parameters (optional)\n"
        '3. Click "Submit" to generate summary\n'
        "**Note:** First run may take longer as the model loads.\n"
    )

    # One ready-made example row: text, no file, then the three slider values.
    sample_rows = [
        [
            "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah.",
            None,
            120,
            50,
            6,
        ]
    ]

    return gr.Interface(
        fn=summarize_document,
        inputs=[
            document_box,
            upload_field,
            max_length_slider,
            min_length_slider,
            beams_slider,
        ],
        outputs=summary_box,
        title="ποΈ Indonesian Court Document Summarizer",
        description=page_description,
        examples=sample_rows,
    )


demo = _build_demo()
# ============================================================================
# Step 4: Launch the app
# ============================================================================
if __name__ == "__main__":
    # Settings used for the Hugging Face Spaces deployment. For a purely
    # local test run, `demo.launch(share=False)` is enough.
    # Note from the original author: in Gradio 6.0+, a theme is passed to
    # launch() rather than to Interface().
    launch_options = {
        "server_name": "0.0.0.0",  # allow external access
        "server_port": 7860,  # default HF Spaces port
        "share": False,  # no public share link (HF provides the URL)
    }
    demo.launch(**launch_options)
|