# indo_summary_AI / app.py
# Flippinjack's picture
# added file upload button and upload file function
# 2d98c81
"""
Gradio Interface for Indonesian Court Document Summarization
This is a conversion from Flask to Gradio for easier deployment on Hugging Face Spaces.
LEARNING NOTES:
- Gradio automatically creates a web UI from function definitions
- No need for HTML templates or route decorators
- Input/output types define the UI components
"""
import os
import gradio as gr
import torch
from pretrained_summarizer import create_summarizer
from pypdf import PdfReader
from docx import Document
# ============================================================================
# Step 1: Initialize the model (same as Flask)
# ============================================================================
# The model is loaded once, at import time, and reused by every request.
print("Loading summarization model...")
# NOTE: `device` is only reported (here and in the summary statistics); it is
# not passed to create_summarizer() anywhere in this file.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

try:
    summarizer = create_summarizer("balanced")
    print("✓ Summarization model loaded successfully!")
except Exception as e:
    # Log the reason, then re-raise so start-up aborts: the app cannot serve
    # requests without a model.
    print(f"✗ Failed to load model: {e}")
    raise
def read_file(file_path):
    """Extract the plain text of a ``.txt``, ``.pdf`` or ``.docx`` file.

    The format is selected from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text. Failures are NOT raised: an unsupported
        extension, a missing file, or any other error while reading is
        reported as a message string returned in place of the content, so
        callers cannot distinguish the two cases by type.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    try:
        if ext == '.txt':
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()

        if ext == '.pdf':
            reader = PdfReader(file_path)
            # Join pages with a newline so the last word of one page does not
            # fuse with the first word of the next. `or ""` is a defensive
            # guard in case the extractor yields None for a page without text.
            return "\n".join(
                page.extract_text() or "" for page in reader.pages
            )

        if ext == '.docx':
            doc = Document(file_path)
            # One line per paragraph, each terminated by a newline.
            return "".join(
                paragraph.text + "\n" for paragraph in doc.paragraphs
            )

        return f"Unsupported file extention {ext}"
    except FileNotFoundError:
        return f"Error: the file {file_path} is not found"
    except Exception as e:
        # Deliberate catch-all: any parser or decoding failure is turned into
        # a message string instead of propagating to the UI.
        return f"An Error has occured {e}"
# ============================================================================
# Step 2: Define the main function (replaces Flask route)
# ============================================================================
def summarize_document(text_input, file_upload, max_length, min_length, num_beams):
    """Summarize an uploaded file or pasted text and report word statistics.

    An uploaded file takes precedence: when ``file_upload`` is given,
    ``text_input`` is ignored.

    Args:
        text_input: Document text pasted by the user (may be empty or None).
        file_upload: Path of the uploaded file, or None when nothing was
            uploaded.
        max_length: Upper length bound forwarded to the summarizer; values
            above 1024 are capped to 1024.
        min_length: Lower length bound forwarded to the summarizer.
        num_beams: Beam count forwarded to the summarizer.

    Returns:
        A display string: the summary followed by statistics on success,
        otherwise a message describing the validation or runtime error.
    """
    document = None
    if file_upload is not None:
        try:
            # The document text is intentionally not printed: uploaded court
            # documents should not end up in the server logs.
            document = read_file(file_upload)
        except Exception as e:
            return f"Error reading file {str(e)}"
    elif text_input and text_input.strip():
        document = text_input

    # Validation (same as Flask)
    if not document or not document.strip():
        return "❌ Error: Please enter a document or upload a file to summarize"
    if max_length < min_length:
        return "❌ Error: Max length must be greater than min length"

    # Cap max_length (same as Flask)
    if max_length > 1024:
        max_length = 1024

    try:
        # The UI may deliver slider values as floats, hence the int() casts.
        summary = summarizer.summarize(
            document=document,
            max_length=int(max_length),
            min_length=int(min_length),
            num_beams=int(num_beams)
        )

        # Whitespace-delimited word counts; the ratio is summary / document.
        doc_words = len(document.split())
        summary_words = len(summary.split())
        compression_ratio = round(summary_words / doc_words, 2) if doc_words > 0 else 0

        # The template lines start at column 0 because they are part of the
        # string shown to the user.
        output = f"""📝 SUMMARY:
{summary}
📊 STATISTICS:
• Document length: {doc_words} words
• Summary length: {summary_words} words
• Compression ratio: {compression_ratio}x
• Device used: {device}
"""
        return output
    except Exception as e:
        # Report model/runtime failures in the output box instead of crashing.
        return f"❌ Error during summarization: {str(e)}"
# ============================================================================
# Step 3: Create Gradio Interface
# ============================================================================
# This replaces your HTML templates and Flask routes
demo = gr.Interface(
    fn=summarize_document,  # The function to call
    # Inputs are passed positionally, so their order must match
    # summarize_document's parameters:
    # text_input, file_upload, max_length, min_length, num_beams.
    inputs=[
        gr.Textbox(
            label="📄 Indonesian Court Document",
            placeholder="Paste your court document text here...",
            lines=10,
            max_lines=20
        ),
        gr.File(
            # The label lists every extension accepted by file_types.
            label="Upload PDF, Word (.docx) or text (.txt) document",
            file_types=[".pdf", ".docx", ".txt"]
        ),
        gr.Slider(
            minimum=50,
            maximum=300,
            value=120,
            step=10,
            label="Max Summary Length (words)",
            info="Maximum length of the generated summary (recommended: 100-150)"
        ),
        gr.Slider(
            minimum=30,
            maximum=150,
            value=50,
            step=5,
            label="Min Summary Length (words)",
            info="Minimum length of the generated summary (recommended: 40-60)"
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=6,
            step=1,
            label="Num Beams",
            info="Higher = better quality but slower (recommended: 6-8)"
        )
    ],
    # Single text output: the formatted summary or an error message.
    outputs=gr.Textbox(
        label="✨ Generated Summary",
        lines=15,
        max_lines=25
    ),
    # UI Configuration
    title="🏛️ Indonesian Court Document Summarizer",
    # The description text starts at column 0 because it is part of the
    # string shown to the user.
    description="""
This tool uses a pre-trained AI model to summarize Indonesian court documents.

**How to use:**
1. Paste your court document in the text box, or upload a .pdf, .docx or .txt file (an uploaded file takes precedence over pasted text)
2. Adjust the summary length parameters (optional)
3. Click "Submit" to generate summary

**Note:** First run may take longer as the model loads.
""",
    # Example row: one value per input, in the same order as `inputs`
    # (None = no uploaded file).
    examples=[
        [
            "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah.",
            None,
            120,
            50,
            6
        ]
    ]
)
# ============================================================================
# Step 4: Launch the app
# ============================================================================
if __name__ == "__main__":
    # For local testing, `demo.launch(share=False)` is sufficient.
    # The settings below are the ones used for Hugging Face Spaces deployment.
    # Note: In Gradio 6.0+, theme is passed to launch() not Interface()
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external access
        "server_port": 7860,       # Default HF Spaces port
        "share": False,            # Don't create public link (HF does this)
    }
    demo.launch(**launch_options)