Spaces:
Sleeping
Sleeping
| """ | |
| Gradio Interface for Indonesian Court Document Summarization | |
| This is a conversion from Flask to Gradio for easier deployment on Hugging Face Spaces. | |
| LEARNING NOTES: | |
| - Gradio automatically creates a web UI from function definitions | |
| - No need for HTML templates or route decorators | |
| - Input/output types define the UI components | |
| """ | |
| import os | |
| import gradio as gr | |
| import torch | |
| from pretrained_summarizer import create_summarizer | |
| from pypdf import PdfReader | |
| from docx import Document | |
| # ============================================================================ | |
| # Step 1: Initialize the model (same as Flask) | |
| # ============================================================================ | |
print("Loading summarization model...")
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# The app is useless without a model, so a load failure is reported and then
# re-raised to abort start-up instead of serving a broken UI.
try:
    summarizer = create_summarizer("balanced")
except Exception as e:
    print(f"β Failed to load model: {e}")
    raise
else:
    print("β Summarization model loaded successfully!")
def read_file(file_path):
    """Extract the plain text of a .txt, .pdf or .docx file.

    The reader is selected from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text. PDF pages are joined with a newline; every DOCX
        paragraph is followed by a newline; .txt files are decoded as UTF-8
        and returned unchanged.

    Raises:
        ValueError: If the extension is not .txt, .pdf or .docx.
        FileNotFoundError: If a .txt file does not exist.
        Exception: Errors raised by the underlying readers propagate
            unchanged.
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext == '.txt':
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()

    if ext == '.pdf':
        reader = PdfReader(file_path)
        # Join pages with a newline so the last word of one page does not
        # fuse with the first word of the next. `or ""` is defensive in case
        # a page yields no text object at all.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if ext == '.docx':
        doc = Document(file_path)
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    # Failures are raised rather than returned as text: a returned message
    # would be indistinguishable from real document content, and the caller
    # would go on to summarise the error message itself.
    raise ValueError(f"Unsupported file extention {ext}")
| # ============================================================================ | |
| # Step 2: Define the main function (replaces Flask route) | |
| # ============================================================================ | |
def summarize_document(text_input, file_upload, max_length, min_length, num_beams):
    """Summarize pasted text or an uploaded file and report basic statistics.

    This is the Gradio callback that replaces the Flask /summarize endpoint.
    An uploaded file takes precedence over the text box; the text box is only
    used when no file was uploaded.

    Args:
        text_input: Document text pasted by the user (may be empty or None).
        file_upload: Path of the uploaded file, or None when nothing was
            uploaded. It is passed straight to `read_file`.
        max_length: Upper bound for the summary length; capped at 1024.
        min_length: Lower bound for the summary length.
        num_beams: Beam-search width forwarded to the summarizer.

    Returns:
        A single string: either the formatted summary followed by word-count
        statistics, or a human-readable error message. Errors are returned
        rather than raised so that they are shown in the output box.
    """
    document = None
    if file_upload is not None:
        try:
            # The file contents are deliberately NOT printed: uploads are
            # court documents and must not be copied into the server log.
            document = read_file(file_upload)
        except Exception as e:
            return f"Error reading file {str(e)}"
    elif text_input and text_input.strip():
        document = text_input

    # Validation (same as Flask)
    if not document or not document.strip():
        return "β Error: Please enter a document or upload a file to summarize"
    if max_length < min_length:
        return "β Error: Max length must be greater than min length"

    # Cap max_length (same as Flask)
    if max_length > 1024:
        max_length = 1024

    try:
        # Sliders may deliver floats, so the values are cast to int here.
        summary = summarizer.summarize(
            document=document,
            max_length=int(max_length),
            min_length=int(min_length),
            num_beams=int(num_beams),
        )

        # Whitespace-separated word counts of input and output.
        doc_words = len(document.split())
        summary_words = len(summary.split())
        # Ratio of summary words to document words (0 for an empty document).
        compression_ratio = round(summary_words / doc_words, 2) if doc_words > 0 else 0

        report_lines = [
            "π SUMMARY:",
            f"{summary}",
            "π STATISTICS:",
            f"β’ Document length: {doc_words} words",
            f"β’ Summary length: {summary_words} words",
            f"β’ Compression ratio: {compression_ratio}x",
            f"β’ Device used: {device}",
        ]
        return "\n".join(report_lines) + "\n"
    except Exception as e:
        return f"β Error during summarization: {str(e)}"
| # ============================================================================ | |
| # Step 3: Create Gradio Interface | |
| # ============================================================================ | |
| # This replaces your HTML templates and Flask routes | |
def _build_demo():
    """Assemble the Gradio interface that fronts `summarize_document`.

    The input widgets are created in the same order as the parameters of
    `summarize_document`, because Gradio passes their values positionally.
    """
    # --- Inputs (replace the HTML form fields) ------------------------------
    document_box = gr.Textbox(
        label="π Indonesian Court Document",
        placeholder="Paste your court document text here...",
        lines=10,
        max_lines=20,
    )
    upload_box = gr.File(
        label="Upload pdf or Word document",
        file_types=[".pdf", ".docx", ".txt"],
    )
    max_length_slider = gr.Slider(
        minimum=50,
        maximum=300,
        value=120,
        step=10,
        label="Max Summary Length (words)",
        info="Maximum length of the generated summary (recommended: 100-150)",
    )
    min_length_slider = gr.Slider(
        minimum=30,
        maximum=150,
        value=50,
        step=5,
        label="Min Summary Length (words)",
        info="Minimum length of the generated summary (recommended: 40-60)",
    )
    beams_slider = gr.Slider(
        minimum=1,
        maximum=10,
        value=6,
        step=1,
        label="Num Beams",
        info="Higher = better quality but slower (recommended: 6-8)",
    )

    # --- Output (replaces the JSON response) --------------------------------
    summary_box = gr.Textbox(
        label="β¨ Generated Summary",
        lines=15,
        max_lines=25,
    )

    # --- Page text ----------------------------------------------------------
    description_text = (
        "\n"
        "This tool uses a pre-trained AI model to summarize Indonesian court documents.\n"
        "**How to use:**\n"
        "1. Paste your court document in the text box\n"
        "2. Adjust the summary length parameters (optional)\n"
        "3. Click \"Submit\" to generate summary\n"
        "**Note:** First run may take longer as the model loads.\n"
    )

    # One ready-made example row: text, no file, then the three slider values.
    sample_document = "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah."
    example_rows = [[sample_document, None, 120, 50, 6]]

    return gr.Interface(
        fn=summarize_document,
        inputs=[
            document_box,
            upload_box,
            max_length_slider,
            min_length_slider,
            beams_slider,
        ],
        outputs=summary_box,
        title="ποΈ Indonesian Court Document Summarizer",
        description=description_text,
        examples=example_rows,
    )


# This replaces the HTML templates and Flask routes.
demo = _build_demo()
| # ============================================================================ | |
| # Step 4: Launch the app | |
| # ============================================================================ | |
if __name__ == "__main__":
    # For a quick local run, `demo.launch(share=False)` is sufficient.
    # The options below target a Hugging Face Spaces deployment.
    # Note: In Gradio 6.0+, theme is passed to launch() not Interface()
    launch_options = {
        "server_name": "0.0.0.0",  # Listen on all interfaces (external access)
        "server_port": 7860,       # Default HF Spaces port
        "share": False,            # No public share link (HF provides the URL)
    }
    demo.launch(**launch_options)