Spaces:

Flippinjack
/

indo_summary_AI

Sleeping

App Files Community

Flippinjack committed on Jan 6

Commit

9cfd753

1 Parent(s): 3c86a47

removed the text generation

Browse files

Files changed (1) hide show

app.py +99 -216

app.py CHANGED Viewed

@@ -1,86 +1,57 @@
 """
-Combined Gradio App - Indonesian AI Tools
-This is the MAIN FILE for Hugging Face Spaces deployment
 LEARNING NOTES:
-- Uses gr.TabbedInterface to combine multiple features
-- Each tab is a separate gr.Interface
-- This replaces your entire Flask app with one file
-- Hugging Face Spaces will automatically run this file
 """
 import gradio as gr
 import torch
-import tiktoken
 from pretrained_summarizer import create_summarizer
-from ml_model import GPTModel, generate_text_better, text_token_ids, token_text_ids
 # ============================================================================
-# Initialize Device
 # ============================================================================
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"🖥️  Using device: {device}")
-# ============================================================================
-# Load Models
-# ============================================================================
-# --- Summarization Model ---
-print("\n[1/2] Loading summarization model...")
 try:
     summarizer = create_summarizer("balanced")
-    print("✓ Summarization model loaded!")
-    summarizer_available = True
 except Exception as e:
-    print(f"✗ Summarization model failed: {e}")
-    summarizer_available = False
-# --- Text Generation Model (Optional - may not fit in free tier) ---
-print("\n[2/2] Loading custom GPT model...")
-try:
-    checkpoint = torch.load('gpt_model_checkpoint.pth', map_location=device)
-    model = GPTModel(checkpoint['config'])
-    model.load_state_dict(checkpoint['model_state_dict'])
-    model.to(device)
-    model.eval()
-    tokenizer = tiktoken.get_encoding("gpt2")
-    print("✓ Custom GPT model loaded!")
-    gpt_available = True
-except FileNotFoundError:
-    print("✗ GPT model not found (gpt_model_checkpoint.pth)")
-    print("  Skipping text generation feature...")
-    gpt_available = False
-except Exception as e:
-    print(f"✗ GPT model failed: {e}")
-    gpt_available = False
-print("\n" + "="*60)
-print("🚀 Gradio App Ready!")
-print("="*60)
-print(f"✓ Summarization: {'Available' if summarizer_available else 'Unavailable'}")
-print(f"✓ Text Generation: {'Available' if gpt_available else 'Unavailable'}")
-print("="*60 + "\n")
 # ============================================================================
-# TAB 1: Court Document Summarization
 # ============================================================================
 def summarize_document(document, max_length, min_length, num_beams):
-    """Summarize Indonesian court documents"""
-    if not summarizer_available:
-        return "❌ Summarization model is not available"
     if not document or not document.strip():
-        return "❌ Please enter a document to summarize"
     if max_length < min_length:
-        return "❌ Max length must be greater than min length"
     if max_length > 1024:
         max_length = 1024
     try:
         summary = summarizer.summarize(
             document=document,
             max_length=int(max_length),
@@ -88,198 +59,110 @@ def summarize_document(document, max_length, min_length, num_beams):
             num_beams=int(num_beams)
         )
         doc_words = len(document.split())
         summary_words = len(summary.split())
-        compression = round(summary_words / doc_words, 2) if doc_words > 0 else 0
         output = f"""📝 SUMMARY:
 {summary}
 📊 STATISTICS:
-• Original: {doc_words} words
-• Summary: {summary_words} words
-• Compression: {compression}x
-• Device: {device}
 """
         return output
     except Exception as e:
-        return f"❌ Error: {str(e)}"
-summarize_interface = gr.Interface(
-    fn=summarize_document,
     inputs=[
         gr.Textbox(
             label="📄 Indonesian Court Document",
-            placeholder="Paste your court document here...",
-            lines=10
         ),
-        gr.Slider(50, 1024, value=200, step=10, label="Max Length"),
-        gr.Slider(10, 500, value=30, step=10, label="Min Length"),
-        gr.Slider(1, 10, value=4, step=1, label="Num Beams")
     ],
-    outputs=gr.Textbox(label="✨ Summary", lines=15),
-    title="🏛️ Court Document Summarizer",
-    description="Summarize Indonesian court documents using AI",
-    examples=[
-        [
-            "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah.",
-            200, 30, 4
-        ]
-    ]
-)
-# ============================================================================
-# TAB 2: Text Generation (if model available)
-# ============================================================================
-def generate_text(prompt, max_tokens, temperature, top_k):
-    """Generate text using custom GPT model"""
-    if not gpt_available:
-        return "❌ Text generation model is not available. This feature requires the 1.5GB model checkpoint which may not be included in this deployment."
-    if not prompt or not prompt.strip():
-        return "❌ Please enter a prompt"
-    try:
-        encoded = text_token_ids(prompt, tokenizer).to(device)
-        with torch.no_grad():
-            token_ids = generate_text_better(
-                model=model,
-                idx=encoded,
-                max_new_tokens=int(max_tokens),
-                context_size=checkpoint['config']['context_length'],
-                temperature=float(temperature),
-                top_k=int(top_k)
-            )
-        generated_text = token_text_ids(token_ids, tokenizer)
-        output = f"""🤖 GENERATED TEXT:
-{generated_text}
-⚙️ PARAMETERS:
-• Tokens: {max_tokens}
-• Temperature: {temperature}
-• Top-K: {top_k}
-• Device: {device}
-"""
-        return output
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
-generate_interface = gr.Interface(
-    fn=generate_text,
-    inputs=[
-        gr.Textbox(label="💭 Prompt", lines=5, placeholder="Enter your prompt..."),
-        gr.Slider(10, 500, value=100, step=10, label="Max Tokens"),
-        gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature"),
-        gr.Slider(1, 100, value=50, step=1, label="Top-K")
-    ],
-    outputs=gr.Textbox(label="✨ Generated Text", lines=15),
-    title="🚀 Text Generator",
-    description="Generate text using custom GPT model",
     examples=[
-        ["Once upon a time,", 150, 0.8, 50],
-        ["The future of AI is", 100, 0.7, 40]
     ]
 )
 # ============================================================================
-# TAB 3: About / Info
-# ============================================================================
-def get_system_info():
-    """Display system and model information"""
-    info = f"""# 🤖 Indonesian AI Tools
-## System Information
-- **Device**: {device}
-- **PyTorch Version**: {torch.__version__}
-- **CUDA Available**: {torch.cuda.is_available()}
-## Available Models
-- **✅ Summarization**: {'Loaded' if summarizer_available else '❌ Not Available'}
-- **Text Generation**: {'✅ Loaded' if gpt_available else '❌ Not Available'}
-## Features
-1. **Court Document Summarization**
-   - Summarizes Indonesian legal documents
-   - Uses pre-trained transformer model
-   - Adjustable summary length
-2. **Text Generation** (if available)
-   - Custom GPT model
-   - Trained on specific corpus
-   - Creative text generation
-## Usage Tips
-- For summarization: Use 4-6 beams for best quality
-- For generation: Temperature 0.7-0.9 for creative output
-- Adjust parameters based on your needs
-## Technical Details
-- Framework: Gradio + PyTorch
-- Deployment: Hugging Face Spaces compatible
-- GPU Support: Automatic detection
-"""
-    return info
-info_interface = gr.Interface(
-    fn=get_system_info,
-    inputs=[],
-    outputs=gr.Markdown(),
-    title="ℹ️ About",
-    description="System information and usage guide"
-)
-# ============================================================================
-# Create Combined Tabbed Interface
-# ============================================================================
-# This is the KEY difference from Flask:
-# - One file combines all features
-# - Tabs organize different functions
-# - No routing needed - Gradio handles everything
-demo = gr.TabbedInterface(
-    # List of all interfaces (tabs)
-    interface_list=[
-        summarize_interface,
-        generate_interface if gpt_available else info_interface,
-        info_interface
-    ],
-    # Tab names
-    tab_names=[
-        "📄 Summarize",
-        "🚀 Generate" if gpt_available else "ℹ️ Info",
-        "ℹ️ About"
-    ],
-    # Overall title
-    title="🇮🇩 Indonesian AI Tools"
-)
-# ============================================================================
-# Launch Application
 # ============================================================================
 if __name__ == "__main__":
-    # For Hugging Face Spaces:
     demo.launch(
-        server_name="0.0.0.0",  # Allow external connections
-        server_port=7860,        # HF Spaces default port
-        share=False              # HF handles sharing
     )
-    # For local testing with public URL:
-    # demo.launch(share=True)
-    # For local testing only:
-    # demo.launch()

 """
+Gradio Interface for Indonesian Court Document Summarization
+This is a conversion from Flask to Gradio for easier deployment on Hugging Face Spaces.
 LEARNING NOTES:
+- Gradio automatically creates a web UI from function definitions
+- No need for HTML templates or route decorators
+- Input/output types define the UI components
 """
 import gradio as gr
 import torch
 from pretrained_summarizer import create_summarizer
 # ============================================================================
+# Step 1: Initialize the model (same as Flask)
 # ============================================================================
+print("Loading summarization model...")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 try:
     summarizer = create_summarizer("balanced")
+    print("✓ Summarization model loaded successfully!")
 except Exception as e:
+    print(f"✗ Failed to load model: {e}")
+    raise
 # ============================================================================
+# Step 2: Define the main function (replaces Flask route)
 # ============================================================================
 def summarize_document(document, max_length, min_length, num_beams):
+    """
+    This function replaces your Flask /summarize endpoint.
+    Parameters match your Flask API, but return values are simpler:
+    - No jsonify() needed
+    - Gradio handles the response automatically
+    """
+    # Validation (same as Flask)
     if not document or not document.strip():
+        return "❌ Error: Please enter a document to summarize"
     if max_length < min_length:
+        return "❌ Error: Max length must be greater than min length"
+    # Cap max_length (same as Flask)
     if max_length > 1024:
         max_length = 1024
     try:
+        # Generate summary (same logic as Flask)
         summary = summarizer.summarize(
             document=document,
             max_length=int(max_length),
             num_beams=int(num_beams)
         )
+        # Calculate statistics
         doc_words = len(document.split())
         summary_words = len(summary.split())
+        compression_ratio = round(summary_words / doc_words, 2) if doc_words > 0 else 0
+        # Format output with statistics
         output = f"""📝 SUMMARY:
 {summary}
 📊 STATISTICS:
+• Document length: {doc_words} words
+• Summary length: {summary_words} words
+• Compression ratio: {compression_ratio}x
+• Device used: {device}
 """
         return output
     except Exception as e:
+        return f"❌ Error during summarization: {str(e)}"
+# ============================================================================
+# Step 3: Create Gradio Interface
+# ============================================================================
+# This replaces your HTML templates and Flask routes
+demo = gr.Interface(
+    fn=summarize_document,  # The function to call
+    # Define inputs (replaces HTML form fields)
     inputs=[
         gr.Textbox(
             label="📄 Indonesian Court Document",
+            placeholder="Paste your court document text here...",
+            lines=10,
+            max_lines=20
+        ),
+        gr.Slider(
+            minimum=100,
+            maximum=1024,
+            value=200,
+            step=10,
+            label="Max Summary Length (words)",
+            info="Maximum length of the generated summary"
         ),
+        gr.Slider(
+            minimum=100,
+            maximum=200,
+            value=30,
+            step=5,
+            label="Min Summary Length (words)",
+            info="Minimum length of the generated summary"
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=10,
+            value=4,
+            step=1,
+            label="Num Beams",
+            info="Higher = better quality but slower (recommended: 4)"
+        )
     ],
+    # Define output (replaces JSON response)
+    outputs=gr.Textbox(
+        label="✨ Generated Summary",
+        lines=15,
+        max_lines=25
+    ),
+    # UI Configuration
+    title="🏛️ Indonesian Court Document Summarizer",
+    description="""
+    This tool uses a pre-trained AI model to summarize Indonesian court documents.
+    **How to use:**
+    1. Paste your court document in the text box
+    2. Adjust the summary length parameters (optional)
+    3. Click "Submit" to generate summary
+    **Note:** First run may take longer as the model loads.
+    """,
+    # Example inputs for users to try
     examples=[
+        [
+            "Putusan Pengadilan Negeri Jakarta ini memutuskan bahwa terdakwa terbukti bersalah melakukan tindak pidana korupsi dengan merugikan negara sebesar 5 miliar rupiah. Majelis hakim mempertimbangkan bahwa terdakwa telah dengan sengaja memperkaya diri sendiri dan menyalahgunakan wewenang sebagai pejabat publik. Berdasarkan pertimbangan tersebut, terdakwa dijatuhi hukuman penjara selama 8 tahun dan denda 500 juta rupiah.",
+            200,
+            30,
+            4
+        ]
     ]
 )
 # ============================================================================
+# Step 4: Launch the app
 # ============================================================================
 if __name__ == "__main__":
+    # For local testing:
+    # demo.launch(share=False)
+    # For Hugging Face Spaces deployment:
+    # Note: In Gradio 6.0+, theme is passed to launch() not Interface()
     demo.launch(
+        server_name="0.0.0.0",  # Allow external access
+        server_port=7860,        # Default HF Spaces port
+        share=False              # Don't create public link (HF does this)
     )