File size: 4,425 Bytes
98fba7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import gradio as gr
import json
import re
from typing import List, Dict

def split_text_into_sentences(text: str) -> Dict:
    """
    Split text into sentences and return as JSON

    A sentence boundary is a run of whitespace that directly follows one of
    ``.``, ``!`` or ``?`` and is directly followed by an uppercase letter.
    "Uppercase" is decided with ``str.isupper()``, so sentences that begin
    with non-ASCII capitals (e.g. ``É``, ``Ñ``, Cyrillic or Greek capitals)
    are split as well, not only those starting with ``A``-``Z``.
    Whitespace that is followed by a lowercase letter, digit or other
    character is not a boundary (so ``"e.g. the"`` stays in one sentence).

    Args:
        text (str): Input text paragraph

    Returns:
        Dict: JSON response with sentences and metadata. For empty or
        whitespace-only input the response has ``status`` ``"error"``, a
        ``message``, an empty ``sentences`` list and ``count`` 0. Otherwise
        it has ``status`` ``"success"``, the ``sentences`` list, their
        ``count``, ``original_length`` (length of the stripped input) and a
        ``metadata`` dict with the average, longest and shortest sentence
        lengths in characters.
    """
    if not text or not text.strip():
        return {
            "status": "error",
            "message": "Empty input text",
            "sentences": [],
            "count": 0
        }

    # Clean the text
    text = text.strip()

    # Walk over every whitespace run that follows sentence-ending punctuation
    # and cut there only when the next character is an uppercase letter.
    # str.isupper() is Unicode-aware, unlike the character class [A-Z].
    pieces = []
    start = 0
    for gap in re.finditer(r'(?<=[.!?])\s+', text):
        following = text[gap.end():gap.end() + 1]
        if following.isupper():
            pieces.append(text[start:gap.start()])
            start = gap.end()
    pieces.append(text[start:])

    # Clean up sentences (remove extra whitespace, drop empty pieces)
    sentences = [piece.strip() for piece in pieces if piece.strip()]

    # Compute the lengths once; they feed all three metadata fields
    lengths = [len(sentence) for sentence in sentences]

    # Create response
    response = {
        "status": "success",
        "sentences": sentences,
        "count": len(sentences),
        "original_length": len(text),
        "metadata": {
            "avg_sentence_length": sum(lengths) / len(lengths) if lengths else 0,
            "longest_sentence": max(lengths) if lengths else 0,
            "shortest_sentence": min(lengths) if lengths else 0
        }
    }

    return response

def format_json_output(result: Dict) -> str:
    """
    Render a result dictionary as indented, human-readable JSON text.

    Args:
        result (Dict): JSON-serializable response dictionary

    Returns:
        str: JSON text indented by two spaces, with non-ASCII characters
        written as-is rather than as ``\\uXXXX`` escapes
    """
    pretty = json.dumps(result, ensure_ascii=False, indent=2)
    return pretty

# Create Gradio interface: a two-column layout with the input textbox and
# submit button on the left and the JSON result on the right.
with gr.Blocks(title="Text to Sentences API") as demo:
    gr.Markdown("# Text to Sentences Splitter API")
    gr.Markdown("Enter a text paragraph and get it split into sentences with JSON output.")
    
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter your text paragraph here...",
                lines=5,
                max_lines=10
            )
            submit_btn = gr.Button("Split into Sentences", variant="primary")
            
        with gr.Column():
            output_json = gr.JSON(
                label="JSON Output",
                show_label=True
            )
            
    # Example inputs; each example is run through the same splitter function
    # that the submit button uses.
    gr.Examples(
        examples=[
            ["Hello world! How are you today? I hope you're doing well. This is a test sentence."],
            ["The quick brown fox jumps over the lazy dog. Machine learning is fascinating! Natural language processing involves many complex tasks. Text processing is an important skill."],
            ["What is artificial intelligence? AI refers to computer systems that can perform tasks typically requiring human intelligence. These systems can learn, reason, and adapt to new situations."]
        ],
        inputs=input_text,
        outputs=output_json,
        fn=split_text_into_sentences,
        cache_examples=True
    )
    
    # Connect the interface
    submit_btn.click(
        fn=split_text_into_sentences,
        inputs=input_text,
        outputs=output_json
    )
    
    # API documentation shown below the interface.
    # NOTE: the line-continuation backslashes in the cURL example are written
    # as "\\" because a single backslash before a newline inside a non-raw
    # string literal is consumed by Python and would join the lines.
    # The sample response values match what split_text_into_sentences returns
    # for the cURL example input.
    gr.Markdown("""
    ## API Usage
    
    This app provides both a web interface and API endpoints.
    
    ### Using the API programmatically:
    
    ```python
    import requests
    import json
    
    # Replace with your actual Hugging Face Space URL
    url = "https://your-username-text-splitter.hf.space/api/predict"
    
    payload = {
        "data": ["Your text paragraph here..."]
    }
    
    response = requests.post(url, json=payload)
    result = response.json()
    print(json.dumps(result["data"][0], indent=2))
    ```
    
    ### cURL example:
    ```bash
    curl -X POST https://your-username-text-splitter.hf.space/api/predict \\
         -H "Content-Type: application/json" \\
         -d '{"data": ["Hello world! How are you? This is a test."]}'
    ```
    
    ### Response format:
    ```json
    {
      "status": "success",
      "sentences": ["Hello world!", "How are you?", "This is a test."],
      "count": 3,
      "original_length": 41,
      "metadata": {
        "avg_sentence_length": 13.0,
        "longest_sentence": 15,
        "shortest_sentence": 12
      }
    }
    ```
    """)

# Launch the app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()