# Source: Hugging Face Space file view (commit 98fba7d, file size 4,425 bytes).
import gradio as gr
import json
import re
from typing import List, Dict
def split_text_into_sentences(text: str) -> Dict:
    """
    Split text into sentences and return them with summary statistics.

    A sentence boundary is a run of whitespace that directly follows ``.``,
    ``!`` or ``?`` and is directly followed by an uppercase letter. The
    uppercase test uses ``str.isupper`` so that sentences beginning with a
    non-ASCII capital (``É``, ``Ü``, ``Я`` ...) are split as well; for
    ASCII-only text the result is the same as matching ``[A-Z]``.

    Args:
        text (str): Input text paragraph

    Returns:
        Dict: For empty or whitespace-only input, an error payload with
        ``status``, ``message``, an empty ``sentences`` list and ``count`` 0.
        Otherwise a payload with ``status``, ``sentences``, ``count``,
        ``original_length`` (length of the stripped input) and ``metadata``
        holding the average, longest and shortest sentence length in
        characters.
    """
    if not text or not text.strip():
        return {
            "status": "error",
            "message": "Empty input text",
            "sentences": [],
            "count": 0
        }

    # Clean the text
    text = text.strip()

    # Walk over every whitespace run that follows sentence-ending punctuation
    # and cut there only when the next character is an uppercase letter.
    # Abbreviations followed by lowercase text ("e.g. this") are left intact.
    pieces = []
    start = 0
    for gap in re.finditer(r'(?<=[.!?])\s+', text):
        next_index = gap.end()
        if next_index < len(text) and text[next_index].isupper():
            pieces.append(text[start:gap.start()])
            start = next_index
    pieces.append(text[start:])

    # Clean up sentences (remove extra whitespace, drop empty fragments)
    sentences = [piece.strip() for piece in pieces if piece.strip()]

    # Compute the lengths once; all three statistics are derived from them.
    lengths = [len(sentence) for sentence in sentences]

    return {
        "status": "success",
        "sentences": sentences,
        "count": len(sentences),
        "original_length": len(text),
        "metadata": {
            "avg_sentence_length": sum(lengths) / len(lengths) if lengths else 0,
            "longest_sentence": max(lengths) if lengths else 0,
            "shortest_sentence": min(lengths) if lengths else 0
        }
    }
def format_json_output(result: Dict) -> str:
    """Serialize *result* to a human-readable JSON string.

    The output uses a two-space indent and keeps non-ASCII characters
    unescaped.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    return json.dumps(result, **dump_options)
# Markdown shown at the bottom of the page describing programmatic access.
# Defined at module level so its text starts at column 0 regardless of the
# indentation of the ``with`` block below.
# The cURL line continuations are written as ``\\``: in a non-raw string a
# lone backslash before a newline is consumed by Python and the example would
# collapse onto a single line.
# The sample response values match what split_text_into_sentences returns for
# the cURL payload (41 characters; sentence lengths 12, 12 and 15).
# NOTE(review): the "/api/predict" path depends on the installed Gradio
# version — confirm against the deployed Space.
API_USAGE_MARKDOWN = """
## API Usage

This app provides both a web interface and API endpoints.

### Using the API programmatically:

```python
import requests
import json

# Replace with your actual Hugging Face Space URL
url = "https://your-username-text-splitter.hf.space/api/predict"

payload = {
    "data": ["Your text paragraph here..."]
}

response = requests.post(url, json=payload)
result = response.json()
print(json.dumps(result["data"][0], indent=2))
```

### cURL example:

```bash
curl -X POST https://your-username-text-splitter.hf.space/api/predict \\
  -H "Content-Type: application/json" \\
  -d '{"data": ["Hello world! How are you? This is a test."]}'
```

### Response format:

```json
{
  "status": "success",
  "sentences": ["Hello world!", "How are you?", "This is a test."],
  "count": 3,
  "original_length": 41,
  "metadata": {
    "avg_sentence_length": 13.0,
    "longest_sentence": 15,
    "shortest_sentence": 12
  }
}
```
"""

# Create Gradio interface
with gr.Blocks(title="Text to Sentences API") as demo:
    gr.Markdown("# Text to Sentences Splitter API")
    gr.Markdown("Enter a text paragraph and get it split into sentences with JSON output.")

    # Two columns: text input and button on the left, JSON result on the right.
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter your text paragraph here...",
                lines=5,
                max_lines=10
            )
            submit_btn = gr.Button("Split into Sentences", variant="primary")
        with gr.Column():
            output_json = gr.JSON(
                label="JSON Output",
                show_label=True
            )

    # Example inputs
    gr.Examples(
        examples=[
            ["Hello world! How are you today? I hope you're doing well. This is a test sentence."],
            ["The quick brown fox jumps over the lazy dog. Machine learning is fascinating! Natural language processing involves many complex tasks. Text processing is an important skill."],
            ["What is artificial intelligence? AI refers to computer systems that can perform tasks typically requiring human intelligence. These systems can learn, reason, and adapt to new situations."]
        ],
        inputs=input_text,
        outputs=output_json,
        fn=split_text_into_sentences,
        cache_examples=True
    )

    # Connect the interface: the button runs the splitter on the textbox
    # content and shows the returned dict in the JSON component.
    submit_btn.click(
        fn=split_text_into_sentences,
        inputs=input_text,
        outputs=output_json
    )

    # API documentation
    gr.Markdown(API_USAGE_MARKDOWN)

# Launch the app
if __name__ == "__main__":
    demo.launch()