# para_split / app.py
# Hugging Face Space by chetanganatra.
# Commit 98fba7d (verified): "Create app.py"
import gradio as gr
import json
import re
from typing import List, Dict
# Candidate sentence boundary: a run of whitespace that directly follows a
# terminator (. ! ?) and is followed by a word character, optionally preceded
# by opening quotes/brackets. Group 1 captures that first word character so
# the caller can decide whether it is an uppercase letter. The Unicode escapes
# are the left double quote, left single quote and left guillemet.
_SENTENCE_BOUNDARY = re.compile(
    r'(?<=[.!?])\s+(?=["\'\u201c\u2018\u00ab(\[]*(\w))'
)


def split_text_into_sentences(text: str) -> Dict:
    """
    Split text into sentences and return a JSON-serialisable summary.

    A boundary is whitespace that follows ``.``, ``!`` or ``?`` and precedes
    an uppercase letter. The uppercase test uses ``str.isupper`` so it also
    recognises non-ASCII capitals, and the letter may be preceded by opening
    quotes or brackets (so ``He left. "Why?" she asked.`` is split before the
    quotation). This is a heuristic: abbreviations followed by a capitalised
    word (``Dr. Smith``) are still split.

    Args:
        text (str): Input text paragraph. ``None``, empty and
            whitespace-only values are reported as an error.

    Returns:
        Dict: On success, ``status`` ("success"), ``sentences`` (list of
        stripped sentence strings), ``count``, ``original_length`` (length
        of the stripped input) and ``metadata`` with the average, longest
        and shortest sentence lengths in characters. On empty input,
        ``status`` ("error"), ``message``, an empty ``sentences`` list and
        ``count`` 0.
    """
    if not text or not text.strip():
        return {
            "status": "error",
            "message": "Empty input text",
            "sentences": [],
            "count": 0
        }

    # Work on the trimmed text; original_length refers to this trimmed form.
    text = text.strip()

    # Walk the candidate boundaries and cut only where the next sentence
    # starts with an uppercase letter. Digits, underscores and lowercase
    # letters all fail isupper(), so "approx. 3 items" stays in one piece.
    pieces = []
    start = 0
    for boundary in _SENTENCE_BOUNDARY.finditer(text):
        if boundary.group(1).isupper():
            pieces.append(text[start:boundary.start()])
            start = boundary.end()
    pieces.append(text[start:])

    # Clean up sentences (remove extra whitespace, drop empty fragments).
    sentences = [piece.strip() for piece in pieces if piece.strip()]

    # Compute the lengths once and reuse them for every statistic.
    lengths = [len(sentence) for sentence in sentences]

    return {
        "status": "success",
        "sentences": sentences,
        "count": len(sentences),
        "original_length": len(text),
        "metadata": {
            "avg_sentence_length": sum(lengths) / len(lengths) if lengths else 0,
            "longest_sentence": max(lengths) if lengths else 0,
            "shortest_sentence": min(lengths) if lengths else 0
        }
    }
def format_json_output(result: Dict) -> str:
    """
    Render a result dictionary as human-readable JSON text.

    Args:
        result (Dict): JSON-serialisable mapping, e.g. the value returned by
            the sentence splitter.

    Returns:
        str: The mapping serialised with a two-space indent; non-ASCII
        characters are written as-is rather than as ``\\uXXXX`` escapes.
    """
    pretty = json.dumps(result, ensure_ascii=False, indent=2)
    return pretty
# Markdown rendered at the bottom of the page as API documentation.
# Kept as a regular (non-raw) string on purpose: the trailing backslashes in
# the cURL example are Python line continuations, so that command is emitted
# as a single line.
# The "Response format" values match what split_text_into_sentences returns
# for "Hello world! How are you? This is a test." (41 characters; sentence
# lengths 12, 12 and 15).
# NOTE(review): the /api/predict route depends on the installed Gradio
# version -- confirm it against the deployed Space.
_API_USAGE_MARKDOWN = """
## API Usage
This app provides both a web interface and API endpoints.
### Using the API programmatically:
```python
import requests
import json
# Replace with your actual Hugging Face Space URL
url = "https://your-username-text-splitter.hf.space/api/predict"
payload = {
"data": ["Your text paragraph here..."]
}
response = requests.post(url, json=payload)
result = response.json()
print(json.dumps(result["data"][0], indent=2))
```
### cURL example:
```bash
curl -X POST https://your-username-text-splitter.hf.space/api/predict \
-H "Content-Type: application/json" \
-d '{"data": ["Hello world! How are you? This is a test."]}'
```
### Response format:
```json
{
"status": "success",
"sentences": ["Hello world!", "How are you?", "This is a test."],
"count": 3,
"original_length": 41,
"metadata": {
"avg_sentence_length": 13.0,
"longest_sentence": 15,
"shortest_sentence": 12
}
}
```
"""

# Create Gradio interface: a text box and button on the left, the JSON result
# on the right, followed by example inputs and the API documentation.
with gr.Blocks(title="Text to Sentences API") as demo:
    gr.Markdown("# Text to Sentences Splitter API")
    gr.Markdown("Enter a text paragraph and get it split into sentences with JSON output.")

    with gr.Row():
        # Left column: user input and the submit button.
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter your text paragraph here...",
                lines=5,
                max_lines=10
            )
            submit_btn = gr.Button("Split into Sentences", variant="primary")

        # Right column: the dictionary returned by the splitter.
        with gr.Column():
            output_json = gr.JSON(
                label="JSON Output",
                show_label=True
            )

    # Example inputs, wired to the same function and components as the button.
    # NOTE(review): cache_examples=True presumably runs the function on each
    # example ahead of time -- confirm for the Gradio version in use.
    gr.Examples(
        examples=[
            ["Hello world! How are you today? I hope you're doing well. This is a test sentence."],
            ["The quick brown fox jumps over the lazy dog. Machine learning is fascinating! Natural language processing involves many complex tasks. Text processing is an important skill."],
            ["What is artificial intelligence? AI refers to computer systems that can perform tasks typically requiring human intelligence. These systems can learn, reason, and adapt to new situations."]
        ],
        inputs=input_text,
        outputs=output_json,
        fn=split_text_into_sentences,
        cache_examples=True
    )

    # Connect the interface: clicking the button sends the text box contents
    # to the splitter and shows the returned dictionary in the JSON panel.
    submit_btn.click(
        fn=split_text_into_sentences,
        inputs=input_text,
        outputs=output_json
    )

    # API documentation
    gr.Markdown(_API_USAGE_MARKDOWN)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()