Spaces:

sitwala
/

hyphen_problem

Sleeping

App Files Community

SitwalaM committed on Dec 18, 2024

Commit

c649b24

1 Parent(s): f6b9e7f

proper code

Browse files

Files changed (1) hide show

app.py +21 -63

app.py CHANGED Viewed

@@ -1,66 +1,24 @@
 import streamlit as st
-import pandas as pd
-from dolma.core.utils import split_paragraphs, split_sentences
-# Title of the Streamlit app
-st.title('Text Splitter: Paragraphs and Sentences')
-# File uploader for text document
-uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
-if uploaded_file:
-    # Read the uploaded text file
-    sample_text = uploaded_file.read().decode("utf-8")
-else:
-    # Text input from user
-    sample_text = st.text_area("Or paste your text below", height=300)
-if sample_text:
-    # Split the text into paragraphs
-    paragraphs = split_paragraphs(sample_text)
-    # Split the text into sentences
-    sentences = split_sentences(sample_text)
-    # Show number of paragraphs and sentences
-    st.write(f"Number of paragraphs: {len(paragraphs)}")
-    st.write(f"Number of sentences: {len(sentences)}")
-    # Create two columns for separate views
-    col1, col2 = st.columns(2)
-    # Display paragraphs in the left column
-    with col1:
-        st.header("Paragraphs")
-        for i, paragraph in enumerate(paragraphs):
-            st.subheader(f"Paragraph {i + 1}")
-            st.write(paragraph.text)
-    # Display sentences in the right column
-    with col2:
-        st.header("Sentences")
-        for i, sentence in enumerate(sentences):
-            st.subheader(f"Sentence {i + 1}")
-            st.write(sentence.text)
-    # Convert paragraphs and sentences to pandas DataFrames
-    paragraphs_df = pd.DataFrame([p.text for p in paragraphs], columns=["Paragraph"])
-    sentences_df = pd.DataFrame([s.text for s in sentences], columns=["Sentence"])
-    # Option to download the paragraphs and sentences as CSV files
-    st.download_button(
-        label="Download Paragraphs as CSV",
-        data=paragraphs_df.to_csv(index=False).encode('utf-8'),
-        file_name="paragraphs.csv",
-        mime="text/csv"
-    )
-    st.download_button(
-        label="Download Sentences as CSV",
-        data=sentences_df.to_csv(index=False).encode('utf-8'),
-        file_name="sentences.csv",
-        mime="text/csv"
-    )
-else:
-    st.write("Please upload a text file or paste your text to split it into paragraphs and sentences.")

 import streamlit as st
+import re
+def clean_text(text):
+    # Collapse a hyphen followed by one or more whitespace characters (spaces, tabs, newlines) into just the hyphen
+    text = re.sub(r'-\s+', '-', text)
+    return text
+# Title of the Streamlit app
+st.title('Text Cleaner')
+# Input text from the user
+input_text = st.text_area("Enter text to clean:")
+# Button to trigger cleaning
+if st.button('Clean Text'):
+    if input_text:
+        # Clean the text
+        cleaned_text = clean_text(input_text)
+        # Display the cleaned text
+        st.subheader("Cleaned Text")
+        st.write(cleaned_text)
+    else:
+        st.warning("Please enter some text to clean.")