SitwalaM committed on
Commit
c649b24
·
1 Parent(s): f6b9e7f

proper code

Browse files
Files changed (1) hide show
  1. app.py +21 -63
app.py CHANGED
@@ -1,66 +1,24 @@
1
  import streamlit as st
2
- import pandas as pd
3
- from dolma.core.utils import split_paragraphs, split_sentences
4
 
5
- # Title of the Streamlit app
6
- st.title('Text Splitter: Paragraphs and Sentences')
7
-
8
- # File uploader for text document
9
- uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
10
-
11
- if uploaded_file:
12
- # Read the uploaded text file
13
- sample_text = uploaded_file.read().decode("utf-8")
14
- else:
15
- # Text input from user
16
- sample_text = st.text_area("Or paste your text below", height=300)
17
-
18
- if sample_text:
19
- # Split the text into paragraphs
20
- paragraphs = split_paragraphs(sample_text)
21
-
22
- # Split the text into sentences
23
- sentences = split_sentences(sample_text)
24
-
25
- # Show number of paragraphs and sentences
26
- st.write(f"Number of paragraphs: {len(paragraphs)}")
27
- st.write(f"Number of sentences: {len(sentences)}")
28
-
29
- # Create two columns for separate views
30
- col1, col2 = st.columns(2)
31
 
32
- # Display paragraphs in the left column
33
- with col1:
34
- st.header("Paragraphs")
35
- for i, paragraph in enumerate(paragraphs):
36
- st.subheader(f"Paragraph {i + 1}")
37
- st.write(paragraph.text)
38
-
39
- # Display sentences in the right column
40
- with col2:
41
- st.header("Sentences")
42
- for i, sentence in enumerate(sentences):
43
- st.subheader(f"Sentence {i + 1}")
44
- st.write(sentence.text)
45
-
46
- # Convert paragraphs and sentences to pandas DataFrames
47
- paragraphs_df = pd.DataFrame([p.text for p in paragraphs], columns=["Paragraph"])
48
- sentences_df = pd.DataFrame([s.text for s in sentences], columns=["Sentence"])
49
-
50
- # Option to download the paragraphs and sentences as CSV files
51
- st.download_button(
52
- label="Download Paragraphs as CSV",
53
- data=paragraphs_df.to_csv(index=False).encode('utf-8'),
54
- file_name="paragraphs.csv",
55
- mime="text/csv"
56
- )
57
-
58
- st.download_button(
59
- label="Download Sentences as CSV",
60
- data=sentences_df.to_csv(index=False).encode('utf-8'),
61
- file_name="sentences.csv",
62
- mime="text/csv"
63
- )
64
-
65
- else:
66
- st.write("Please upload a text file or paste your text to split it into paragraphs and sentences.")
 
1
  import streamlit as st
2
+ import re
 
3
 
4
def clean_text(text: str) -> str:
    r"""Remove any whitespace that directly follows a hyphen.

    Each hyphen followed by one or more whitespace characters is replaced
    by the hyphen alone. The pattern uses ``\s``, so tabs and line breaks
    after a hyphen are removed as well as ordinary spaces. Whitespace
    *before* a hyphen is left untouched.

    Args:
        text: The text to clean.

    Returns:
        A copy of ``text`` with the whitespace after every hyphen removed.

    Examples:
        >>> clean_text("well-  known")
        'well-known'
        >>> clean_text("co-\noperate")
        'co-operate'
        >>> clean_text("a - b")
        'a -b'
    """
    return re.sub(r'-\s+', '-', text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
# Title of the Streamlit app
st.title('Text Cleaner')

# Input text from the user
input_text = st.text_area("Enter text to clean:")

# Button to trigger cleaning; Streamlit reruns the script on every click,
# so the branch below only executes on the run caused by the button press.
if st.button('Clean Text'):
    # Whitespace-only input contains nothing to clean, so it is treated the
    # same as an empty box and gets the warning instead of a blank result.
    if input_text and input_text.strip():
        cleaned_text = clean_text(input_text)
        st.subheader("Cleaned Text")
        # Show the result verbatim. st.write() would render the string as
        # Markdown, so characters such as '*', '_' or '#' in the user's
        # text would be interpreted as formatting rather than displayed.
        st.text(cleaned_text)
    else:
        st.warning("Please enter some text to clean.")