sidbhasin committed on
Commit
7d37af7
·
verified ·
1 Parent(s): 89f7ad6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -0
app.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ import nltk
4
+ from nltk.tokenize import sent_tokenize, word_tokenize
5
+ from nltk.corpus import stopwords
6
+ from collections import Counter
7
+ import spacy
8
+ import pandas as pd
9
+ import plotly.graph_objects as go
10
+ from textblob import TextBlob
11
+ import re
12
+
13
# Download required NLTK data.
# Each resource is checked on its own: having the tokenizer installed says
# nothing about whether the stopword corpus or the tagger is present, and
# analyze_content() needs the stopword list on every call.
_NLTK_RESOURCES = (
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
    ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
)
for _resource_path, _package_name in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_name)

# Load spaCy model, installing it on first run if it is missing.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    # spacy.load reports a missing model package as OSError; anything else
    # (e.g. KeyboardInterrupt) should propagate instead of being swallowed.
    # NOTE(review): this Streamlit call runs before st.set_page_config() in
    # main(); confirm the deployed Streamlit version tolerates that order.
    st.warning("Installing spaCy model...")
    import subprocess
    import sys

    # Run the download with the interpreter that is executing this app so the
    # model lands in the same environment; check=True surfaces a failed
    # download immediately instead of failing later inside spacy.load.
    subprocess.run(
        [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'],
        check=True,
    )
    nlp = spacy.load('en_core_web_sm')

# Initialize summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
32
+
33
def analyze_content(text):
    """Analyze content and return structural metrics.

    Args:
        text: Raw content. Markdown headings (``# Title``) and HTML headings
            (``<h2>Title</h2>``) are both recognised.

    Returns:
        A dict with the keys:
            'total_words': count of alphanumeric tokens that are not English
                stopwords,
            'sentences': number of sentences found by ``sent_tokenize``,
            'paragraphs': number of non-blank chunks separated by blank lines,
            'headings': number of Markdown plus HTML headings,
            'keywords': the 10 most frequent NOUN/PROPN token texts mapped to
                their counts.
    """
    # Basic metrics
    words = word_tokenize(text)
    sentences = sent_tokenize(text)
    # Split on blank lines (tolerating stray whitespace / CRLF) and drop empty
    # chunks so that empty input reports 0 paragraphs rather than 1.
    paragraphs = [chunk for chunk in re.split(r'\n\s*\n', text) if chunk.strip()]

    # Word count without stopwords
    stop_words = set(stopwords.words('english'))
    meaningful_words = [
        w for w in words if w.lower() not in stop_words and w.isalnum()
    ]

    # Heading detection.
    # Markdown: anchor to the start of a line so an inline '#' (e.g. "C# is")
    # is not mistaken for a heading.
    markdown_headings = re.findall(
        r'^[ \t]{0,3}#{1,6}[ \t]+\S.*$', text, flags=re.MULTILINE
    )
    # HTML: allow attributes on the opening tag, require the matching closing
    # level, and let the heading body span lines.
    html_headings = re.findall(
        r'<h([1-6])\b[^>]*>.*?</h\1\s*>', text, flags=re.IGNORECASE | re.DOTALL
    )
    headings = len(markdown_headings) + len(html_headings)

    # Keyword extraction using spaCy
    doc = nlp(text)
    keywords = [token.text for token in doc if token.pos_ in ('NOUN', 'PROPN')]
    keyword_freq = Counter(keywords)

    return {
        'total_words': len(meaningful_words),
        'sentences': len(sentences),
        'paragraphs': len(paragraphs),
        'headings': headings,
        'keywords': dict(keyword_freq.most_common(10)),
    }
59
+
60
def calculate_content_score(metrics, targets):
    """Calculate a 0-100 content score from metrics and their targets.

    Each targeted metric earns credit in proportion to how close it is to its
    ``'min'`` target, capped at full credit. The credit is weighted and then
    normalised by the total weight of the targets that were supplied, so
    meeting every supplied target yields 100.

    Args:
        metrics: Mapping of metric name to its measured value, e.g.
            ``{'total_words': 1200, 'headings': 5, 'paragraphs': 12}``.
        targets: Mapping of metric name to a dict containing at least a
            ``'min'`` entry. Metric names without a known weight are ignored.

    Returns:
        The score as an int between 0 and 100.
    """
    weights = {
        'words': 0.3,
        'sentences': 0.2,
        'paragraphs': 0.2,
        'headings': 0.3,
    }
    # Metric name -> key into ``weights``.
    weight_key_for = {
        'total_words': 'words',
        'sentences': 'sentences',
        'paragraphs': 'paragraphs',
        'headings': 'headings',
    }

    earned = 0.0
    available = 0.0
    for metric, target in targets.items():
        weight_key = weight_key_for.get(metric)
        if weight_key is None:
            continue
        weight = weights[weight_key]
        # A targeted metric always counts toward the achievable total; if it
        # was not measured it simply earns nothing.
        available += weight
        if metric not in metrics:
            continue

        minimum = target['min']
        if minimum <= 0:
            # Nothing is required, so the target is trivially met (and we
            # avoid dividing by zero).
            progress = 1.0
        else:
            progress = min(max(metrics[metric] / minimum, 0.0), 1.0)
        earned += progress * weight

    if not available:
        return 0
    # Normalise by the weights actually in play; otherwise weights belonging
    # to untargeted metrics would cap the score below 100.
    return min(round(earned / available * 100), 100)
81
+
82
def create_gauge_chart(score):
    """Build a Plotly gauge figure that shows ``score`` on a 0-100 axis.

    Args:
        score: The value displayed by the gauge needle and number.

    Returns:
        A ``go.Figure`` holding a single indicator, 250 pixels high.
    """
    # Background bands, listed as ((low, high), color).
    bands = (
        ((0, 50), "lightgray"),
        ((50, 75), "gray"),
        ((75, 100), "darkgray"),
    )
    gauge_settings = {
        'axis': {'range': [0, 100]},
        'bar': {'color': "#1f77b4"},
        'steps': [
            {'range': [low, high], 'color': shade}
            for (low, high), shade in bands
        ],
    }

    indicator = go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        gauge=gauge_settings,
    )
    figure = go.Figure(indicator)
    figure.update_layout(height=250)
    return figure
100
+
101
def main():
    """Render the Content Optimizer page.

    The sidebar collects min/max targets for words, headings and paragraphs.
    The left column takes the text and, on "Analyze Content", stores the
    metrics, score and summary in ``st.session_state``. The right column
    shows the stored score gauge, structure metrics, top keywords and summary.
    """
    st.set_page_config(page_title="Content Optimizer", layout="wide")

    # Custom CSS
    st.markdown("""
        <style>
        .stTextArea textarea {
            height: 400px;
        }
        .metric-card {
            background-color: white;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            margin: 10px 0;
        }
        </style>
    """, unsafe_allow_html=True)

    # Sidebar configuration
    st.sidebar.title("Content Targets")
    targets = {
        'total_words': {
            'min': st.sidebar.slider("Minimum words", 300, 3000, 1500),
            'max': st.sidebar.slider("Maximum words", 1000, 5000, 2500),
        },
        'headings': {
            'min': st.sidebar.slider("Minimum headings", 1, 20, 8),
            'max': st.sidebar.slider("Maximum headings", 5, 30, 15),
        },
        'paragraphs': {
            'min': st.sidebar.slider("Minimum paragraphs", 5, 50, 15),
            'max': st.sidebar.slider("Maximum paragraphs", 10, 100, 25),
        },
    }
    # The slider ranges overlap, so a minimum can be set above its maximum;
    # tell the user instead of silently showing an inverted target range.
    for metric_name, bounds in targets.items():
        if bounds['min'] > bounds['max']:
            st.sidebar.warning(
                f"Minimum {metric_name.replace('_', ' ')} is greater than the maximum."
            )

    # Main content area
    st.title("Content Optimizer")

    col1, col2 = st.columns([2, 1])

    with col1:
        text = st.text_area("Enter your content here", height=400)
        if st.button("Analyze Content"):
            if text.strip():
                # Analyze content
                metrics = analyze_content(text)
                score = calculate_content_score(metrics, targets)

                # Store results in session state
                st.session_state.metrics = metrics
                st.session_state.score = score

                # Create summary. truncation=True clips inputs longer than
                # the model accepts; without it long articles make the
                # pipeline fail.
                try:
                    summary = summarizer(
                        text,
                        max_length=130,
                        min_length=30,
                        do_sample=False,
                        truncation=True,
                    )[0]['summary_text']
                except Exception as exc:  # UI boundary: report, don't crash the page
                    # Drop any summary left over from a previous run so it is
                    # not shown next to the new metrics.
                    st.session_state.pop('summary', None)
                    st.error(f"Could not generate a summary: {exc}")
                else:
                    st.session_state.summary = summary
            else:
                st.warning("Please enter some content to analyze.")

    with col2:
        if 'score' in st.session_state:
            st.plotly_chart(
                create_gauge_chart(st.session_state.score),
                use_container_width=True,
            )

            # Display metrics
            st.markdown("### Content Structure")
            metrics = st.session_state.metrics

            cols = st.columns(2)
            with cols[0]:
                st.metric("Words", metrics['total_words'], f"Target: {targets['total_words']['min']}-{targets['total_words']['max']}")
                st.metric("Paragraphs", metrics['paragraphs'], f"Target: {targets['paragraphs']['min']}-{targets['paragraphs']['max']}")

            with cols[1]:
                st.metric("Headings", metrics['headings'], f"Target: {targets['headings']['min']}-{targets['headings']['max']}")
                st.metric("Sentences", metrics['sentences'])

            # Display keywords
            st.markdown("### Top Keywords")
            keywords_df = pd.DataFrame(
                metrics['keywords'].items(),
                columns=['Keyword', 'Frequency'],
            )
            st.dataframe(keywords_df, use_container_width=True)

            # Display summary
            if 'summary' in st.session_state:
                st.markdown("### Content Summary")
                st.write(st.session_state.summary)


if __name__ == "__main__":
    main()