Spaces:

sidbhasin
/

PDF_Answer_AI_By_Syncmerce

Sleeping

App Files Files Community

sidbhasin committed on Nov 12, 2024

Commit

7d37af7

verified ·

1 Parent(s): 89f7ad6

Create app.py

Browse files

Files changed (1) hide show

app.py +189 -0

app.py ADDED Viewed

	@@ -0,0 +1,189 @@

+import streamlit as st
+from transformers import pipeline
+import nltk
+from nltk.tokenize import sent_tokenize, word_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
+import spacy
+import pandas as pd
+import plotly.graph_objects as go
+from textblob import TextBlob
+import re
+# Download required NLTK data
+try:
+    nltk.data.find('tokenizers/punkt')
+except LookupError:
+    nltk.download('punkt')
+    nltk.download('stopwords')
+    nltk.download('averaged_perceptron_tagger')
+# Load spaCy model
+try:
+    nlp = spacy.load('en_core_web_sm')
+except:
+    st.warning("Installing spaCy model...")
+    import os
+    os.system('python -m spacy download en_core_web_sm')
+    nlp = spacy.load('en_core_web_sm')
+# Initialize summarizer
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+def analyze_content(text):
+    """Analyze content and return metrics"""
+    # Basic metrics
+    words = word_tokenize(text)
+    sentences = sent_tokenize(text)
+    paragraphs = text.split('\n\n')
+    # Word count without stopwords
+    stop_words = set(stopwords.words('english'))
+    meaningful_words = [w for w in words if w.lower() not in stop_words and w.isalnum()]
+    # Heading detection (assuming markdown or HTML-like format)
+    headings = len(re.findall(r'#{1,6}\s.*|<h[1-6]>.*?</h[1-6]>', text))
+    # Keyword extraction using spaCy
+    doc = nlp(text)
+    keywords = [token.text for token in doc if token.pos_ in ['NOUN', 'PROPN']]
+    keyword_freq = Counter(keywords)
+    return {
+        'total_words': len(meaningful_words),
+        'sentences': len(sentences),
+        'paragraphs': len(paragraphs),
+        'headings': headings,
+        'keywords': dict(keyword_freq.most_common(10))
+    }
+def calculate_content_score(metrics, targets):
+    """Calculate content score based on metrics"""
+    score = 0
+    weights = {
+        'words': 0.3,
+        'sentences': 0.2,
+        'paragraphs': 0.2,
+        'headings': 0.3
+    }
+    for metric, target in targets.items():
+        if metric in metrics:
+            current = metrics[metric]
+            if metric == 'total_words':
+                score += min((current / target['min']) * weights['words'], weights['words']) * 100
+            elif metric == 'headings':
+                score += min((current / target['min']) * weights['headings'], weights['headings']) * 100
+            elif metric == 'paragraphs':
+                score += min((current / target['min']) * weights['paragraphs'], weights['paragraphs']) * 100
+    return min(round(score), 100)
+def create_gauge_chart(score):
+    """Create a gauge chart for content score"""
+    fig = go.Figure(go.Indicator(
+        mode = "gauge+number",
+        value = score,
+        domain = {'x': [0, 1], 'y': [0, 1]},
+        gauge = {
+            'axis': {'range': [0, 100]},
+            'bar': {'color': "#1f77b4"},
+            'steps': [
+                {'range': [0, 50], 'color': "lightgray"},
+                {'range': [50, 75], 'color': "gray"},
+                {'range': [75, 100], 'color': "darkgray"}
+            ]
+        }
+    ))
+    fig.update_layout(height=250)
+    return fig
+def main():
+    st.set_page_config(page_title="Content Optimizer", layout="wide")
+    # Custom CSS
+    st.markdown("""
+        <style>
+        .stTextArea textarea {
+            height: 400px;
+        }
+        .metric-card {
+            background-color: white;
+            padding: 20px;
+            border-radius: 10px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            margin: 10px 0;
+        }
+        </style>
+    """, unsafe_allow_html=True)
+    # Sidebar configuration
+    st.sidebar.title("Content Targets")
+    targets = {
+        'total_words': {
+            'min': st.sidebar.slider("Minimum words", 300, 3000, 1500),
+            'max': st.sidebar.slider("Maximum words", 1000, 5000, 2500)
+        },
+        'headings': {
+            'min': st.sidebar.slider("Minimum headings", 1, 20, 8),
+            'max': st.sidebar.slider("Maximum headings", 5, 30, 15)
+        },
+        'paragraphs': {
+            'min': st.sidebar.slider("Minimum paragraphs", 5, 50, 15),
+            'max': st.sidebar.slider("Maximum paragraphs", 10, 100, 25)
+        }
+    }
+    # Main content area
+    st.title("Content Optimizer")
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        text = st.text_area("Enter your content here", height=400)
+        if st.button("Analyze Content"):
+            if text:
+                # Analyze content
+                metrics = analyze_content(text)
+                score = calculate_content_score(metrics, targets)
+                # Store results in session state
+                st.session_state.metrics = metrics
+                st.session_state.score = score
+                # Create summary
+                summary = summarizer(text, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
+                st.session_state.summary = summary
+    with col2:
+        if hasattr(st.session_state, 'score'):
+            st.plotly_chart(create_gauge_chart(st.session_state.score), use_container_width=True)
+            # Display metrics
+            st.markdown("### Content Structure")
+            metrics = st.session_state.metrics
+            cols = st.columns(2)
+            with cols[0]:
+                st.metric("Words", metrics['total_words'], f"Target: {targets['total_words']['min']}-{targets['total_words']['max']}")
+                st.metric("Paragraphs", metrics['paragraphs'], f"Target: {targets['paragraphs']['min']}-{targets['paragraphs']['max']}")
+            with cols[1]:
+                st.metric("Headings", metrics['headings'], f"Target: {targets['headings']['min']}-{targets['headings']['max']}")
+                st.metric("Sentences", metrics['sentences'])
+            # Display keywords
+            st.markdown("### Top Keywords")
+            keywords_df = pd.DataFrame(
+                metrics['keywords'].items(),
+                columns=['Keyword', 'Frequency']
+            )
+            st.dataframe(keywords_df, use_container_width=True)
+            # Display summary
+            if hasattr(st.session_state, 'summary'):
+                st.markdown("### Content Summary")
+                st.write(st.session_state.summary)
+if __name__ == "__main__":
+    main()