Spaces:

shaheerawan3
/

AI_OutputAnalyzer

Sleeping

App Files Community

shaheerawan3 committed on Dec 23, 2024

Commit

318abcf

verified ·

1 Parent(s): 2a2426e

Update app.py

Browse files

Files changed (1) hide show

app.py +249 -322

app.py CHANGED Viewed

@@ -1,52 +1,90 @@
 import streamlit as st
-import pandas as pd
-import numpy as np
-import plotly.graph_objects as go
-import plotly.express as px
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
-from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.corpus import stopwords
-from nltk.sentiment import SentimentIntensityAnalyzer
-from gensim import corpora, models
-import spacy
-import requests
-from langdetect import detect
-import json
-import base64
-from datetime import datetime
-import tempfile
-from fpdf import FPDF
-import os
-from functools import lru_cache
 import logging
-from concurrent.futures import ThreadPoolExecutor
-from typing import Dict, List, Any, Optional
-import io
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
 class TextProcessor:
-    """Handles text input processing and validation"""
-    @staticmethod
-    def process_file_upload(uploaded_file) -> Optional[str]:
-        """Process uploaded file and return text content"""
         try:
-            if uploaded_file is None:
                 return None
-            # Get file extension
-            file_extension = uploaded_file.name.split('.')[-1].lower()
-            if file_extension == 'txt':
-                return uploaded_file.read().decode('utf-8')
-            else:
-                raise ValueError(f"Unsupported file type: {file_extension}")
         except Exception as e:
             logger.error(f"Error processing file upload: {str(e)}")
@@ -54,31 +92,41 @@ class TextProcessor:
             return None
     @staticmethod
-    def validate_text(text: str) -> bool:
-        """Validate input text"""
         if not text or len(text.strip()) == 0:
             st.error("Please enter some text to analyze")
             return False
-        if len(text.split()) > 10000:  # Arbitrary limit
-            st.error("Text is too long. Please enter a shorter text")
             return False
         return True
 class AdvancedAnalyzer:
-    """Handles text analysis using various NLP techniques"""
-    def __init__(self):
         self._initialize_models()
-    @lru_cache(maxsize=1)
     def _initialize_models(self):
-        """Initialize all required models with caching"""
         try:
             self.sentiment_analyzer = SentimentIntensityAnalyzer()
-            self.nlp = spacy.load('en_core_web_sm')
             self.sentiment_model = pipeline(
                 "sentiment-analysis",
-                model="nlptown/bert-base-multilingual-uncased-sentiment",
                 return_all_scores=True
             )
             logger.info("Models initialized successfully")
@@ -86,200 +134,172 @@ class AdvancedAnalyzer:
             logger.error(f"Error initializing models: {str(e)}")
             raise
-    def analyze_sentiment_batch(self, text: str, batch_size: int = 1000) -> Dict:
-        """Analyze sentiment in batches for better performance"""
-        sentences = sent_tokenize(text)
-        results = []
-        with ThreadPoolExecutor() as executor:
-            for i in range(0, len(sentences), batch_size):
-                batch = sentences[i:i + batch_size]
-                results.extend(executor.map(self.analyze_sentiment, batch))
-        # Aggregate results
-        compound = np.mean([r['compound'] for r in results])
-        emotions = {
-            'positive': np.mean([r['emotions']['positive'] for r in results]),
-            'negative': np.mean([r['emotions']['negative'] for r in results]),
-            'neutral': np.mean([r['emotions']['neutral'] for r in results])
-        }
-        return {'compound': compound, 'emotions': emotions}
-    def analyze_sentiment(self, text: str, language: str = 'en') -> Dict:
-        """Analyze sentiment with emotion detection"""
-        try:
-            if language != 'en':
-                sentiments = self.sentiment_model(text)[0]
-                return {
-                    'compound': max(s['score'] for s in sentiments),
-                    'emotions': {s['label']: s['score'] for s in sentiments}
-                }
-            else:
-                scores = self.sentiment_analyzer.polarity_scores(text)
-                return {
-                    'compound': scores['compound'],
-                    'emotions': {
-                        'positive': scores['pos'],
-                        'negative': scores['neg'],
-                        'neutral': scores['neu']
-                    }
-                }
-        except Exception as e:
-            logger.error(f"Error in sentiment analysis: {str(e)}")
-            raise
-    def extract_entities(self, text: str) -> Dict[str, List[str]]:
-        """Extract named entities with confidence scores"""
-        try:
-            doc = self.nlp(text)
-            entities = {}
-            for ent in doc.ents:
-                if ent.label_ not in entities:
-                    entities[ent.label_] = []
-                # Only include entities with high confidence
-                if ent.label_prob >= 0.8:
-                    entities[ent.label_].append({
-                        'text': ent.text,
-                        'confidence': round(ent.label_prob, 3)
-                    })
-            return entities
-        except Exception as e:
-            logger.error(f"Error in entity extraction: {str(e)}")
-            raise
-    def topic_modeling(self, text: str, num_topics: int = 3) -> List[Dict]:
-        """Extract main topics using LDA with preprocessing"""
         try:
-            # Tokenize and clean text
-            doc = self.nlp(text.lower())
-            tokens = [
-                token.lemma_ for token in doc
-                if not token.is_stop and not token.is_punct and token.is_alpha
-            ]
-            # Create dictionary and corpus
-            texts = [tokens]
-            dictionary = corpora.Dictionary(texts)
-            corpus = [dictionary.doc2bow(text) for text in texts]
-            # Train LDA model with coherence optimization
-            lda_model = models.LdaModel(
-                corpus=corpus,
-                id2word=dictionary,
-                num_topics=num_topics,
-                random_state=42,
-                passes=15,
-                alpha='auto',
-                per_word_topics=True
             )
-            # Extract topics with probabilities
-            topics = []
-            for idx, topic in lda_model.show_topics(formatted=False):
-                topics.append({
-                    'id': idx,
-                    'words': [(word, round(prob, 4))
-                             for word, prob in topic],
-                    'coherence': round(lda_model.get_topic_coherence(topic), 4)
-                })
-            return sorted(topics, key=lambda x: x['coherence'], reverse=True)
         except Exception as e:
-            logger.error(f"Error in topic modeling: {str(e)}")
             raise
-class PDFGenerator:
-    """Generates professional PDF reports with visualizations"""
-    def __init__(self):
-        self.pdf = FPDF()
-    def generate_report(self, analysis_results: Dict) -> str:
-        """Generate a detailed PDF report with charts"""
-        try:
-            self.pdf.add_page()
-            self._add_header()
-            self._add_summary(analysis_results)
-            self._add_sentiment_analysis(analysis_results)
-            self._add_topics(analysis_results)
-            self._add_entities(analysis_results)
-            self._add_footer()
-            # Save to temporary file
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
-                self.pdf.output(tmp.name)
-                return tmp.name
-        except Exception as e:
-            logger.error(f"Error generating PDF report: {str(e)}")
-            raise
-    def _add_header(self):
-        """Add report header"""
-        self.pdf.set_font('Arial', 'B', 16)
-        self.pdf.cell(190, 10, 'AI Output Analysis Report', 0, 1, 'C')
-        self.pdf.ln(10)
-    def _add_summary(self, results: Dict):
-        """Add analysis summary"""
-        self.pdf.set_font('Arial', 'B', 12)
-        self.pdf.cell(190, 10, 'Analysis Summary', 0, 1, 'L')
-        self.pdf.set_font('Arial', '', 10)
-        compound_score = results['sentiment']['compound']
-        sentiment_label = (
-            'Positive' if compound_score > 0.05
-            else 'Negative' if compound_score < -0.05
-            else 'Neutral'
-        )
-        self.pdf.cell(190, 10,
-                     f"Overall Sentiment: {sentiment_label} ({compound_score:.2f})",
-                     0, 1, 'L')
 def main():
-    st.set_page_config(
-        page_title="Enhanced AI Output Analyzer",
-        layout="wide",
-        initial_sidebar_state="expanded"
-    )
-    # Load custom CSS
-    st.markdown("""
-        <style>
-        .main { padding: 2rem; }
-        .stMetric {
-            background-color: #f0f2f6;
-            padding: 1rem;
-            border-radius: 0.5rem;
-            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-        }
-        .entity-tag {
-            background-color: #e9ecef;
-            padding: 0.2rem 0.5rem;
-            border-radius: 0.25rem;
-            margin: 0.2rem;
-            display: inline-block;
-        }
-        </style>
-    """, unsafe_allow_html=True)
-    # Initialize session state
-    if 'analysis_history' not in st.session_state:
-        st.session_state.analysis_history = []
     # Sidebar configuration
     with st.sidebar:
         st.title("Analysis Settings")
-        # Analysis options
-        st.subheader("Analysis Options")
-        num_topics = st.slider("Number of Topics", 2, 10, 3)
-        min_entity_confidence = st.slider("Entity Confidence Threshold", 0.0, 1.0, 0.8)
-        batch_size = st.select_slider(
-            "Processing Batch Size",
-            options=[500, 1000, 2000, 5000],
-            value=1000
         )
     # Main content
@@ -288,7 +308,6 @@ def main():
     # Input section
     input_method = st.radio("Choose input method:", ["Text Input", "File Upload"])
-    text_processor = TextProcessor()
     if input_method == "File Upload":
         text = text_processor.process_file_upload(
             st.file_uploader("Upload a text file", type=['txt'])
@@ -297,108 +316,16 @@ def main():
         text = st.text_area("Enter text to analyze:", height=200)
     # Analysis section
-    if st.button("Analyze", type="primary") and text_processor.validate_text(text):
         try:
-            with st.spinner("Performing analysis..."):
-                analyzer = AdvancedAnalyzer()
-                # Perform analysis with progress tracking
-                progress_bar = st.progress(0)
-                # Sentiment analysis
-                results = {
-                    'sentiment': analyzer.analyze_sentiment_batch(
-                        text, batch_size=batch_size
-                    )
-                }
-                progress_bar.progress(0.33)
-                # Topic modeling
-                results['topics'] = analyzer.topic_modeling(
-                    text, num_topics=num_topics
-                )
-                progress_bar.progress(0.66)
-                # Entity extraction
-                results['entities'] = analyzer.extract_entities(text)
-                progress_bar.progress(1.0)
-                # Display results
-                st.success("Analysis complete!")
-                # Save to history
-                st.session_state.analysis_history.append({
-                    'timestamp': datetime.now(),
-                    'results': results
-                })
-                # Display visualizations
-                display_results(results)
-                # Generate report
-                generate_downloadable_report(results)
         except Exception as e:
-            logger.error(f"Error during analysis: {str(e)}")
             st.error(f"An error occurred during analysis: {str(e)}")
-def display_results(results: Dict):
-    """Display analysis results with interactive visualizations"""
-    # Sentiment Analysis
-    st.subheader("Sentiment Analysis")
-    col1, col2 = st.columns(2)
-    with col1:
-        # Sentiment gauge
-        fig = go.Figure(go.Indicator(
-            mode="gauge+number",
-            value=results['sentiment']['compound'],
-            domain={'x': [0, 1], 'y': [0, 1]},
-            gauge={
-                'axis': {'range': [-1, 1]},
-                'bar': {'color': "darkblue"},
-                'steps': [
-                    {'range': [-1, -0.05], 'color': "lightcoral"},
-                    {'range': [-0.05, 0.05], 'color': "lightgray"},
-                    {'range': [0.05, 1], 'color': "lightgreen"}
-                ]
-            }
-        ))
-        st.plotly_chart(fig)
-    with col2:
-        # Emotions pie chart
-        emotions_df = pd.DataFrame(
-            results['sentiment']['emotions'].items(),
-            columns=['Emotion', 'Score']
-        )
-        fig = px.pie(
-            emotions_df,
-            values='Score',
-            names='Emotion',
-            title="Emotional Distribution"
-        )
-        st.plotly_chart(fig)
-def generate_downloadable_report(results: Dict):
-    """Generate and provide downloadable report"""
-    try:
-        pdf_generator = PDFGenerator()
-        pdf_path = pdf_generator.generate_report(results)
-        with open(pdf_path, "rb") as pdf_file:
-            st.download_button(
-                label="📊 Download Analysis Report (PDF)",
-                data=pdf_file,
-                file_name=f"analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
-                mime="application/pdf"
-            )
-        # Clean up
-        os.unlink(pdf_path)
-    except Exception as e:
-        logger.error(f"Error generating downloadable report: {str(e)}")
-        st.error("Failed to generate report. Please try again.")
 if __name__ == "__main__":
     main()

+# config.yaml
+config_yaml = """
+models:
+  spacy: en_core_web_sm
+  sentiment: nlptown/bert-base-multilingual-uncased-sentiment
+analysis:
+  batch_size: 1000
+  min_entity_confidence: 0.8
+  num_topics: 3
+  max_text_length: 50000
+security:
+  max_file_size: 5242880  # 5MB
+  allowed_extensions: [txt]
+logging:
+  level: INFO
+  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+"""
+# utils/config.py
+import yaml
+from pathlib import Path
+def load_config():
+    config_path = Path("config.yaml")
+    if not config_path.exists():
+        with open(config_path, "w") as f:
+            f.write(config_yaml)
+    with open(config_path) as f:
+        return yaml.safe_load(f)
+# utils/logger.py
+import logging
+from typing import Optional
+def setup_logger(name: Optional[str] = None) -> logging.Logger:
+    config = load_config()
+    logger = logging.getLogger(name or __name__)
+    logger.setLevel(config['logging']['level'])
+    if not logger.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter(config['logging']['format'])
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+    return logger
+# text_processing.py
+from typing import Optional, Dict
 import streamlit as st
+from pathlib import Path
 import logging
+logger = setup_logger("text_processing")
 class TextProcessor:
+    def __init__(self, config: Dict):
+        self.config = config
+        self.max_file_size = config['security']['max_file_size']
+        self.allowed_extensions = config['security']['allowed_extensions']
+    def validate_file(self, uploaded_file) -> bool:
+        if uploaded_file is None:
+            return False
+        # Check file size
+        if uploaded_file.size > self.max_file_size:
+            st.error(f"File size exceeds {self.max_file_size/1024/1024}MB limit")
+            return False
+        # Check extension
+        ext = Path(uploaded_file.name).suffix[1:].lower()
+        if ext not in self.allowed_extensions:
+            st.error(f"Unsupported file type. Allowed types: {', '.join(self.allowed_extensions)}")
+            return False
+        return True
+    def process_file_upload(self, uploaded_file) -> Optional[str]:
         try:
+            if not self.validate_file(uploaded_file):
                 return None
+            return uploaded_file.read().decode('utf-8')
         except Exception as e:
             logger.error(f"Error processing file upload: {str(e)}")
             return None
     @staticmethod
+    def validate_text(text: str, max_length: int) -> bool:
         if not text or len(text.strip()) == 0:
             st.error("Please enter some text to analyze")
             return False
+        if len(text) > max_length:
+            st.error(f"Text exceeds maximum length of {max_length} characters")
             return False
         return True
+# analysis.py
+from typing import Dict, List
+import spacy
+from transformers import pipeline
+from nltk.sentiment import SentimentIntensityAnalyzer
+from nltk.tokenize import sent_tokenize
+from gensim import corpora, models
+import numpy as np
+from concurrent.futures import ThreadPoolExecutor
+import streamlit as st
+logger = setup_logger("analysis")
 class AdvancedAnalyzer:
+    def __init__(self, config: Dict):
+        self.config = config
         self._initialize_models()
+    @st.cache_resource
     def _initialize_models(self):
         try:
             self.sentiment_analyzer = SentimentIntensityAnalyzer()
+            self.nlp = spacy.load(self.config['models']['spacy'])
             self.sentiment_model = pipeline(
                 "sentiment-analysis",
+                model=self.config['models']['sentiment'],
                 return_all_scores=True
             )
             logger.info("Models initialized successfully")
             logger.error(f"Error initializing models: {str(e)}")
             raise
+    def analyze_text(self, text: str) -> Dict:
+        """Complete text analysis pipeline"""
+        results = {}
+        # Use st.progress to show analysis progress
+        progress_bar = st.progress(0)
+        status_text = st.empty()
         try:
+            # Sentiment Analysis
+            status_text.text("Analyzing sentiment...")
+            results['sentiment'] = self.analyze_sentiment_batch(
+                text,
+                self.config['analysis']['batch_size']
+            )
+            progress_bar.progress(0.33)
+            # Topic Modeling
+            status_text.text("Extracting topics...")
+            results['topics'] = self.topic_modeling(
+                text,
+                self.config['analysis']['num_topics']
             )
+            progress_bar.progress(0.66)
+            # Entity Extraction
+            status_text.text("Identifying entities...")
+            results['entities'] = self.extract_entities(text)
+            progress_bar.progress(1.0)
+            status_text.text("Analysis complete!")
+            return results
         except Exception as e:
+            logger.error(f"Error in analysis pipeline: {str(e)}")
             raise
+        finally:
+            progress_bar.empty()
+            status_text.empty()
+    # Rest of the AdvancedAnalyzer methods remain the same...
+# ui.py
+import streamlit as st
+import plotly.graph_objects as go
+import plotly.express as px
+import pandas as pd
+from typing import Dict
+class UI:
+    @staticmethod
+    def setup_page():
+        st.set_page_config(
+            page_title="Enhanced AI Output Analyzer",
+            layout="wide",
+            initial_sidebar_state="expanded"
+        )
+        st.markdown("""
+            <style>
+            .main { padding: 2rem; }
+            .stMetric {
+                background-color: var(--background-color);
+                padding: 1rem;
+                border-radius: 0.5rem;
+                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            }
+            .entity-tag {
+                background-color: var(--secondary-background-color);
+                padding: 0.2rem 0.5rem;
+                border-radius: 0.25rem;
+                margin: 0.2rem;
+                display: inline-block;
+            }
+            </style>
+        """, unsafe_allow_html=True)
+    @staticmethod
+    def display_results(results: Dict):
+        """Display analysis results with interactive visualizations"""
+        st.subheader("Analysis Results")
+        # Create tabs for different analyses
+        sentiment_tab, topics_tab, entities_tab = st.tabs([
+            "Sentiment Analysis",
+            "Topic Modeling",
+            "Named Entities"
+        ])
+        with sentiment_tab:
+            UI._display_sentiment(results['sentiment'])
+        with topics_tab:
+            UI._display_topics(results['topics'])
+        with entities_tab:
+            UI._display_entities(results['entities'])
+    @staticmethod
+    def _display_sentiment(sentiment_results: Dict):
+        col1, col2 = st.columns(2)
+        with col1:
+            # Sentiment gauge
+            fig = go.Figure(go.Indicator(
+                mode="gauge+number",
+                value=sentiment_results['compound'],
+                domain={'x': [0, 1], 'y': [0, 1]},
+                gauge={
+                    'axis': {'range': [-1, 1]},
+                    'bar': {'color': "darkblue"},
+                    'steps': [
+                        {'range': [-1, -0.05], 'color': "lightcoral"},
+                        {'range': [-0.05, 0.05], 'color': "lightgray"},
+                        {'range': [0.05, 1], 'color': "lightgreen"}
+                    ]
+                }
+            ))
+            st.plotly_chart(fig)
+        with col2:
+            # Emotions pie chart
+            emotions_df = pd.DataFrame(
+                sentiment_results['emotions'].items(),
+                columns=['Emotion', 'Score']
+            )
+            fig = px.pie(
+                emotions_df,
+                values='Score',
+                names='Emotion',
+                title="Emotional Distribution"
+            )
+            st.plotly_chart(fig)
+    # Rest of the UI methods...
+# main.py
+import streamlit as st
+from utils.config import load_config
+from text_processing import TextProcessor
+from analysis import AdvancedAnalyzer
+from ui import UI
 def main():
+    # Load configuration
+    config = load_config()
+    # Setup UI
+    UI.setup_page()
+    # Initialize processors
+    text_processor = TextProcessor(config)
+    analyzer = AdvancedAnalyzer(config)
     # Sidebar configuration
     with st.sidebar:
         st.title("Analysis Settings")
+        config['analysis']['num_topics'] = st.slider(
+            "Number of Topics",
+            2, 10,
+            config['analysis']['num_topics']
+        )
+        config['analysis']['min_entity_confidence'] = st.slider(
+            "Entity Confidence Threshold",
+            0.0, 1.0,
+            config['analysis']['min_entity_confidence']
         )
     # Main content
     # Input section
     input_method = st.radio("Choose input method:", ["Text Input", "File Upload"])
     if input_method == "File Upload":
         text = text_processor.process_file_upload(
             st.file_uploader("Upload a text file", type=['txt'])
         text = st.text_area("Enter text to analyze:", height=200)
     # Analysis section
+    if st.button("Analyze", type="primary") and text_processor.validate_text(
+        text, config['analysis']['max_text_length']
+    ):
         try:
+            with st.spinner("Analyzing text..."):
+                results = analyzer.analyze_text(text)
+                UI.display_results(results)
         except Exception as e:
             st.error(f"An error occurred during analysis: {str(e)}")
 if __name__ == "__main__":
     main()