ashkunwar committed
Commit 4ee7173 · 1 Parent(s): 3046482

Update application with enhanced features for Hugging Face deployment
.python-version DELETED
@@ -1 +0,0 @@
- 3.9

.streamlit/secrets.toml.template ADDED
@@ -0,0 +1,5 @@
+ # Copy this file to .streamlit/secrets.toml and add your actual API key
+ # DO NOT commit the actual secrets.toml file to git
+
+ [default]
+ GROQ_API_KEY = "your_groq_api_key_here"
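
A quick local sanity check, not part of the commit: the sketch below (assuming Python 3.11+ for the stdlib tomllib module) confirms the copied secrets file parses and actually carries a key before you launch Streamlit.

    # Hypothetical helper, not in the repo: validate .streamlit/secrets.toml locally.
    import tomllib

    with open(".streamlit/secrets.toml", "rb") as f:
        secrets = tomllib.load(f)

    # The template nests the key under [default]; fail fast if it is still the placeholder.
    key = secrets["default"]["GROQ_API_KEY"]
    assert key and key != "your_groq_api_key_here", "add your real Groq API key"
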
Atlan/Dockerfile ADDED
@@ -0,0 +1,56 @@
+ # Dockerfile for Hugging Face Spaces - Streamlit App
+ FROM python:3.11-slim
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Create user for security
+ RUN useradd -m -u 1000 user
+ USER user
+
+ # Set environment variables
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1
+
+ # Set working directory
+ WORKDIR $HOME/app
+
+ # Copy requirements first for better Docker layer caching
+ COPY --chown=user:user requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir --user -r requirements.txt
+
+ # Copy the application files
+ COPY --chown=user:user . .
+
+ # Create necessary directories
+ RUN mkdir -p $HOME/.streamlit
+
+ # Create Streamlit config
+ RUN echo "\
+ [general]\n\
+ email = \"\"\n\
+ " > $HOME/.streamlit/credentials.toml
+
+ RUN echo "\
+ [server]\n\
+ headless = true\n\
+ enableCORS = false\n\
+ enableXsrfProtection = false\n\
+ port = 7860\n\
+ " > $HOME/.streamlit/config.toml
+
+ # Expose the port that Hugging Face Spaces expects
+ EXPOSE 7860
+
+ # Health check
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
+
+ # Command to run the Streamlit app
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
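
The HEALTHCHECK above hits Streamlit's built-in health endpoint. A minimal sketch for checking the same endpoint from the host (assuming the container's port 7860 is published locally; requests is already in requirements.txt):

    # Hypothetical host-side check, not part of the image.
    import requests

    resp = requests.get("http://localhost:7860/_stcore/health", timeout=5)
    # Streamlit returns 200 with body "ok" once the server is up.
    print(resp.status_code, resp.text)
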
Atlan/app.py ADDED
@@ -0,0 +1,504 @@
+ import streamlit as st
+ st.set_page_config(
+     page_title="🎯 Atlan Customer Support Copilot",
+     page_icon="🎯",
+     layout="wide",
+     initial_sidebar_state="expanded"
+ )
+
+ import json
+ import asyncio
+ import logging
+ import os
+ from typing import List, Dict
+ from datetime import datetime
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ try:
+     # Try multiple sources for API key: Environment variables first (HF Spaces), then Streamlit secrets
+     if 'GROQ_API_KEY' in os.environ:
+         st.success("🔑 API key loaded from environment variables")
+     elif hasattr(st, 'secrets') and 'GROQ_API_KEY' in st.secrets:
+         os.environ['GROQ_API_KEY'] = st.secrets['GROQ_API_KEY']
+         st.success("🔑 API key loaded from Streamlit Cloud secrets")
+     elif hasattr(st, 'secrets') and hasattr(st.secrets, 'default') and 'GROQ_API_KEY' in st.secrets.default:
+         os.environ['GROQ_API_KEY'] = st.secrets.default['GROQ_API_KEY']
+         st.success("🔑 API key loaded from Streamlit secrets")
+     else:
+         st.error("⚠️ GROQ_API_KEY not found!")
+         st.info("**For Hugging Face Spaces deployment:**")
+         st.info("1. Go to your Space Settings")
+         st.info("2. Click 'Variables and secrets' tab")
+         st.info("3. Add GROQ_API_KEY with your actual API key")
+         st.code("""
+ # In Hugging Face Spaces Secrets:
+ GROQ_API_KEY = "gsk_your_actual_groq_api_key_here"
+ """)
+         st.info("**For Streamlit Cloud deployment:**")
+         st.info("Add your API key in the Streamlit Cloud app settings > Secrets tab")
+         st.info("**For local development:**")
+         st.info("Add GROQ_API_KEY to your .env file")
+         st.code("""
+ # In .env file:
+ GROQ_API_KEY=your_groq_api_key_here
+ """)
+         st.stop()
+ except Exception as e:
+     st.error(f"⚠️ Error accessing API key: {e}")
+     st.error("Please check your configuration")
+     st.stop()
+
+ # Import application modules after environment setup
+ try:
+     from models import Ticket, TicketClassification, TopicTagEnum, SentimentEnum, PriorityEnum
+     from classifier import TicketClassifier
+     from enhanced_rag import EnhancedRAGPipeline
+ except ImportError as e:
+     st.error(f"❌ Failed to import required modules: {e}")
+     st.error("Please ensure all required files are present in the directory")
+     st.stop()
+
+ st.markdown("""
+ <style>
+ .main-header {
+     text-align: center;
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+     color: white;
+     padding: 2rem;
+     border-radius: 10px;
+     margin-bottom: 2rem;
+ }
+ .ticket-card {
+     border: 1px solid #e1e5e9;
+     border-radius: 8px;
+     padding: 1rem;
+     margin: 1rem 0;
+     background: white;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ }
+ .tag {
+     background: #667eea;
+     color: white;
+     padding: 0.2rem 0.5rem;
+     border-radius: 15px;
+     font-size: 0.8rem;
+     margin: 0.2rem;
+     display: inline-block;
+ }
+ .metric-card {
+     background: white;
+     padding: 1rem;
+     border-radius: 8px;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+     text-align: center;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+ @st.cache_resource
+ def initialize_ai_models():
+     try:
+         classifier = TicketClassifier()
+         rag_pipeline = EnhancedRAGPipeline(groq_client=classifier.client)
+         return classifier, rag_pipeline
+     except Exception as e:
+         st.error(f"❌ Failed to initialize AI models: {e}")
+         return None, None
+
+ def load_sample_tickets():
+     try:
+         with open('sample_tickets.json', 'r') as f:
+             tickets_data = json.load(f)
+         return [Ticket(**ticket_data) for ticket_data in tickets_data]
+     except FileNotFoundError:
+         st.warning("📋 Sample tickets file not found. Using demo data for cloud deployment.")
+         # Create minimal demo data for cloud deployment
+         demo_tickets = [
+             {
+                 "id": "DEMO-001",
+                 "subject": "Demo ticket - Connection issue",
+                 "body": "This is a demo ticket showing connection problems with our data source."
+             },
+             {
+                 "id": "DEMO-002",
+                 "subject": "Demo ticket - API question",
+                 "body": "This is a demo ticket asking about API usage and documentation."
+             }
+         ]
+         return [Ticket(**ticket_data) for ticket_data in demo_tickets]
+     except Exception as e:
+         st.error(f"❌ Error loading tickets: {e}")
+         return []
+
+ async def classify_tickets_async(classifier, tickets):
+     try:
+         classifications = await classifier.classify_tickets_bulk(tickets)
+         return list(zip(tickets, classifications))
+     except Exception as e:
+         st.error(f"❌ Classification error: {e}")
+         return []
+
+ def run_async(coro):
+     try:
+         loop = asyncio.get_event_loop()
+     except RuntimeError:
+         loop = asyncio.new_event_loop()
+         asyncio.set_event_loop(loop)
+     return loop.run_until_complete(coro)
+
+ def calculate_stats(classified_tickets):
+     if not classified_tickets:
+         return {
+             'total': 0,
+             'high_priority': 0,
+             'frustrated': 0,
+             'rag_eligible': 0,
+             'most_common_tag': 'N/A',
+             'tag_counts': {}
+         }
+
+     total = len(classified_tickets)
+     high_priority = sum(1 for _, classification in classified_tickets
+                         if classification.priority == PriorityEnum.P0)
+     frustrated = sum(1 for _, classification in classified_tickets
+                      if classification.sentiment in [SentimentEnum.FRUSTRATED, SentimentEnum.ANGRY])
+
+     # Count RAG-eligible topics
+     rag_topics = ['How-to', 'Product', 'Best practices', 'API/SDK', 'SSO']
+     rag_eligible = sum(1 for _, classification in classified_tickets
+                        if any(tag.value in rag_topics for tag in classification.topic_tags))
+
+     # Count tag frequencies
+     tag_counts = {}
+     for _, classification in classified_tickets:
+         for tag in classification.topic_tags:
+             tag_counts[tag.value] = tag_counts.get(tag.value, 0) + 1
+
+     most_common_tag = max(tag_counts.keys(), key=lambda x: tag_counts[x]) if tag_counts else 'N/A'
+
+     return {
+         'total': total,
+         'high_priority': high_priority,
+         'frustrated': frustrated,
+         'rag_eligible': rag_eligible,
+         'most_common_tag': most_common_tag,
+         'tag_counts': tag_counts
+     }
+
+ def display_ticket_card(ticket, classification):
+     with st.container():
+         st.markdown(f"**{ticket.id}**")
+         st.write(f"**Subject:** {ticket.subject}")
+         st.write(f"**Message:** {ticket.body[:300]}{'...' if len(ticket.body) > 300 else ''}")
+
+         st.write("**📋 Topics:**")
+         cols = st.columns(max(len(classification.topic_tags), 1))  # st.columns(0) raises, so guard empty tag lists
+         for i, tag in enumerate(classification.topic_tags):
+             with cols[i]:
+                 st.markdown(f'<span style="background: #667eea; color: white; padding: 0.2rem 0.5rem; border-radius: 10px; font-size: 0.8rem; margin: 0.1rem;">{tag.value}</span>', unsafe_allow_html=True)
+
+         sentiment_color = '#ff6b6b' if 'frustrated' in classification.sentiment.value.lower() else '#ff3838' if 'angry' in classification.sentiment.value.lower() else '#4ecdc4' if 'curious' in classification.sentiment.value.lower() else '#95a5a6'
+         st.markdown(f"**😊 Sentiment:** <span style='background: {sentiment_color}; color: white; padding: 0.3rem 0.8rem; border-radius: 15px; font-size: 0.9rem;'>{classification.sentiment.value}</span>", unsafe_allow_html=True)
+
+         priority_color = '#ff3838' if 'P0' in classification.priority.value else '#ffa726' if 'P1' in classification.priority.value else '#66bb6a'
+         st.markdown(f"**🔥 Priority:** <span style='background: {priority_color}; color: white; padding: 0.3rem 0.8rem; border-radius: 15px; font-size: 0.9rem;'>{classification.priority.value}</span>", unsafe_allow_html=True)
+
+         st.write(f"**🤖 AI Reasoning:** {classification.reasoning}")
+         st.divider()
+
+ def main():
+     classifier, rag_pipeline = initialize_ai_models()
+
+     if classifier is None or rag_pipeline is None:
+         st.stop()
+
+     st.markdown("""
+     <div class="main-header">
+         <h1>🎯 Atlan Customer Support Copilot</h1>
+         <p>AI-powered ticket classification and intelligent response generation</p>
+     </div>
+     """, unsafe_allow_html=True)
+
+     # Sidebar navigation
+     st.sidebar.title("🧭 Navigation")
+     page = st.sidebar.selectbox("Choose a page", [
+         "📊 Bulk Classification Dashboard",
+         "🤖 Interactive AI Agent",
+         "📝 Single Ticket Classification",
+         "📂 Upload & Classify"
+     ])
+
+     # Page routing
+     if page == "📊 Bulk Classification Dashboard":
+         bulk_dashboard_page(classifier)
+     elif page == "🤖 Interactive AI Agent":
+         interactive_agent_page(classifier, rag_pipeline)
+     elif page == "📝 Single Ticket Classification":
+         single_ticket_page(classifier)
+     elif page == "📂 Upload & Classify":
+         upload_classify_page(classifier)
+
+ def bulk_dashboard_page(classifier):
+     """Bulk classification dashboard page"""
+     st.header("📊 Bulk Classification Dashboard")
+     st.subheader("Auto-loaded sample tickets with AI classification")
+
+     # Initialize session state for bulk results
+     if 'bulk_results' not in st.session_state:
+         st.session_state.bulk_results = None
+
+     # Auto-load bulk results
+     if st.session_state.bulk_results is None:
+         with st.spinner("🔄 Loading and classifying sample tickets..."):
+             tickets = load_sample_tickets()
+             if tickets:
+                 try:
+                     classified_tickets = run_async(classify_tickets_async(classifier, tickets))
+                     st.session_state.bulk_results = classified_tickets
+                     st.success(f"✅ Successfully classified {len(classified_tickets)} tickets!")
+                 except Exception as e:
+                     st.error(f"❌ Error during classification: {e}")
+                     st.session_state.bulk_results = []
+             else:
+                 st.session_state.bulk_results = []
+
+     if st.session_state.bulk_results:
+         # Display statistics
+         stats = calculate_stats(st.session_state.bulk_results)
+
+         col1, col2, col3, col4, col5 = st.columns(5)
+         with col1:
+             st.metric("📋 Total Tickets", stats['total'])
+         with col2:
+             st.metric("🚨 High Priority", stats['high_priority'])
+         with col3:
+             st.metric("😤 Frustrated/Angry", stats['frustrated'])
+         with col4:
+             st.metric("🤖 RAG-Eligible", stats['rag_eligible'])
+         with col5:
+             st.metric("🏷️ Top Topic", stats['most_common_tag'])
+
+         # Visualizations
+         if stats['tag_counts']:
+             col1, col2 = st.columns(2)
+
+             with col1:
+                 # Priority distribution
+                 priority_data = {}
+                 for _, classification in st.session_state.bulk_results:
+                     priority = classification.priority.value
+                     priority_data[priority] = priority_data.get(priority, 0) + 1
+
+                 fig_priority = px.pie(
+                     values=list(priority_data.values()),
+                     names=list(priority_data.keys()),
+                     title="📊 Priority Distribution",
+                     color_discrete_map={
+                         'P0 (High)': '#ff3838',
+                         'P1 (Medium)': '#ffa726',
+                         'P2 (Low)': '#66bb6a'
+                     }
+                 )
+                 st.plotly_chart(fig_priority, use_container_width=True)
+
+             with col2:
+                 # Topic distribution
+                 fig_tags = px.bar(
+                     x=list(stats['tag_counts'].values()),
+                     y=list(stats['tag_counts'].keys()),
+                     orientation='h',
+                     title="🏷️ Topic Distribution",
+                     labels={'x': 'Count', 'y': 'Topics'}
+                 )
+                 fig_tags.update_layout(height=400)
+                 st.plotly_chart(fig_tags, use_container_width=True)
+
+         # Display tickets with filters
+         st.subheader("📋 All Classified Tickets")
+
+         col1, col2, col3 = st.columns(3)
+         with col1:
+             priority_filter = st.selectbox("Filter by Priority",
+                                            ["All"] + [p.value for p in PriorityEnum])
+         with col2:
+             sentiment_filter = st.selectbox("Filter by Sentiment",
+                                             ["All"] + [s.value for s in SentimentEnum])
+         with col3:
+             topic_filter = st.selectbox("Filter by Topic",
+                                         ["All"] + [t.value for t in TopicTagEnum])
+
+         # Apply filters
+         filtered_results = st.session_state.bulk_results
+         if priority_filter != "All":
+             filtered_results = [(t, c) for t, c in filtered_results if c.priority.value == priority_filter]
+         if sentiment_filter != "All":
+             filtered_results = [(t, c) for t, c in filtered_results if c.sentiment.value == sentiment_filter]
+         if topic_filter != "All":
+             filtered_results = [(t, c) for t, c in filtered_results if any(tag.value == topic_filter for tag in c.topic_tags)]
+
+         st.info(f"Showing {len(filtered_results)} of {len(st.session_state.bulk_results)} tickets")
+
+         # Display filtered tickets
+         for ticket, classification in filtered_results:
+             display_ticket_card(ticket, classification)
+
+     # Refresh button
+     if st.button("🔄 Refresh Classifications"):
+         st.session_state.bulk_results = None
+         st.rerun()
+
+ def interactive_agent_page(classifier, rag_pipeline):
+     """Interactive AI agent page"""
+     st.header("🤖 Interactive AI Agent")
+     st.subheader("Submit a new ticket or question from any channel")
+
+     # Input form
+     with st.form("interactive_form"):
+         question = st.text_area(
+             "Customer Question or Ticket:",
+             placeholder="Enter the customer's question or ticket description...",
+             height=150
+         )
+
+         channel = st.selectbox(
+             "Channel:",
+             ["Web", "Email", "WhatsApp", "Voice", "Live Chat"]
+         )
+
+         submit_button = st.form_submit_button("🚀 Process with AI Agent")
+
+     if submit_button and question:
+         with st.spinner("🤖 Analyzing question and generating response..."):
+             try:
+                 # Create a dummy ticket for classification
+                 ticket = Ticket(id="INTERACTIVE-001", subject=question[:80], body=question)
+
+                 # Classify the ticket
+                 classification = run_async(classifier.classify_ticket(ticket))
+                 topic_tags = [tag.value for tag in classification.topic_tags]
+
+                 # Generate response using RAG pipeline
+                 rag_result = run_async(rag_pipeline.generate_answer(question, topic_tags))
+
+                 # Display results in two columns
+                 col1, col2 = st.columns(2)
+
+                 with col1:
+                     st.subheader("📊 Internal Analysis (Back-end View)")
+
+                     st.markdown(f"""
+                     **🏷️ Topic Tags:** {', '.join([f'`{tag}`' for tag in topic_tags])}
+
+                     **😊 Sentiment:** `{classification.sentiment.value}`
+
+                     **⚡ Priority:** `{classification.priority.value}`
+
+                     **🤖 AI Reasoning:** {classification.reasoning}
+                     """)
+
+                 with col2:
+                     st.subheader("💬 Final Response (Front-end View)")
+
+                     if rag_result['type'] == 'direct_answer':
+                         st.success("💡 Direct Answer (RAG-Generated)")
+                         st.write(rag_result['answer'])
+
+                         if rag_result.get('sources'):
+                             st.subheader("📚 Sources:")
+                             for source in rag_result['sources']:
+                                 st.markdown(f"- [{source}]({source})")
+                     else:
+                         st.warning("📋 Ticket Routed")
+                         st.write(rag_result['message'])
+
+             except Exception as e:
+                 st.error(f"❌ Error processing question: {e}")
+
+ def single_ticket_page(classifier):
+     """Single ticket classification page"""
+     st.header("📝 Single Ticket Classification")
+
+     with st.form("single_ticket_form"):
+         ticket_id = st.text_input("Ticket ID:", placeholder="e.g., TICKET-001")
+         subject = st.text_input("Subject:", placeholder="Enter ticket subject")
+         body = st.text_area("Message Body:", placeholder="Enter the full ticket message...", height=150)
+
+         classify_button = st.form_submit_button("🔍 Classify Ticket")
+
+     if classify_button and ticket_id and subject and body:
+         with st.spinner("🔄 Classifying ticket..."):
+             try:
+                 ticket = Ticket(id=ticket_id, subject=subject, body=body)
+                 classification = run_async(classifier.classify_ticket(ticket))
+
+                 st.success("✅ Classification complete!")
+                 display_ticket_card(ticket, classification)
+
+             except Exception as e:
+                 st.error(f"❌ Error classifying ticket: {e}")
+
+ def upload_classify_page(classifier):
+     """Upload and classify page"""
+     st.header("📂 Upload & Classify Tickets")
+
+     uploaded_file = st.file_uploader("Choose a JSON file", type="json")
+
+     if uploaded_file is not None:
+         try:
+             tickets_data = json.load(uploaded_file)
+             tickets = [Ticket(**ticket_data) for ticket_data in tickets_data]
+
+             st.info(f"📄 Loaded {len(tickets)} tickets from file")
+
+             if st.button("🚀 Classify All Tickets"):
+                 with st.spinner("🔄 Classifying tickets..."):
+                     try:
+                         classified_tickets = run_async(classify_tickets_async(classifier, tickets))
+
+                         st.success(f"✅ Successfully classified {len(classified_tickets)} tickets!")
+
+                         # Display statistics
+                         stats = calculate_stats(classified_tickets)
+                         col1, col2, col3, col4 = st.columns(4)
+                         with col1:
+                             st.metric("Total", stats['total'])
+                         with col2:
+                             st.metric("High Priority", stats['high_priority'])
+                         with col3:
+                             st.metric("Frustrated", stats['frustrated'])
+                         with col4:
+                             st.metric("RAG-Eligible", stats['rag_eligible'])
+
+                         # Display tickets
+                         for ticket, classification in classified_tickets:
+                             display_ticket_card(ticket, classification)
+
+                     except Exception as e:
+                         st.error(f"❌ Error during classification: {e}")
+
+         except Exception as e:
+             st.error(f"❌ Error loading file: {e}")
+
+ # Footer
+ def show_footer():
+     """Display footer"""
+     st.markdown("---")
+     st.markdown("""
+     <div style="text-align: center; color: #666; padding: 1rem;">
+         <p>🎯 <strong>Atlan Customer Support Copilot</strong> - AI-powered ticket classification and response generation</p>
+         <p>Built with Streamlit • Powered by Groq AI • Enhanced RAG Pipeline</p>
+     </div>
+     """, unsafe_allow_html=True)
+
+ # Run the app
+ if __name__ == "__main__":
+     main()
+     show_footer()
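
load_sample_tickets() above falls back to demo data when sample_tickets.json is missing. A minimal sketch for generating a compatible file (the field names match the Ticket usage above; the ticket text itself is made up):

    # Hypothetical seed script, not part of the commit.
    import json

    tickets = [
        {
            "id": "TICKET-001",
            "subject": "Snowflake crawler failing",
            "body": "Our scheduled Snowflake crawler has been timing out since yesterday."
        }
    ]
    # Write the schema the Streamlit app reads at startup.
    with open("sample_tickets.json", "w") as f:
        json.dump(tickets, f, indent=2)
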
Atlan/requirements.txt ADDED
@@ -0,0 +1,17 @@
+ streamlit>=1.28,<2
+ groq>=0.31
+ pydantic>=2.11,<3
+ python-dotenv>=1.1
+ httpx>=0.28
+ requests>=2.32
+ aiohttp>=3.12
+ beautifulsoup4>=4.13
+
+ # If you don't strictly need lxml, delete the next line to avoid native deps.
+ lxml==6.0.1
+
+ numpy==1.26.4
+ pandas==2.2.2
+ scikit-learn==1.5.2
+ sentence-transformers>=2.2
+ plotly>=5.17.0

DEPLOYMENT_GUIDE.md ADDED
File without changes
Dockerfile ADDED
@@ -0,0 +1,56 @@
+ # Dockerfile for Hugging Face Spaces - Streamlit App
+ FROM python:3.9-slim
+
+ # Install curl for the health check below
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Create user for security
+ RUN useradd -m -u 1000 user
+ USER user
+
+ # Set environment variables
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1
+
+ # Set working directory
+ WORKDIR $HOME/app
+
+ # Copy requirements first for better Docker layer caching
+ COPY --chown=user:user requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir --user -r requirements.txt
+
+ # Copy the application files
+ COPY --chown=user:user . .
+
+ # Create necessary directories
+ RUN mkdir -p $HOME/.streamlit
+
+ # Create Streamlit config
+ RUN echo "\
+ [general]\n\
+ email = \"\"\n\
+ " > $HOME/.streamlit/credentials.toml
+
+ RUN echo "\
+ [server]\n\
+ headless = true\n\
+ enableCORS = false\n\
+ enableXsrfProtection = false\n\
+ port = 7860\n\
+ " > $HOME/.streamlit/config.toml
+
+ # Expose the port that Hugging Face Spaces expects
+ EXPOSE 7860
+
+ # Health check
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
+
+ # Command to run the Streamlit app
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
Dockerfile.fastapi ADDED
File without changes
README_HF.md ADDED
File without changes
app.py CHANGED
@@ -23,18 +23,25 @@ logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)
  
  try:
-     # Try Streamlit Cloud secrets first, then fall back to .env
+     # Try multiple sources for API key: Streamlit secrets, environment variables, .env file
      if hasattr(st, 'secrets') and 'GROQ_API_KEY' in st.secrets:
          os.environ['GROQ_API_KEY'] = st.secrets['GROQ_API_KEY']
          st.success("🔑 API key loaded from Streamlit Cloud secrets")
-     elif 'GROQ_API_KEY' not in os.environ:
+     elif 'GROQ_API_KEY' in os.environ:
+         st.success("🔑 API key loaded from environment variables")
+     elif hasattr(st, 'secrets') and hasattr(st.secrets, 'default') and 'GROQ_API_KEY' in st.secrets.default:
+         os.environ['GROQ_API_KEY'] = st.secrets.default['GROQ_API_KEY']
+         st.success("🔑 API key loaded from Streamlit secrets")
+     else:
          st.error("⚠️ GROQ_API_KEY not found!")
-         st.info("**For Streamlit Cloud deployment:**")
-         st.info("Add your API key in the Streamlit Cloud app settings > Secrets tab")
+         st.info("**For Hugging Face Spaces deployment:**")
+         st.info("Add your API key in the Space settings > Secrets tab")
          st.code("""
-         # In Streamlit Cloud Secrets:
+         # In Hugging Face Spaces Secrets:
          GROQ_API_KEY = "your_groq_api_key_here"
          """)
+         st.info("**For Streamlit Cloud deployment:**")
+         st.info("Add your API key in the Streamlit Cloud app settings > Secrets tab")
          st.info("**For local development:**")
          st.info("Add GROQ_API_KEY to your .env file")
          st.code("""
@@ -42,22 +49,11 @@ try:
          GROQ_API_KEY=your_groq_api_key_here
          """)
          st.stop()
-     else:
-         st.success("🔑 API key loaded from environment")
  except Exception as e:
      st.error(f"⚠️ Error accessing API key: {e}")
      st.error("Please check your configuration")
      st.stop()
  
- try:
-     from models import Ticket, TicketClassification, TopicTagEnum, SentimentEnum, PriorityEnum
-     from classifier import TicketClassifier
-     from enhanced_rag import EnhancedRAGPipeline
- except ImportError as e:
-     st.error(f"❌ Failed to import required modules: {e}")
-     st.error("Please ensure all required files are present")
-     st.stop()
-
  # Import application modules after environment setup
  try:
      from models import Ticket, TicketClassification, TopicTagEnum, SentimentEnum, PriorityEnum
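
The patch reorders key lookup to: top-level st.secrets, then the process environment, then the [default] table from the template above. A standalone sketch of that resolution order (illustrative only; the real code also emits st.success/st.error UI messages and calls st.stop() on failure):

    # Hypothetical helper mirroring the patched lookup order.
    import os
    import streamlit as st

    def resolve_groq_key():
        if hasattr(st, "secrets") and "GROQ_API_KEY" in st.secrets:
            return st.secrets["GROQ_API_KEY"]
        if "GROQ_API_KEY" in os.environ:
            return os.environ["GROQ_API_KEY"]
        if hasattr(st, "secrets") and hasattr(st.secrets, "default"):
            return st.secrets.default.get("GROQ_API_KEY")
        return None  # caller should surface setup instructions and stop
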
deploy_prep.bat ADDED
File without changes
deploy_prep.sh ADDED
File without changes
fastapi_app.py ADDED
File without changes
main.py DELETED
@@ -1,284 +0,0 @@
- import os
- import json
- import logging
- from typing import List, Dict
- from fastapi import FastAPI, HTTPException, Request, File, UploadFile, Form
- from fastapi.responses import HTMLResponse, JSONResponse
- from dotenv import load_dotenv
- import uvicorn
- import httpx
-
- from models import (
-     Ticket,
-     TicketClassification,
-     ClassifiedTicket,
-     SingleTicketRequest,
-     BulkTicketRequest,
-     ClassificationResponse
- )
- from classifier import TicketClassifier
-
- # Setup logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- # Load environment variables
- load_dotenv()
-
- # Initialize FastAPI app
- app = FastAPI(
-     title="Atlan Customer Support Copilot",
-     description="AI-powered ticket classification and response generation",
-     version="1.0.0"
- )
-
- # Initialize the classifier
- classifier = TicketClassifier()
-
- async def rag_pipeline(question: str, topic_tags: List[str]) -> Dict:
-     """Enhanced RAG pipeline with proper knowledge retrieval"""
-     try:
-         # Import the enhanced RAG system
-         from enhanced_rag import EnhancedRAGPipeline
-
-         # Initialize RAG pipeline with Groq client from classifier
-         rag = EnhancedRAGPipeline(groq_client=classifier.client)
-
-         # Generate answer using the enhanced pipeline
-         result = await rag.generate_answer(question, topic_tags)
-         return result
-
-     except ImportError as e:
-         logger.warning(f"Enhanced RAG system not available: {e}")
-         # Fallback to basic routing if enhanced RAG fails
-         return await fallback_rag_pipeline(question, topic_tags)
-
-     except Exception as e:
-         logger.error(f"RAG pipeline error: {e}")
-         # Fallback to basic routing if enhanced RAG fails
-         return await fallback_rag_pipeline(question, topic_tags)
-
- async def fallback_rag_pipeline(question: str, topic_tags: List[str]) -> Dict:
-     """Fallback RAG pipeline for when enhanced system is not available"""
-     if any(tag in ["How-to", "Product", "Best practices", "API/SDK", "SSO"] for tag in topic_tags):
-         # Basic knowledge responses
-         context = f"Based on Atlan documentation for topics: {', '.join(topic_tags)}"
-
-         return {
-             "type": "direct_answer",
-             "answer": f"Based on the documentation, here's information about: {question}. {context}",
-             "sources": ["https://docs.atlan.com/", "https://developer.atlan.com/"]
-         }
-     else:
-         return {
-             "type": "routing",
-             "message": f"This ticket has been classified as a '{topic_tags[0] if topic_tags else 'General'}' issue and routed to the appropriate team."
-         }
-
- @app.get("/")
- async def root():
-     """API root endpoint."""
-     return {
-         "message": "Atlan Customer Support Copilot API",
-         "version": "1.0.0",
-         "endpoints": [
-             "/health",
-             "/classify-single",
-             "/classify-bulk",
-             "/bulk-dashboard",
-             "/interactive-agent",
-             "/sample-tickets"
-         ]
-     }
-
- @app.post("/classify-single", response_model=ClassificationResponse)
- async def classify_single_ticket(request: SingleTicketRequest):
-     """Classify a single support ticket."""
-     try:
-         classification = await classifier.classify_ticket(request.ticket)
-         classified_ticket = ClassifiedTicket(
-             ticket=request.ticket,
-             classification=classification
-         )
-
-         return ClassificationResponse(
-             success=True,
-             data=[classified_ticket],
-             total_processed=1
-         )
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Classification failed: {str(e)}")
-
- @app.post("/classify-bulk", response_model=ClassificationResponse)
- async def classify_bulk_tickets(request: BulkTicketRequest):
-     """Classify multiple support tickets."""
-     try:
-         if not request.tickets:
-             raise HTTPException(status_code=400, detail="No tickets provided")
-
-         classifications = await classifier.classify_tickets_bulk(request.tickets)
-
-         classified_tickets = [
-             ClassifiedTicket(ticket=ticket, classification=classification)
-             for ticket, classification in zip(request.tickets, classifications)
-         ]
-
-         return ClassificationResponse(
-             success=True,
-             data=classified_tickets,
-             total_processed=len(classified_tickets)
-         )
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Bulk classification failed: {str(e)}")
-
- @app.get("/sample-tickets", response_model=ClassificationResponse)
- async def classify_sample_tickets():
-     """Load and classify the sample tickets from the JSON file."""
-     try:
-         # Load sample tickets
-         sample_file_path = "sample_tickets.json"
-         if not os.path.exists(sample_file_path):
-             raise HTTPException(status_code=404, detail="Sample tickets file not found")
-
-         with open(sample_file_path, "r") as f:
-             tickets_data = json.load(f)
-
-         # Convert to Ticket objects
-         tickets = [Ticket(**ticket_data) for ticket_data in tickets_data]
-
-         # Classify all tickets
-         classifications = await classifier.classify_tickets_bulk(tickets)
-
-         classified_tickets = [
-             ClassifiedTicket(ticket=ticket, classification=classification)
-             for ticket, classification in zip(tickets, classifications)
-         ]
-
-         return ClassificationResponse(
-             success=True,
-             data=classified_tickets,
-             total_processed=len(classified_tickets)
-         )
-
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Failed to process sample tickets: {str(e)}")
-
- @app.get("/bulk-dashboard", response_model=ClassificationResponse)
- async def bulk_dashboard():
-     """Automatically load and classify all sample tickets for the bulk dashboard on page load."""
-     try:
-         # Load sample tickets
-         sample_file_path = "sample_tickets.json"
-         if not os.path.exists(sample_file_path):
-             logger.warning(f"Sample tickets file not found: {sample_file_path}")
-             return ClassificationResponse(
-                 success=True,
-                 data=[],
-                 total_processed=0
-             )
-
-         with open(sample_file_path, "r") as f:
-             tickets_data = json.load(f)
-
-         logger.info(f"Loaded {len(tickets_data)} sample tickets for bulk processing")
-
-         # Convert to Ticket objects
-         tickets = [Ticket(**ticket_data) for ticket_data in tickets_data]
-
-         # Classify all tickets
-         classifications = await classifier.classify_tickets_bulk(tickets)
-
-         classified_tickets = [
-             ClassifiedTicket(ticket=ticket, classification=classification)
-             for ticket, classification in zip(tickets, classifications)
-         ]
-
-         logger.info(f"Successfully classified {len(classified_tickets)} tickets for bulk dashboard")
-
-         return ClassificationResponse(
-             success=True,
-             data=classified_tickets,
-             total_processed=len(classified_tickets)
-         )
-
-     except Exception as e:
-         logger.error(f"Failed to process bulk dashboard: {str(e)}")
-         raise HTTPException(status_code=500, detail=f"Failed to process bulk dashboard: {str(e)}")
-
- @app.post("/upload-tickets", response_model=ClassificationResponse)
- async def upload_and_classify_tickets(file: UploadFile = File(...)):
-     """Upload a JSON file and classify the tickets."""
-     try:
-         if not file.filename.endswith('.json'):
-             raise HTTPException(status_code=400, detail="File must be a JSON file")
-
-         content = await file.read()
-         tickets_data = json.loads(content)
-
-         # Convert to Ticket objects
-         tickets = [Ticket(**ticket_data) for ticket_data in tickets_data]
-
-         # Classify all tickets
-         classifications = await classifier.classify_tickets_bulk(tickets)
-
-         classified_tickets = [
-             ClassifiedTicket(ticket=ticket, classification=classification)
-             for ticket, classification in zip(tickets, classifications)
-         ]
-
-         return ClassificationResponse(
-             success=True,
-             data=classified_tickets,
-             total_processed=len(classified_tickets)
-         )
-
-     except json.JSONDecodeError:
-         raise HTTPException(status_code=400, detail="Invalid JSON file")
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Failed to process uploaded tickets: {str(e)}")
-
- @app.post("/interactive-agent")
- async def interactive_agent(
-     question: str = Form(...),
-     channel: str = Form("web")
- ):
-     """Interactive endpoint for new ticket/question submission."""
-     # Create a dummy ticket
-     ticket = Ticket(id="INTERACTIVE-001", subject=question[:80], body=question)
-     classification = await classifier.classify_ticket(ticket)
-     topic_tags = [tag.value for tag in classification.topic_tags]
-     # Internal analysis view
-     analysis = {
-         "topic_tags": topic_tags,
-         "sentiment": classification.sentiment.value,
-         "priority": classification.priority.value,
-         "reasoning": classification.reasoning
-     }
-     # Final response view
-     rag_topics = ["How-to", "Product", "Best practices", "API/SDK", "SSO"]
-     if any(tag in rag_topics for tag in topic_tags):
-         rag_result = await rag_pipeline(question, topic_tags)
-         final_response = {
-             "type": "direct_answer",
-             "answer": rag_result.get("answer", "No answer found."),
-             "sources": rag_result.get("sources", [])
-         }
-     else:
-         final_response = {
-             "type": "routing",
-             "message": f"This ticket has been classified as a '{topic_tags[0]}' issue and routed to the appropriate team."
-         }
-     return JSONResponse({
-         "internal_analysis": analysis,
-         "final_response": final_response
-     })
-
- @app.get("/health")
- async def health_check():
-     """Health check endpoint."""
-     return {"status": "healthy", "service": "Atlan Customer Support Copilot"}
-
- if __name__ == "__main__":
-     uvicorn.run(app, host="127.0.0.1", port=8000)
requirements.txt CHANGED
@@ -1,4 +1,6 @@
  streamlit>=1.28,<2
+ fastapi>=0.104.0
+ uvicorn[standard]>=0.24.0
  groq>=0.31
  pydantic>=2.11,<3
  python-dotenv>=1.1
scraper.py DELETED
@@ -1,291 +0,0 @@
- #!/usr/bin/env python3
-
- import asyncio
- import aiohttp
- import json
- import re
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin, urlparse
- from pathlib import Path
- import time
- from typing import List, Dict, Set
- import logging
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- class AtlanDocScraper:
-     def __init__(self):
-         self.session = None
-         self.scraped_urls = set()
-         self.knowledge_base = []
-         self.base_urls = {
-             "docs": "https://docs.atlan.com/",
-             "developer": "https://developer.atlan.com/"
-         }
-         self.max_pages_per_site = 50
-         self.delay_between_requests = 1
-
-     async def create_session(self):
-         """Create an aiohttp session with proper headers"""
-         headers = {
-             'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-             'Accept-Language': 'en-US,en;q=0.5',
-             'Accept-Encoding': 'gzip, deflate',
-             'Connection': 'keep-alive'
-         }
-         timeout = aiohttp.ClientTimeout(total=30)
-         self.session = aiohttp.ClientSession(headers=headers, timeout=timeout)
-
-     async def close_session(self):
-         """Close the aiohttp session"""
-         if self.session:
-             await self.session.close()
-
-     def clean_text(self, text: str) -> str:
-         """Clean and normalize text content"""
-         if not text:
-             return ""
-
-         # Remove extra whitespace and normalize
-         text = re.sub(r'\s+', ' ', text.strip())
-
-         # Remove common navigation elements
-         text = re.sub(r'(Home|Navigation|Menu|Footer|Header|Sidebar)', '', text, flags=re.IGNORECASE)
-
-         # Remove very short content
-         if len(text) < 50:
-             return ""
-
-         return text
-
-     def extract_main_content(self, soup: BeautifulSoup) -> str:
-         """Extract main content from HTML, focusing on documentation"""
-
-         # Try to find main content areas
-         content_selectors = [
-             'main',
-             'article',
-             '.content',
-             '.main-content',
-             '.documentation',
-             '.docs-content',
-             '#content',
-             '.markdown-body',
-             '.prose'
-         ]
-
-         main_content = ""
-
-         for selector in content_selectors:
-             content_elem = soup.select_one(selector)
-             if content_elem:
-                 main_content = content_elem.get_text(separator=' ', strip=True)
-                 break
-
-         # Fallback: get all text but filter out navigation
-         if not main_content:
-             # Remove navigation, footer, header elements
-             for tag in soup.find_all(['nav', 'footer', 'header', 'aside']):
-                 tag.decompose()
-
-             main_content = soup.get_text(separator=' ', strip=True)
-
-         return self.clean_text(main_content)
-
-     def extract_links(self, soup: BeautifulSoup, base_url: str) -> List[str]:
-         """Extract relevant internal links from the page"""
-         links = []
-
-         for link in soup.find_all('a', href=True):
-             href = link['href']
-             full_url = urljoin(base_url, href)
-
-             # Only include links from the same domain
-             if urlparse(full_url).netloc in [urlparse(url).netloc for url in self.base_urls.values()]:
-                 # Filter out non-documentation links
-                 if not any(skip in full_url.lower() for skip in ['#', 'mailto:', 'tel:', 'javascript:']):
-                     links.append(full_url)
-
-         return list(set(links))  # Remove duplicates
-
-     async def scrape_page(self, url: str) -> Dict:
-         """Scrape a single page and extract content"""
-         if url in self.scraped_urls:
-             return None
-
-         try:
-             logger.info(f"Scraping: {url}")
-
-             async with self.session.get(url) as response:
-                 if response.status != 200:
-                     logger.warning(f"Failed to fetch {url}: {response.status}")
-                     return None
-
-                 html = await response.text()
-                 soup = BeautifulSoup(html, 'html.parser')
-
-                 # Extract metadata
-                 title = soup.find('title')
-                 title_text = title.get_text().strip() if title else ""
-
-                 # Extract main content
-                 content = self.extract_main_content(soup)
-
-                 if not content:
-                     logger.warning(f"No content extracted from {url}")
-                     return None
-
-                 # Extract links for further crawling
-                 links = self.extract_links(soup, url)
-
-                 self.scraped_urls.add(url)
-
-                 return {
-                     'url': url,
-                     'title': title_text,
-                     'content': content,
-                     'links': links,
-                     'timestamp': time.time(),
-                     'source': 'docs' if 'docs.atlan.com' in url else 'developer'
-                 }
-
-         except Exception as e:
-             logger.error(f"Error scraping {url}: {str(e)}")
-             return None
-
-     async def crawl_site(self, base_url: str, max_pages: int = 50) -> List[Dict]:
-         """Crawl a site starting from base URL"""
-         pages_data = []
-         urls_to_visit = [base_url]
-         visited = set()
-
-         while urls_to_visit and len(pages_data) < max_pages:
-             current_url = urls_to_visit.pop(0)
-
-             if current_url in visited:
-                 continue
-
-             visited.add(current_url)
-
-             # Scrape the page
-             page_data = await self.scrape_page(current_url)
-
-             if page_data:
-                 pages_data.append(page_data)
-
-                 # Add new links to visit (limit to avoid infinite crawling)
-                 new_links = [link for link in page_data['links']
-                              if link not in visited and link not in urls_to_visit]
-                 urls_to_visit.extend(new_links[:10])  # Limit new links per page
-
-             # Be respectful - add delay between requests
-             await asyncio.sleep(self.delay_between_requests)
-
-         return pages_data
-
-     async def scrape_all_sites(self) -> List[Dict]:
-         """Scrape all configured sites"""
-         await self.create_session()
-
-         try:
-             all_pages = []
-
-             for site_name, base_url in self.base_urls.items():
-                 logger.info(f"Starting to crawl {site_name}: {base_url}")
-                 site_pages = await self.crawl_site(base_url, self.max_pages_per_site)
-                 all_pages.extend(site_pages)
-                 logger.info(f"Scraped {len(site_pages)} pages from {site_name}")
-
-                 # Delay between sites
-                 await asyncio.sleep(2)
-
-             self.knowledge_base = all_pages
-             return all_pages
-
-         finally:
-             await self.close_session()
-
-     def save_knowledge_base(self, filename: str = "atlan_knowledge_base.json"):
-         """Save the scraped knowledge base to a JSON file"""
-         output_path = Path(filename)
-
-         with open(output_path, 'w', encoding='utf-8') as f:
-             json.dump(self.knowledge_base, f, indent=2, ensure_ascii=False)
-
-         logger.info(f"Knowledge base saved to {output_path}")
-         logger.info(f"Total pages: {len(self.knowledge_base)}")
-
-         # Print summary statistics
-         source_counts = {}
-         for page in self.knowledge_base:
-             source = page.get('source', 'unknown')
-             source_counts[source] = source_counts.get(source, 0) + 1
-
-         logger.info(f"Pages by source: {source_counts}")
-
-     def load_knowledge_base(self, filename: str = "atlan_knowledge_base.json") -> List[Dict]:
-         """Load existing knowledge base from file"""
-         try:
-             with open(filename, 'r', encoding='utf-8') as f:
-                 self.knowledge_base = json.load(f)
-             logger.info(f"Loaded {len(self.knowledge_base)} pages from {filename}")
-             return self.knowledge_base
-         except FileNotFoundError:
-             logger.warning(f"Knowledge base file {filename} not found")
-             return []
-         except Exception as e:
-             logger.error(f"Error loading knowledge base: {str(e)}")
-             return []
-
- async def main():
-     """Main function to run the scraper"""
-     scraper = AtlanDocScraper()
-
-     print("🕷️ Starting Atlan Documentation Scraper...")
-     print("=" * 50)
-
-     # Check if knowledge base already exists
-     existing_kb = scraper.load_knowledge_base()
-
-     if existing_kb:
-         print(f"📚 Found existing knowledge base with {len(existing_kb)} pages")
-         response = input("Do you want to re-scrape? (y/N): ").strip().lower()
-         if response != 'y':
-             print("✅ Using existing knowledge base")
-             return
-
-     print("🚀 Starting web scraping...")
-     print("⏱️ This may take several minutes...")
-
-     start_time = time.time()
-
-     try:
-         pages = await scraper.scrape_all_sites()
-         scraper.save_knowledge_base()
-
-         end_time = time.time()
-         duration = end_time - start_time
-
-         print(f"\n✅ Scraping completed!")
-         print(f"📊 Statistics:")
-         print(f"   - Total pages scraped: {len(pages)}")
-         print(f"   - Time taken: {duration:.2f} seconds")
-         print(f"   - Average time per page: {duration/len(pages):.2f} seconds")
-
-         # Show sample of scraped content
-         if pages:
-             print(f"\n📄 Sample page:")
-             sample = pages[0]
-             print(f"   - Title: {sample['title'][:100]}...")
-             print(f"   - URL: {sample['url']}")
-             print(f"   - Content length: {len(sample['content'])} characters")
-
-     except KeyboardInterrupt:
-         print("\n⚠️ Scraping interrupted by user")
-     except Exception as e:
-         print(f"\n❌ Error during scraping: {str(e)}")
-
- if __name__ == "__main__":
-     asyncio.run(main())