Spaces:

Yeetek
/

anthropic-topic-segmentation

Runtime error

App Files Files Community

Yeetek committed on Jun 15, 2025

Commit

b3e0a65

verified ·

1 Parent(s): 069d845

Upload 17 files

Browse files

Files changed (8) hide show

Dockerfile +6 -3
README.md +8 -8
config/settings.py +1 -1
gradio_app.py +381 -0
models/input.py +4 -2
models/output.py +6 -3
requirements.txt +5 -1
start.sh +31 -0

Dockerfile CHANGED Viewed

@@ -31,17 +31,20 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY . .
 # Create non-root user for security
 RUN useradd --create-home --shell /bin/bash app \
     && chown -R app:app /app
 USER app
 # Health check
-HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:7860/health || exit 1
 # Expose port (HuggingFace Spaces uses 7860)
 EXPOSE 7860
 # Start command optimized for HuggingFace Spaces
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

 # Copy application code
 COPY . .
+# Make startup script executable
+RUN chmod +x start.sh
 # Create non-root user for security
 RUN useradd --create-home --shell /bin/bash app \
     && chown -R app:app /app
 USER app
 # Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=15s --retries=3 \
+    CMD curl -f http://localhost:7860/ || exit 1
 # Expose port (HuggingFace Spaces uses 7860)
 EXPOSE 7860
 # Start command optimized for HuggingFace Spaces
+CMD ["./start.sh"]

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🎯
 colorFrom: blue
 colorTo: purple
 sdk: docker
-app_file: app.py
 pinned: false
 license: mit
 ---
@@ -20,7 +20,7 @@ A production-ready microservice that uses **Anthropic's Claude models** for inte
 ## 🚀 **Live Demo on HuggingFace Spaces**
-Try the API directly: [https://huggingface.co/spaces/YOUR-USERNAME/anthropic-topic-segmentation](https://huggingface.co/spaces/YOUR-USERNAME/anthropic-topic-segmentation)
 ## ✨ **Key Features**
@@ -55,7 +55,7 @@ Successfully processes Czech Shoptet integration discussions, extracting:
 ```bash
 # Clone the repository
-git clone https://huggingface.co/spaces/YOUR-USERNAME/anthropic-topic-segmentation
 cd anthropic-topic-segmentation
 # Create .env file
@@ -84,12 +84,12 @@ uvicorn app:app --host 0.0.0.0 --port 7860
 ### **Health Check**
 ```bash
-curl https://your-space.hf.space/health
 ```
 ### **Topic Extraction**
 ```bash
-curl -X POST https://your-space.hf.space/segment \
   -H "Content-Type: application/json" \
   -d '{
     "sentences": [
@@ -110,8 +110,8 @@ curl -X POST https://your-space.hf.space/segment \
 ```
 ### **Interactive Documentation**
-- **Swagger UI**: https://your-space.hf.space/docs
-- **ReDoc**: https://your-space.hf.space/redoc
 ## 🔧 **n8n Integration**
@@ -122,7 +122,7 @@ Perfect for workflow automation:
   "workflow_name": "Czech E-commerce Analysis",
   "http_request": {
     "method": "POST",
-    "url": "https://your-space.hf.space/segment",
     "body": {
       "sentences": "{{ $json.transcript }}",
       "prompt_config": {

 colorFrom: blue
 colorTo: purple
 sdk: docker
+app_file: gradio_app.py
 pinned: false
 license: mit
 ---
 ## 🚀 **Live Demo on HuggingFace Spaces**
+Try the API directly: [https://huggingface.co/spaces/Yeetek/anthropic-topic-segmentation](https://huggingface.co/spaces/Yeetek/anthropic-topic-segmentation)
 ## ✨ **Key Features**
 ```bash
 # Clone the repository
+git clone https://huggingface.co/spaces/Yeetek/anthropic-topic-segmentation
 cd anthropic-topic-segmentation
 # Create .env file
 ### **Health Check**
 ```bash
+curl https://yeetek-anthropic-topic-segmentation.hf.space/health
 ```
 ### **Topic Extraction**
 ```bash
+curl -X POST https://yeetek-anthropic-topic-segmentation.hf.space/segment \
   -H "Content-Type: application/json" \
   -d '{
     "sentences": [
 ```
 ### **Interactive Documentation**
+- **Swagger UI**: https://yeetek-anthropic-topic-segmentation.hf.space/docs
+- **ReDoc**: https://yeetek-anthropic-topic-segmentation.hf.space/redoc
 ## 🔧 **n8n Integration**
   "workflow_name": "Czech E-commerce Analysis",
   "http_request": {
     "method": "POST",
+    "url": "https://yeetek-anthropic-topic-segmentation.hf.space/segment",
     "body": {
       "sentences": "{{ $json.transcript }}",
       "prompt_config": {

config/settings.py CHANGED Viewed

@@ -25,7 +25,7 @@ class AnthropicModel(str, Enum):
     """Supported Anthropic models."""
     CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20241022"
     CLAUDE_3_5_HAIKU = "claude-3-5-haiku-20241022"
-    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"  # Deprecated but kept for compatibility
     CLAUDE_3_HAIKU = "claude-3-haiku-20240307"    # Deprecated but kept for compatibility

     """Supported Anthropic models."""
     CLAUDE_3_5_SONNET = "claude-3-5-sonnet-20241022"
     CLAUDE_3_5_HAIKU = "claude-3-5-haiku-20241022"
+    CLAUDE_3_SONNET = "claude-3-5-sonnet-20241022"  # Updated to current version
     CLAUDE_3_HAIKU = "claude-3-haiku-20240307"    # Deprecated but kept for compatibility

gradio_app.py ADDED Viewed

	@@ -0,0 +1,381 @@

+"""
+Gradio interface for Anthropic Topic Segmentation Microservice.
+This creates a web interface that displays the README content as the main page
+and provides an interactive API interface for HuggingFace Spaces.
+"""
+import gradio as gr
+import requests
+import json
+import os
+from typing import Dict, Any, List
+import markdown
+# Read the README content
+def load_readme():
+    """Return the contents of README.md rendered as an HTML string.
+
+    A leading YAML front-matter section (text between the first two ``---``
+    markers) is dropped before rendering. Any failure while reading or
+    converting the file is reported as a short HTML error paragraph instead
+    of being raised.
+    """
+    try:
+        with open("README.md", "r", encoding="utf-8") as readme_file:
+            text = readme_file.read()
+        # Strip the front matter only when both the opening and the closing
+        # "---" markers are present.
+        if text.startswith("---"):
+            _front_matter, closing_marker, body = text[3:].partition("---")
+            if closing_marker:
+                text = body.strip()
+        # Render the remaining markdown to HTML
+        return markdown.markdown(text, extensions=['codehilite', 'fenced_code'])
+    except Exception as e:
+        return f"<p>Error loading README: {str(e)}</p>"
+# API endpoint URL (FastAPI backend runs on port 8000)
+API_BASE_URL = "http://localhost:8000"
+def call_health_check():
+    """Query the backend's ``/health`` endpoint and report the outcome.
+
+    Returns:
+        A ``(status, details)`` tuple of strings: a one-line status message
+        plus the pretty-printed JSON body on HTTP 200, the raw response text
+        on any other status, or the error text when the request failed.
+    """
+    try:
+        response = requests.get(f"{API_BASE_URL}/health", timeout=10)
+        if response.status_code == 200:
+            # ensure_ascii=False keeps non-ASCII text readable, matching how
+            # the sample payload is serialized elsewhere in this module.
+            details = json.dumps(response.json(), indent=2, ensure_ascii=False)
+            return "✅ API is healthy", details
+        return f"❌ API returned status {response.status_code}", response.text
+    except Exception as e:
+        # Best-effort UI helper: every failure (connection error, timeout,
+        # non-JSON body) is shown to the user rather than raised.
+        return "❌ Error connecting to API", str(e)
+def call_segment_api(sentences_json: str, template: str, language: str, business_domain: str):
+    """Send a transcript to the backend's ``/segment`` endpoint.
+
+    Args:
+        sentences_json: JSON text used as the request's ``sentences`` field.
+        template: Value for ``prompt_config.template``.
+        language: Value for ``prompt_config.language``.
+        business_domain: Optional value for ``prompt_config.business_domain``;
+            empty or whitespace-only input is sent as ``null``.
+
+    Returns:
+        A ``(status, body)`` tuple of strings: a one-line status message plus
+        the pretty-printed JSON result on HTTP 200, the raw response text on
+        any other status, or an empty string when the input was invalid or
+        the request failed.
+    """
+    # Reject malformed input before touching the network.
+    try:
+        sentences_data = json.loads(sentences_json)
+    except (TypeError, json.JSONDecodeError) as e:
+        return f"❌ Invalid JSON format: {e}", ""
+    # Treat a blank or whitespace-only domain as "not provided".
+    domain = (business_domain or "").strip()
+    request_data = {
+        "sentences": sentences_data,
+        "prompt_config": {
+            "template": template,
+            "language": language,
+            "business_domain": domain or None,
+        },
+    }
+    try:
+        # `json=` already serializes the body and sets the JSON Content-Type
+        # header, so no explicit headers are needed.
+        response = requests.post(
+            f"{API_BASE_URL}/segment",
+            json=request_data,
+            timeout=120,
+        )
+        if response.status_code != 200:
+            return f"❌ API returned status {response.status_code}", response.text
+        result = response.json()
+        topics_summary = f"✅ Successfully extracted {len(result.get('topics', []))} topics"
+        # ensure_ascii=False keeps Czech/Slovak text readable instead of
+        # \uXXXX escapes, matching how the sample payload is serialized.
+        return topics_summary, json.dumps(result, indent=2, ensure_ascii=False)
+    except Exception as e:
+        # Best-effort UI helper: surface any failure to the user.
+        return f"❌ Error calling API: {e}", ""
+# Load README content
+readme_html = load_readme()
+# Sample data for the API demo
+sample_sentences = [
+    {
+        "text": "Zákazníci požadují nestandardní úpravy košíku v Shoptetu.",
+        "speaker": "Client",
+        "start_time": 2.01,
+        "end_time": 8.45,
+        "sentence_index": 1
+    },
+    {
+        "text": "Potřebujeme implementovat speciální platební metody.",
+        "speaker": "Client",
+        "start_time": 8.45,
+        "end_time": 15.2,
+        "sentence_index": 2
+    },
+    {
+        "text": "API má problémy s rychlostí načítání.",
+        "speaker": "Developer",
+        "start_time": 15.2,
+        "end_time": 20.1,
+        "sentence_index": 3
+    }
+]
+sample_json = json.dumps(sample_sentences, indent=2, ensure_ascii=False)
+# Create the Gradio interface
+with gr.Blocks(
+    title="🎯 Anthropic Topic Segmentation Microservice",
+    theme=gr.themes.Soft(),
+    css="""
+    .main-header {
+        text-align: center;
+        padding: 20px;
+        background: linear-gradient(90deg, #3b82f6, #8b5cf6);
+        color: white;
+        border-radius: 10px;
+        margin-bottom: 20px;
+    }
+    .api-section {
+        border: 2px solid #e5e7eb;
+        border-radius: 10px;
+        padding: 20px;
+        margin: 10px 0;
+    }
+    """
+) as app:
+    # Main header
+    gr.HTML("""
+    <div class="main-header">
+        <h1>🎯 Anthropic Topic Segmentation Microservice</h1>
+        <p>AI-powered topic extraction from Czech e-commerce transcripts using Anthropic Claude</p>
+        <p><strong>✅ Production Ready | 🌍 Multi-Language | 🔄 n8n Compatible</strong></p>
+    </div>
+    """)
+    with gr.Tabs():
+        # Tab 1: Documentation (README)
+        with gr.Tab("📚 Documentation"):
+            gr.HTML(readme_html)
+        # Tab 2: API Testing Interface
+        with gr.Tab("🧪 API Testing"):
+            gr.HTML('<div class="api-section">')
+            gr.Markdown("## 🔍 Health Check")
+            with gr.Row():
+                health_btn = gr.Button("Check API Health", variant="primary")
+                health_status = gr.Textbox(label="Status", interactive=False)
+            health_response = gr.Code(label="Health Response", language="json")
+            health_btn.click(
+                call_health_check,
+                outputs=[health_status, health_response]
+            )
+            gr.HTML('</div><div class="api-section">')
+            gr.Markdown("## 🎯 Topic Segmentation")
+            gr.Markdown("Test the topic extraction API with your own data or use the sample below:")
+            with gr.Row():
+                with gr.Column(scale=2):
+                    sentences_input = gr.Code(
+                        label="Sentences JSON",
+                        language="json",
+                        value=sample_json,
+                        lines=15
+                    )
+                with gr.Column(scale=1):
+                    template_dropdown = gr.Dropdown(
+                        choices=["interview", "customer_call", "feedback_ticket", "general_commentary"],
+                        value="customer_call",
+                        label="Template"
+                    )
+                    language_dropdown = gr.Dropdown(
+                        choices=["cs", "en", "sk", "auto"],
+                        value="cs",
+                        label="Language"
+                    )
+                    business_domain = gr.Textbox(
+                        label="Business Domain (optional)",
+                        value="E-commerce",
+                        placeholder="e.g., E-commerce, Healthcare, Finance"
+                    )
+                    segment_btn = gr.Button("Extract Topics", variant="primary")
+            with gr.Row():
+                segment_status = gr.Textbox(label="Status", interactive=False)
+            segment_response = gr.Code(label="API Response", language="json", lines=20)
+            segment_btn.click(
+                call_segment_api,
+                inputs=[sentences_input, template_dropdown, language_dropdown, business_domain],
+                outputs=[segment_status, segment_response]
+            )
+            gr.HTML('</div>')
+        # Tab 3: API Documentation
+        with gr.Tab("📖 API Reference"):
+            gr.Markdown("""
+            ## 🔗 API Endpoints
+                         ### Base URL
+             ```
+             https://yeetek-anthropic-topic-segmentation.hf.space
+             ```
+            ### Endpoints
+            #### `GET /health`
+            Check the health status of the API and Anthropic integration.
+            #### `POST /segment`
+            Extract topics from transcript data.
+            **Request Body:**
+            ```json
+            {
+              "sentences": [
+                {
+                  "text": "Your transcript text here",
+                  "speaker": "Speaker name",
+                  "start_time": 0.0,
+                  "end_time": 5.0,
+                  "sentence_index": 1
+                }
+              ],
+              "prompt_config": {
+                "template": "customer_call",
+                "language": "cs",
+                "business_domain": "E-commerce"
+              }
+            }
+            ```
+            #### `GET /docs`
+            Interactive API documentation (Swagger UI)
+            #### `GET /redoc`
+            Alternative API documentation (ReDoc)
+            ## 🔧 Integration Examples
+                         ### cURL
+             ```bash
+             curl -X POST https://yeetek-anthropic-topic-segmentation.hf.space/segment \\
+               -H "Content-Type: application/json" \\
+               -d @your-request.json
+             ```
+             ### Python
+             ```python
+             import requests
+             response = requests.post(
+                 "https://yeetek-anthropic-topic-segmentation.hf.space/segment",
+                 json=your_request_data
+             )
+             result = response.json()
+             ```
+             ### n8n Workflow
+             Use the HTTP Request node with:
+             - **Method**: POST
+             - **URL**: https://yeetek-anthropic-topic-segmentation.hf.space/segment
+             - **Body**: JSON with your transcript data
+            """)
+        # Tab 4: Examples
+        with gr.Tab("💡 Examples"):
+            gr.Markdown("""
+            ## 🇨🇿 Czech E-commerce Example
+            Perfect for analyzing Shoptet integration discussions:
+            ```json
+            {
+              "sentences": [
+                {
+                  "text": "Zákazníci požadují nestandardní úpravy košíku v Shoptetu.",
+                  "speaker": "Client",
+                  "start_time": 2.01,
+                  "end_time": 8.45,
+                  "sentence_index": 1
+                },
+                {
+                  "text": "Potřebujeme implementovat speciální platební metody.",
+                  "speaker": "Client",
+                  "start_time": 8.45,
+                  "end_time": 15.2,
+                  "sentence_index": 2
+                }
+              ],
+              "prompt_config": {
+                "template": "customer_call",
+                "language": "cs",
+                "business_domain": "E-commerce"
+              }
+            }
+            ```
+            ## 🇬🇧 English Business Interview
+            ```json
+            {
+              "sentences": [
+                {
+                  "text": "Our main challenge is customer retention in the B2B segment.",
+                  "speaker": "Manager",
+                  "start_time": 0.0,
+                  "end_time": 4.5,
+                  "sentence_index": 1
+                },
+                {
+                  "text": "We need better integration with existing CRM systems.",
+                  "speaker": "Manager",
+                  "start_time": 4.5,
+                  "end_time": 8.2,
+                  "sentence_index": 2
+                }
+              ],
+              "prompt_config": {
+                "template": "interview",
+                "language": "en",
+                "business_domain": "SaaS"
+              }
+            }
+            ```
+            ## 📊 Expected Output
+            The API returns structured business insights:
+            ```json
+            {
+              "status": "success",
+              "topics": [
+                {
+                  "topic_name": "Nestandardní požadavky na košík",
+                  "topic_type": "client_needs_b2b",
+                  "topic_detail": "Zákazníci požadují nestandardní úpravy košíku...",
+                  "confidence_score": 0.9,
+                  "actionable_insights": [
+                    "Vytvořit standardizovaný proces pro handling nestandardních požadavků"
+                  ]
+                }
+              ],
+              "metadata": {
+                "processing_time": 10.5,
+                "topics_extracted": 3,
+                "average_confidence": 0.85
+              }
+            }
+            ```
+            """)
+# Launch the app
+if __name__ == "__main__":
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )

models/input.py CHANGED Viewed

@@ -252,7 +252,8 @@ class TranscriptRequest(BaseModel):
     model_config = ConfigDict(
         str_strip_whitespace=True,
         validate_assignment=True,
-        extra="forbid"
     )
     # Core transcript data
@@ -284,7 +285,8 @@ class TranscriptRequest(BaseModel):
     model_config_override: Optional[ModelConfiguration] = Field(
         default=None,
-        description="Model configuration overrides"
     )
     # Processing options

     model_config = ConfigDict(
         str_strip_whitespace=True,
         validate_assignment=True,
+        extra="forbid",
+        protected_namespaces=()
     )
     # Core transcript data
     model_config_override: Optional[ModelConfiguration] = Field(
         default=None,
+        description="Model configuration overrides",
+        alias="model_config_override"
     )
     # Processing options

models/output.py CHANGED Viewed

@@ -236,7 +236,8 @@ class ProcessingMetadata(BaseModel):
     """
     model_config = ConfigDict(
         validate_assignment=True,
-        extra="forbid"
     )
     # Request information
@@ -449,7 +450,8 @@ class HealthCheckResponse(BaseModel):
     """
     model_config = ConfigDict(
         validate_assignment=True,
-        extra="forbid"
     )
     status: str = Field(
@@ -489,7 +491,8 @@ class ModelStatusResponse(BaseModel):
     """
     model_config = ConfigDict(
         validate_assignment=True,
-        extra="forbid"
     )
     current_model: str = Field(

     """
     model_config = ConfigDict(
         validate_assignment=True,
+        extra="forbid",
+        protected_namespaces=()
     )
     # Request information
     """
     model_config = ConfigDict(
         validate_assignment=True,
+        extra="forbid",
+        protected_namespaces=()
     )
     status: str = Field(
     """
     model_config = ConfigDict(
         validate_assignment=True,
+        extra="forbid",
+        protected_namespaces=()
     )
     current_model: str = Field(

requirements.txt CHANGED Viewed

@@ -35,4 +35,8 @@ mypy==1.7.1
 psutil==5.9.6
 # SSL certificates fix
-certifi>=2023.0.0

 psutil==5.9.6
 # SSL certificates fix
+certifi>=2023.0.0
+# Gradio web interface for HuggingFace Spaces
+gradio>=4.0.0
+markdown>=3.5.0

start.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+#!/bin/bash
+# Startup script for HuggingFace Spaces.
+# Runs the FastAPI backend (port 8000) in the background and the Gradio
+# frontend (port 7860) in the foreground.
+echo "🚀 Starting Anthropic Topic Segmentation Microservice..."
+# Start FastAPI backend in the background
+echo "📡 Starting FastAPI backend on port 8000..."
+uvicorn app:app --host 0.0.0.0 --port 8000 --workers 1 &
+FASTAPI_PID=$!
+# Stop the backend whenever this script exits, including the failure path.
+cleanup() {
+    echo "🛑 Stopping services..."
+    kill "$FASTAPI_PID" 2>/dev/null || true
+    wait "$FASTAPI_PID" 2>/dev/null || true
+}
+trap cleanup EXIT
+# Poll the health endpoint once per second instead of sleeping a fixed time,
+# so a slow start is tolerated and a fast start is not delayed.
+# STARTUP_TIMEOUT is the maximum number of attempts (default 30).
+STARTUP_TIMEOUT="${STARTUP_TIMEOUT:-30}"
+BACKEND_HEALTHY=0
+for ((attempt = 0; attempt < STARTUP_TIMEOUT; attempt++)); do
+    if curl -f http://localhost:8000/health > /dev/null 2>&1; then
+        BACKEND_HEALTHY=1
+        break
+    fi
+    # Give up early if uvicorn has already exited.
+    if ! kill -0 "$FASTAPI_PID" 2>/dev/null; then
+        break
+    fi
+    sleep 1
+done
+if [ "$BACKEND_HEALTHY" -eq 1 ]; then
+    echo "✅ FastAPI backend is healthy"
+else
+    echo "❌ FastAPI backend failed to start"
+    exit 1
+fi
+# Start Gradio frontend on port 7860 (HuggingFace Spaces standard)
+echo "🎨 Starting Gradio frontend on port 7860..."
+python gradio_app.py
+# When Gradio exits, the EXIT trap stops FastAPI; the script's exit status
+# is the frontend's exit status.