Spaces:

softblackhole
/

auto-tagging-rag

Sleeping

App Files Community

soft.engineer committed on Nov 18, 2025

Commit

dfabcc2

1 Parent(s): 2813e41

add some feature

Browse files

Files changed (3) hide show

Dockerfile +35 -0
core/eval.py +9 -1
core/report_generator.py +51 -6

Dockerfile ADDED Viewed

	@@ -0,0 +1,35 @@

+FROM python:3.10-slim
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first (for better caching)
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Models download automatically on first run (SentenceTransformers, spaCy, etc.)
+# No manual download needed
+# Copy application
+COPY . .
+# Create necessary directories
+RUN mkdir -p reports chroma_data
+# Expose port
+EXPOSE 7860
+# Set environment variables
+ENV GRADIO_SERVER_PORT=7860
+ENV CHROMA_PERSIST_DIR=/app/chroma_data
+# Run application
+CMD ["python", "app.py"]

core/eval.py CHANGED Viewed

@@ -225,15 +225,23 @@ class RAGEvaluator:
                 if user_sat_score is None:
                     user_sat_score = user_satisfaction.get(f'query_{i}')
             query_result = {
                 'query_id': query_id,
                 'query': query,
                 'ground_truth': query_data.get('ground_truth', []),
                 'user_satisfaction': user_sat_score,
                 'pipelines': {}
             }
-            k_values = query_data.get('k_values', [1, 3, 5])
             for pipeline in pipelines:
                 pipeline_results = {}

                 if user_sat_score is None:
                     user_sat_score = user_satisfaction.get(f'query_{i}')
+            # Extract tags and tag_operator for representative examples
+            tags = query_data.get('tags', []) or []
+            if isinstance(tags, str):
+                tags = [t.strip() for t in tags.split(',') if t.strip()]
+            tag_operator = query_data.get('tag_operator', 'OR')
             query_result = {
                 'query_id': query_id,
                 'query': query,
                 'ground_truth': query_data.get('ground_truth', []),
                 'user_satisfaction': user_sat_score,
+                'tags': tags,
+                'tag_operator': tag_operator,
                 'pipelines': {}
             }
+            k_values = query_data.get('k_values', [1, 3, 5, 10])  # Include k=10 as per requirements
             for pipeline in pipelines:
                 pipeline_results = {}

core/report_generator.py CHANGED Viewed

@@ -166,12 +166,29 @@ class ReportGenerator:
             query_id = row['query_id']
             query_result = next((r for r in raw_results if r.get('query_id') == query_id), None)
             if query_result:
                 examples['best_performing'].append({
                     'query': query_result['query'],
                     'pipeline': row['pipeline'],
                     'precision_at_k': row['precision_at_k'],
                     'ndcg_at_k': row['ndcg_at_k'],
-                    'mrr': row['mrr']
                 })
         # Worst performing queries
@@ -180,12 +197,19 @@ class ReportGenerator:
             query_id = row['query_id']
             query_result = next((r for r in raw_results if r.get('query_id') == query_id), None)
             if query_result:
                 examples['worst_performing'].append({
                     'query': query_result['query'],
                     'pipeline': row['pipeline'],
                     'precision_at_k': row['precision_at_k'],
                     'ndcg_at_k': row['ndcg_at_k'],
-                    'mrr': row['mrr']
                 })
         # Most improved (hybrid vs baseline)
@@ -200,11 +224,18 @@ class ReportGenerator:
                 for query_id in improvement.index:
                     query_result = next((r for r in raw_results if r.get('query_id') == query_id), None)
                     if query_result:
                         examples['most_improved'].append({
                             'query': query_result['query'],
                             'baseline_precision': baseline_df.loc[query_id, 'precision_at_k'],
                             'hybrid_precision': hybrid_df.loc[query_id, 'precision_at_k'],
-                            'improvement': improvement[query_id]
                         })
         return examples
@@ -327,20 +358,34 @@ class ReportGenerator:
             for example in examples['best_performing']:
                 html += f"<div class='example-item'><strong>Query:</strong> {example['query']}<br>"
                 html += f"<strong>Pipeline:</strong> {self.METHOD_NAMES.get(example['pipeline'], example['pipeline'])}<br>"
-                html += f"<strong>Precision@5:</strong> {example['precision_at_k']:.3f}</div>"
         if examples.get('worst_performing'):
             html += "<h3>Worst Performing Queries</h3>"
             for example in examples['worst_performing']:
                 html += f"<div class='example-item'><strong>Query:</strong> {example['query']}<br>"
                 html += f"<strong>Pipeline:</strong> {self.METHOD_NAMES.get(example['pipeline'], example['pipeline'])}<br>"
-                html += f"<strong>Precision@5:</strong> {example['precision_at_k']:.3f}</div>"
         if examples.get('most_improved'):
             html += "<h3>Most Improved Queries (Hybrid vs Baseline)</h3>"
             for example in examples['most_improved']:
                 html += f"<div class='example-item'><strong>Query:</strong> {example['query']}<br>"
-                html += f"<strong>Improvement:</strong> +{example['improvement']:.3f}</div>"
         if not html:
             return "<p>No representative examples available.</p>"

             query_id = row['query_id']
             query_result = next((r for r in raw_results if r.get('query_id') == query_id), None)
             if query_result:
+                # Extract tags from query result if available
+                tags_used = query_result.get('tags', []) or []
+                if isinstance(tags_used, str):
+                    tags_used = [t.strip() for t in tags_used.split(',') if t.strip()]
+                # Get tags from retrieved documents (from pipeline results)
+                tags_in_results = []
+                if 'pipelines' in query_result:
+                    pipeline_data = query_result['pipelines'].get(row['pipeline'], {})
+                    # Try to extract tags from any k value's results
+                    for k_val, pipeline_result in pipeline_data.items():
+                        if isinstance(pipeline_result, dict) and 'sources' in str(pipeline_result):
+                            # Tags would be in sources metadata, extract if available
+                            pass  # Tags in sources metadata are complex to extract here
                 examples['best_performing'].append({
                     'query': query_result['query'],
                     'pipeline': row['pipeline'],
                     'precision_at_k': row['precision_at_k'],
                     'ndcg_at_k': row['ndcg_at_k'],
+                    'mrr': row['mrr'],
+                    'tags_used': tags_used,
+                    'tag_operator': query_result.get('tag_operator', 'N/A')
                 })
         # Worst performing queries
             query_id = row['query_id']
             query_result = next((r for r in raw_results if r.get('query_id') == query_id), None)
             if query_result:
+                # Extract tags from query result if available
+                tags_used = query_result.get('tags', []) or []
+                if isinstance(tags_used, str):
+                    tags_used = [t.strip() for t in tags_used.split(',') if t.strip()]
                 examples['worst_performing'].append({
                     'query': query_result['query'],
                     'pipeline': row['pipeline'],
                     'precision_at_k': row['precision_at_k'],
                     'ndcg_at_k': row['ndcg_at_k'],
+                    'mrr': row['mrr'],
+                    'tags_used': tags_used,
+                    'tag_operator': query_result.get('tag_operator', 'N/A')
                 })
         # Most improved (hybrid vs baseline)
                 for query_id in improvement.index:
                     query_result = next((r for r in raw_results if r.get('query_id') == query_id), None)
                     if query_result:
+                        # Extract tags from query result if available
+                        tags_used = query_result.get('tags', []) or []
+                        if isinstance(tags_used, str):
+                            tags_used = [t.strip() for t in tags_used.split(',') if t.strip()]
                         examples['most_improved'].append({
                             'query': query_result['query'],
                             'baseline_precision': baseline_df.loc[query_id, 'precision_at_k'],
                             'hybrid_precision': hybrid_df.loc[query_id, 'precision_at_k'],
+                            'improvement': improvement[query_id],
+                            'tags_used': tags_used,
+                            'tag_operator': query_result.get('tag_operator', 'N/A')
                         })
         return examples
             for example in examples['best_performing']:
                 html += f"<div class='example-item'><strong>Query:</strong> {example['query']}<br>"
                 html += f"<strong>Pipeline:</strong> {self.METHOD_NAMES.get(example['pipeline'], example['pipeline'])}<br>"
+                html += f"<strong>Precision@5:</strong> {example['precision_at_k']:.3f}, <strong>nDCG@5:</strong> {example['ndcg_at_k']:.3f}, <strong>MRR:</strong> {example['mrr']:.3f}<br>"
+                tags_used = example.get('tags_used', [])
+                if tags_used:
+                    html += f"<strong>Tags Used:</strong> {', '.join(tags_used)} (Operator: {example.get('tag_operator', 'N/A')})<br>"
+                html += "</div>"
         if examples.get('worst_performing'):
             html += "<h3>Worst Performing Queries</h3>"
             for example in examples['worst_performing']:
                 html += f"<div class='example-item'><strong>Query:</strong> {example['query']}<br>"
                 html += f"<strong>Pipeline:</strong> {self.METHOD_NAMES.get(example['pipeline'], example['pipeline'])}<br>"
+                html += f"<strong>Precision@5:</strong> {example['precision_at_k']:.3f}, <strong>nDCG@5:</strong> {example['ndcg_at_k']:.3f}, <strong>MRR:</strong> {example['mrr']:.3f}<br>"
+                tags_used = example.get('tags_used', [])
+                if tags_used:
+                    html += f"<strong>Tags Used:</strong> {', '.join(tags_used)} (Operator: {example.get('tag_operator', 'N/A')})<br>"
+                html += "</div>"
         if examples.get('most_improved'):
             html += "<h3>Most Improved Queries (Hybrid vs Baseline)</h3>"
             for example in examples['most_improved']:
                 html += f"<div class='example-item'><strong>Query:</strong> {example['query']}<br>"
+                html += f"<strong>Baseline Precision@5:</strong> {example['baseline_precision']:.3f}<br>"
+                html += f"<strong>Hybrid Precision@5:</strong> {example['hybrid_precision']:.3f}<br>"
+                html += f"<strong>Improvement:</strong> +{example['improvement']:.3f}<br>"
+                tags_used = example.get('tags_used', [])
+                if tags_used:
+                    html += f"<strong>Tags Used:</strong> {', '.join(tags_used)} (Operator: {example.get('tag_operator', 'N/A')})<br>"
+                html += "</div>"
         if not html:
             return "<p>No representative examples available.</p>"