| | """ |
| | Root pytest configuration and shared fixtures. |
| | |
| | This module provides fixtures that are available to all test modules. |
| | """ |
| | import pytest |
| | import numpy as np |
| | import pandas as pd |
| | import tempfile |
| | import sqlite3 |
| | from pathlib import Path |
| | from sklearn.feature_extraction.text import TfidfVectorizer |
| |
|
| |
|
@pytest.fixture
def sample_text_data():
    """Return five short, clean sentences describing software changes.

    The list is rebuilt on every request, so a test may mutate it freely.
    """
    texts = [
        "Fixed bug in authentication system using OAuth2",
        "Implemented REST API endpoint for user data retrieval",
        "Added unit tests for data processing pipeline",
        "Refactored code to improve performance and reduce memory usage",
        "Updated database schema with new migration scripts",
    ]
    return texts
| |
|
| |
|
@pytest.fixture
def sample_dirty_text():
    """Return five noisy text samples for exercising text clean-up code.

    The samples contain, in order: a URL, HTML tags plus inline code,
    an emoji, a fenced code block, and embedded newlines.
    """
    noisy_samples = [
        "Fixed bug https://github.com/repo/issues/123 in auth system",
        "Added feature with <b>HTML tags</b> and `inline code`",
        "Removed emoji 😀 and special characters",
        # NOTE(review): the leading whitespace inside this multi-line literal
        # is reconstructed -- confirm it against the original file.
        """Updated docs with code block:
        ```python
        def foo():
            pass
        ```
        """,
        "Fixed multiple spaces and\n\nnewlines",
    ]
    return noisy_samples
| |
|
| |
|
@pytest.fixture
def sample_labels():
    """Return a 5-row DataFrame of 0/1 indicators, one column per label.

    Columns, in order: 'Language', 'Data Structure', 'Testing', 'API',
    'DevOps'.
    """
    indicators_by_label = {
        'Language': [1, 1, 1, 0, 1],
        'Data Structure': [1, 0, 0, 1, 1],
        'Testing': [0, 0, 1, 0, 0],
        'API': [1, 1, 0, 0, 0],
        'DevOps': [0, 0, 0, 1, 1],
    }
    return pd.DataFrame(indicators_by_label)
| |
|
| |
|
@pytest.fixture
def sample_dataframe(sample_text_data, sample_labels):
    """Return a 5-row DataFrame combining issue metadata, text and labels.

    The frame starts with the columns 'Repo Name', 'PR #', 'issue text'
    and 'issue description'; every column of ``sample_labels`` is then
    appended in its original order.
    """
    n_rows = 5

    # Explicit indexing (rather than slicing) keeps the IndexError when
    # ``sample_text_data`` holds fewer than five entries.
    issue_texts = [sample_text_data[i] for i in range(n_rows)]
    descriptions = [
        f'Description for issue {number}' for number in range(1, n_rows + 1)
    ]

    frame = pd.DataFrame({
        'Repo Name': ['repo1', 'repo2', 'repo1', 'repo3', 'repo2'],
        'PR #': [1, 2, 3, 4, 5],
        'issue text': issue_texts,
        'issue description': descriptions,
    })

    # Copy the raw values so the label columns are attached positionally
    # instead of being aligned on the index.
    for label_name in sample_labels.columns:
        frame[label_name] = sample_labels[label_name].values

    return frame
| |
|
| |
|
@pytest.fixture
def temp_db(sample_dataframe):
    """Yield the path of a temporary SQLite database holding the sample data.

    ``sample_dataframe`` is written (without its index) to the table
    'nlbse_tool_competition_data_by_issue'.  The fixture yields a
    ``pathlib.Path`` to the database file and removes the file afterwards.

    Cleanup is guarded so that the connection is closed and the file is
    removed even when populating the database fails, and teardown does not
    error if the test already deleted the file.
    """
    # delete=False: only a unique file name is needed here; sqlite3 reopens
    # the file by path once this handle has been closed.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.db', delete=False) as f:
        db_path = Path(f.name)

    try:
        conn = sqlite3.connect(str(db_path))
        try:
            sample_dataframe.to_sql('nlbse_tool_competition_data_by_issue',
                                    conn, if_exists='replace', index=False)
        finally:
            # Always release the connection, even if to_sql raised.
            conn.close()

        yield db_path
    finally:
        try:
            db_path.unlink()
        except FileNotFoundError:
            # The test removed the file itself; nothing left to clean up.
            pass
| |
|
| |
|
@pytest.fixture
def sample_tfidf_vectorizer():
    """Return a new, unfitted ``TfidfVectorizer`` with a small configuration.

    Settings: at most 100 features, n-gram range (1, 2), and the built-in
    'english' stop-word list.
    """
    return TfidfVectorizer(
        max_features=100,
        ngram_range=(1, 2),
        stop_words='english',
    )
| |
|
| |
|
@pytest.fixture
def sample_sparse_features():
    """Return a mostly-zero 100x50 feature matrix as a dense ``numpy.ndarray``.

    For each of the 100 rows, between 5 and 10 distinct columns are chosen
    at random and filled with values drawn from ``rand`` (uniform on
    [0, 1)); every other entry stays 0.  Despite the fixture name, the
    result is an ordinary dense array, not a ``scipy.sparse`` matrix.

    Values come from a private, fixed-seed generator so that every test run
    sees the same matrix and the global ``np.random`` state is left alone.
    """
    n_rows, n_cols = 100, 50
    rng = np.random.RandomState(42)
    features = np.zeros((n_rows, n_cols))

    for row in range(n_rows):
        # randint's upper bound is exclusive: this yields 5..10 inclusive.
        n_nonzero = rng.randint(5, 11)
        indices = rng.choice(n_cols, n_nonzero, replace=False)
        features[row, indices] = rng.rand(n_nonzero)

    return features
| |
|
| |
|
@pytest.fixture
def sample_multilabel_data():
    """Return a 100x10 integer 0/1 label-indicator matrix.

    Each of the 100 rows has between 1 and 5 distinct label positions set
    to 1; all other entries are 0.

    Labels come from a private, fixed-seed generator so that every test run
    sees the same matrix and the global ``np.random`` state is left alone.
    """
    n_samples = 100
    n_labels = 10
    rng = np.random.RandomState(7)

    labels = np.zeros((n_samples, n_labels), dtype=int)

    for row in range(n_samples):
        # randint's upper bound is exclusive: this yields 1..5 inclusive.
        n_labels_per_sample = rng.randint(1, 6)
        label_indices = rng.choice(n_labels, n_labels_per_sample, replace=False)
        labels[row, label_indices] = 1

    return labels
| |
|
| |
|
@pytest.fixture
def empty_text_samples():
    """Return edge-case text inputs: empty, missing, blank and minimal.

    In order: an empty string, ``None``, a whitespace-only string, a
    newline-only string, and a single-character string.
    """
    edge_cases = [
        "",
        None,
        " ",
        "\n\n\n",
        "a",
    ]
    return edge_cases
| |
|