Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Language Detector | |
| This module provides functionality for detecting programming languages in a repository. | |
| """ | |
| import os | |
| import logging | |
| from collections import Counter | |
| logger = logging.getLogger(__name__) | |
# Lookup table: file extension (lower-case, leading dot included) -> language name.
EXTENSION_TO_LANGUAGE = {
    # General-purpose and systems languages
    ".py": "Python",
    ".js": "JavaScript",
    ".jsx": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".java": "Java",
    ".go": "Go",
    ".rs": "Rust",
    ".cpp": "C++",
    ".cc": "C++",
    ".cxx": "C++",
    ".c": "C",
    ".h": "C",  # plain .h headers are counted as C; .hpp is counted as C++
    ".hpp": "C++",
    ".cs": "C#",
    ".php": "PHP",
    ".rb": "Ruby",
    ".swift": "Swift",
    ".kt": "Kotlin",
    ".scala": "Scala",
    ".r": "R",
    # Shell scripts
    ".sh": "Shell",
    ".bash": "Shell",
    ".zsh": "Shell",
    # Markup and stylesheets
    ".html": "HTML",
    ".htm": "HTML",
    ".css": "CSS",
    ".scss": "SCSS",
    ".sass": "SCSS",
    ".less": "Less",
    # Documentation and data formats
    ".md": "Markdown",
    ".json": "JSON",
    ".xml": "XML",
    ".yaml": "YAML",
    ".yml": "YAML",
    # Query languages
    ".sql": "SQL",
    ".graphql": "GraphQL",
    ".gql": "GraphQL",
}
# Lookup table: exact file name -> language/tool name, for files whose name
# (rather than extension) identifies what they belong to.
SPECIAL_FILES_TO_LANGUAGE = {
    # Container and build tooling
    "Dockerfile": "Docker",
    "docker-compose.yml": "Docker",
    "docker-compose.yaml": "Docker",
    "Makefile": "Make",
    "CMakeLists.txt": "CMake",
    # Per-ecosystem project manifests
    "package.json": "JavaScript",
    "tsconfig.json": "TypeScript",
    "requirements.txt": "Python",
    "setup.py": "Python",
    "pom.xml": "Java",
    "build.gradle": "Java",
    "Cargo.toml": "Rust",
    "go.mod": "Go",
}
class LanguageDetector:
    """
    Detects programming languages in a repository.

    A file is classified by its exact name first (SPECIAL_FILES_TO_LANGUAGE)
    and by its extension second (EXTENSION_TO_LANGUAGE). Hidden directories
    and common dependency/build-output directories are never descended into.
    """

    # Directory names pruned from the walk. Names starting with "." (such as
    # ".git") are pruned separately, so they do not need to be listed here.
    _SKIPPED_DIRS = frozenset({
        'node_modules', 'venv', '__pycache__', 'dist', 'build',
    })

    # The only languages detect_languages() reports; anything else that is
    # counted (e.g. Markdown, JSON, Docker) is filtered out of its result.
    _SUPPORTED_LANGUAGES = frozenset({
        "Python", "JavaScript", "TypeScript", "Java",
        "Go", "Rust", "C++", "C#", "PHP", "Ruby",
        "Swift", "Kotlin", "Scala", "R", "Shell",
    })

    def __init__(self):
        """
        Initialize the LanguageDetector.
        """
        logger.info("Initialized LanguageDetector")

    @staticmethod
    def _language_for(file_name):
        """
        Classify a single file by name.

        Args:
            file_name (str): The file's base name (no directory part).

        Returns:
            str or None: The language name, or None if the file is not recognized.
        """
        # Special file names take precedence over the extension, so that e.g.
        # "package.json" counts as JavaScript rather than JSON.
        language = SPECIAL_FILES_TO_LANGUAGE.get(file_name)
        if language is not None:
            return language
        _, ext = os.path.splitext(file_name)
        # The extension table is keyed in lower case; normalise so that files
        # such as "model.R" or "MAIN.PY" are recognized too.
        return EXTENSION_TO_LANGUAGE.get(ext.lower())

    def _iter_source_files(self, repo_path):
        """
        Walk the repository and yield every recognized file.

        Args:
            repo_path (str): The path to the repository.

        Yields:
            tuple: (file_path, language) for each file with a known language.
        """
        for root, dirs, files in os.walk(repo_path):
            # Assigning to dirs[:] edits the list os.walk is iterating, which
            # stops it from descending into the removed directories.
            dirs[:] = [d for d in dirs
                       if not d.startswith('.') and d not in self._SKIPPED_DIRS]
            for file_name in files:
                language = self._language_for(file_name)
                if language is not None:
                    yield os.path.join(root, file_name), language

    def detect_languages(self, repo_path):
        """
        Detect programming languages in a repository.

        Languages are ranked by the number of recognized files, and only
        languages in the supported set are reported.

        Args:
            repo_path (str): The path to the repository.

        Returns:
            list: A list of detected programming languages, sorted by prevalence.
        """
        logger.info("Detecting languages in repository: %s", repo_path)
        language_counter = Counter(
            language for _, language in self._iter_source_files(repo_path)
        )
        # most_common() orders by descending file count.
        detected_languages = [lang for lang, _ in language_counter.most_common()
                              if lang in self._SUPPORTED_LANGUAGES]
        logger.info("Detected languages: %s", detected_languages)
        return detected_languages

    def get_language_breakdown(self, repo_path):
        """
        Get a breakdown of programming languages in a repository by lines of code.

        Every recognized language is included (not only the supported set used
        by detect_languages). Files that cannot be read are logged and skipped.

        Args:
            repo_path (str): The path to the repository.

        Returns:
            dict: A dictionary mapping languages to lines of code.
        """
        logger.info("Getting language breakdown for repository: %s", repo_path)
        language_loc = {}
        for file_path, language in self._iter_source_files(repo_path):
            try:
                # errors='ignore' drops undecodable bytes, so non-UTF-8 files
                # are still counted instead of raising a decode error.
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    line_count = sum(1 for _ in f)
            except OSError as e:
                # Best effort: an unreadable file must not abort the whole scan.
                logger.warning("Error counting lines in %s: %s", file_path, e)
                continue
            language_loc[language] = language_loc.get(language, 0) + line_count
        logger.info("Language breakdown: %s", language_loc)
        return language_loc