Spaces:
Sleeping
Sleeping
| """ | |
| Utility script to explore arXiv categories and extract them from papers. | |
| """ | |
| import arxiv | |
| from typing import List, Dict | |
def get_paper_categories(paper_id: str) -> List[str]:
    """
    Get the categories of a specific paper.

    Args:
        paper_id: arXiv ID (e.g., "2406.01234")

    Returns:
        List of category codes (e.g., ["cs.CL", "cs.AI"]), or an empty
        list when the lookup yields no result.
    """
    search = arxiv.Search(id_list=[paper_id])
    # Search.results() is deprecated in arxiv.py 2.x; running the search
    # through a Client is the supported way to execute a query.
    results = arxiv.Client().results(search)
    # Only the first hit is needed, so take one item from the iterator
    # instead of materializing every result into a list.
    paper = next(results, None)
    if paper is None:
        return []
    return paper.categories
def search_by_category(category: str, max_results: int = 10) -> List[Dict]:
    """
    Search for papers in a specific category.

    Args:
        category: Category code (e.g., "cs.CL")
        max_results: Maximum number of results

    Returns:
        List of paper information dictionaries, one per result, with the
        keys 'id', 'title', 'authors' (list of author names),
        'categories', 'published' (ISO 8601 string) and 'summary'
        (cut to 200 characters plus "..." when longer).
    """
    search = arxiv.Search(
        query=f"cat:{category}",
        max_results=max_results,
        sort_by=arxiv.SortCriterion.SubmittedDate,
    )
    papers = []
    # Search.results() is deprecated in arxiv.py 2.x; running the search
    # through a Client is the supported way to execute a query.
    for paper in arxiv.Client().results(search):
        summary = paper.summary
        if len(summary) > 200:
            # Keep listings compact: show only the first 200 characters.
            summary = summary[:200] + "..."
        papers.append({
            'id': paper.entry_id,
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'categories': paper.categories,
            'published': paper.published.isoformat(),
            'summary': summary,
        })
    return papers
def get_common_categories() -> Dict[str, str]:
    """
    Build a lookup table of frequently used arXiv categories.

    Returns:
        A new dictionary on every call that maps a category code
        (e.g., "cs.CL") to its human-readable description. Entries are
        ordered by archive: Computer Science, Mathematics, Physics,
        Quantitative Biology.
    """
    computer_science = {
        "cs.AI": "Artificial Intelligence",
        "cs.CL": "Computation and Language (NLP)",
        "cs.CV": "Computer Vision and Pattern Recognition",
        "cs.LG": "Machine Learning",
        "cs.NE": "Neural and Evolutionary Computing",
        "cs.IR": "Information Retrieval",
        "cs.SE": "Software Engineering",
        "cs.DC": "Distributed, Parallel, and Cluster Computing",
        "cs.CR": "Cryptography and Security",
        "cs.DB": "Databases",
        "cs.AR": "Hardware Architecture",
        "cs.CG": "Computational Geometry",
        "cs.GT": "Computer Science and Game Theory",
        "cs.LO": "Logic in Computer Science",
        "cs.MS": "Mathematical Software",
        "cs.NA": "Numerical Analysis",
        "cs.OS": "Operating Systems",
        "cs.PF": "Performance",
        "cs.PL": "Programming Languages",
        "cs.RO": "Robotics",
        "cs.SC": "Symbolic Computation",
        "cs.SD": "Sound",
        "cs.SI": "Social and Information Networks",
        "cs.SY": "Systems and Control",
    }
    mathematics = {
        "math.OC": "Optimization and Control",
        "math.ST": "Statistics Theory",
        "math.NA": "Numerical Analysis",
        "math.PR": "Probability",
        "math.AT": "Algebraic Topology",
        "math.AG": "Algebraic Geometry",
        "math.AP": "Analysis of PDEs",
        "math.CT": "Category Theory",
        "math.CA": "Classical Analysis and ODEs",
        "math.CO": "Combinatorics",
        "math.AC": "Commutative Algebra",
        "math.CV": "Complex Variables",
        "math.DG": "Differential Geometry",
        "math.DS": "Dynamical Systems",
        "math.FA": "Functional Analysis",
        "math.GM": "General Mathematics",
        "math.GN": "General Topology",
        "math.GT": "Geometric Topology",
        "math.GR": "Group Theory",
        "math.HO": "History and Overview",
        "math.IT": "Information Theory",
        "math.KT": "K-Theory and Homology",
        "math.LO": "Logic",
        "math.MP": "Mathematical Physics",
        "math.MG": "Metric Geometry",
        "math.NT": "Number Theory",
        "math.OA": "Operator Algebras",
        "math.RA": "Rings and Algebras",
        "math.RT": "Representation Theory",
        "math.SP": "Spectral Theory",
        "math.SG": "Symplectic Geometry",
    }
    physics = {
        "physics.comp-ph": "Computational Physics",
        "physics.data-an": "Data Analysis, Statistics and Probability",
        "physics.acc-ph": "Accelerator Physics",
        "physics.ao-ph": "Atmospheric and Oceanic Physics",
        "physics.app-ph": "Applied Physics",
        "physics.atm-clus": "Atomic and Molecular Clusters",
        "physics.atom-ph": "Atomic Physics",
        "physics.bio-ph": "Biological Physics",
        "physics.chem-ph": "Chemical Physics",
        "physics.class-ph": "Classical Physics",
        "physics.flu-dyn": "Fluid Dynamics",
        "physics.gen-ph": "General Physics",
        "physics.geo-ph": "Geophysics",
        "physics.hist-ph": "History and Philosophy of Physics",
        "physics.ins-det": "Instrumentation and Detectors",
        "physics.med-ph": "Medical Physics",
        "physics.optics": "Optics",
        "physics.plasm-ph": "Plasma Physics",
        "physics.pop-ph": "Popular Physics",
        "physics.soc-ph": "Physics and Society",
        "physics.space-ph": "Space Physics",
    }
    quantitative_biology = {
        "q-bio.BM": "Biomolecules",
        "q-bio.CB": "Cell Behavior",
        "q-bio.GN": "Genomics",
        "q-bio.MN": "Molecular Networks",
        "q-bio.NC": "Neurons and Cognition",
        "q-bio.OT": "Other Quantitative Biology",
        "q-bio.PE": "Populations and Evolution",
        "q-bio.QM": "Quantitative Methods",
        "q-bio.SC": "Subcellular Processes",
        "q-bio.TO": "Tissues and Organs",
    }

    # Merge the per-archive tables in a fixed order so that iteration
    # order matches the order in which the archives are listed above.
    catalog: Dict[str, str] = {}
    for archive in (computer_science, mathematics, physics, quantitative_biology):
        catalog.update(archive)
    return catalog
# Demo: exercise each helper when the file is executed directly
if __name__ == "__main__":
    # Demo 1: look up the categories attached to one paper
    sample_id = "2406.01234"
    sample_categories = get_paper_categories(sample_id)
    print(f"Categories for {sample_id}: {sample_categories}")

    # Demo 2: list the newest submissions in one category
    target_category = "cs.CL"
    recent_papers = search_by_category(target_category, max_results=3)
    print(f"\nRecent papers in {target_category}:")
    for entry in recent_papers:
        print(f"- {entry['title']}")
        print(f" Categories: {entry['categories']}")
        print()

    # Demo 3: preview the first ten entries of the category table
    category_table = get_common_categories()
    print("Common arXiv Categories:")
    preview = list(category_table.items())[:10]
    for code, description in preview:
        print(f"{code}: {description}")