Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| OpenAlex API 测试脚本 | |
| 测试OpenAlex API的引用量数据和检索功能 | |
| """ | |
| import requests | |
| import json | |
| import time | |
| from typing import Dict, Any, List | |
class OpenAlexTester:
    """Console smoke-tester for the OpenAlex ``/works`` API.

    Every public method sends live HTTP requests and prints a human-readable
    report to stdout.  Only ``test_search`` also returns a structured result,
    which the other methods build on.
    """

    # Root-level work fields requested through ``select``.  ``citation_count``
    # is intentionally absent: it is not an OpenAlex work field (the citation
    # total is ``cited_by_count``), and OpenAlex documents that selected
    # fields must exist on the entity, so naming it makes the request fail.
    _SELECT_FIELDS = (
        "id",
        "title",
        "publication_year",
        "type",
        "open_access",
        "abstract_inverted_index",
        "cited_by_count",
        "referenced_works_count",
        "authorships",
        "primary_location",
    )

    def __init__(self):
        self.base_url = "https://api.openalex.org/works"
        self.timeout = 30
        self.headers = {
            'User-Agent': 'AcademicReviewerSystem/1.0 (mailto:test@example.com)'
        }

    @staticmethod
    def _with_direction(sort: str) -> str:
        """Return *sort* with ``:desc`` added to every key lacking a direction.

        OpenAlex sorts ascending unless told otherwise, which would list the
        least-cited works first.  Keys that already carry a direction
        (``key:asc`` / ``key:desc``) are passed through untouched.
        """
        keys = [key.strip() for key in sort.split(",") if key.strip()]
        return ",".join(key if ":" in key else f"{key}:desc" for key in keys)

    @staticmethod
    def _short_title(item: Dict[str, Any], limit: int) -> str:
        """Return the work title cut to *limit* characters, or ``'N/A'``."""
        # The title may be JSON null, so ``.get(..., default)`` is not enough.
        title = item.get('title') or 'N/A'
        if len(title) > limit:
            return title[:limit] + "..."
        return title

    @staticmethod
    def _cited_by(item: Dict[str, Any]) -> int:
        """Return the work's ``cited_by_count``, treating missing/null as 0."""
        return item.get('cited_by_count') or 0

    @staticmethod
    def _print_work(index: int, item: Dict[str, Any]) -> None:
        """Print the summary of one search result, tolerating null fields."""
        primary_location = item.get('primary_location') or {}
        source = primary_location.get('source') or {}
        journal = source.get('display_name') or 'N/A'

        # Only the first three authors are shown to keep the report compact.
        author_names = []
        for authorship in (item.get('authorships') or [])[:3]:
            display_name = (authorship.get('author') or {}).get('display_name', '')
            if display_name:
                author_names.append(display_name)

        print(f"结果 {index}:")
        print(f" 标题: {OpenAlexTester._short_title(item, 80)}")
        print(f" 被引用次数: {OpenAlexTester._cited_by(item)}")
        print(f" 参考文献数: {item.get('referenced_works_count', 0)}")
        print(f" 期刊: {journal}")
        print(f" 年份: {item.get('publication_year', 'N/A')}")
        print(f" 作者: {', '.join(author_names)}")
        print(f" OpenAlex ID: {item.get('id', 'N/A')}")
        print()

    def test_search(
        self, query: str, limit: int = 10, sort: str = "cited_by_count"
    ) -> Dict[str, Any]:
        """Search OpenAlex works and print the first five hits.

        Args:
            query: Full-text search string.
            limit: Value sent as ``per-page``.
            sort: Sort key(s).  A key without an explicit direction is sent
                as ``key:desc``; pass ``key:asc`` to sort ascending.

        Returns:
            On success ``{"success": True, "total_results", "items",
            "params"}``; on a request or JSON-decoding failure
            ``{"success": False, "error", "params"}``.
        """
        effective_sort = self._with_direction(sort)
        print("\n=== 测试OpenAlex API ===")
        print(f"查询: {query}")
        print(f"排序: {effective_sort}")
        print(f"限制: {limit}")
        print("-" * 50)

        params = {
            "search": query,
            "per-page": limit,
            "sort": effective_sort,
            "select": ",".join(self._SELECT_FIELDS),
        }

        # Only the network call and JSON decoding are guarded, so a bug in the
        # report code is not misreported as an API failure.
        try:
            response = requests.get(
                self.base_url,
                params=params,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params,
            }

        items = data.get("results") or []
        total_results = (data.get("meta") or {}).get("count", 0)
        print(f"总命中数: {total_results}")
        print(f"返回结果数: {len(items)}")
        print()

        for index, item in enumerate(items[:5], 1):
            self._print_work(index, item)

        return {
            "success": True,
            "total_results": total_results,
            "items": items,
            "params": params,
        }

    def test_different_sort_options(self, query: str) -> None:
        """Run the same query under several sort keys and print citation stats."""
        sort_options = [
            "cited_by_count",    # by number of incoming citations
            "publication_date",  # by publication date
            "relevance_score",   # by search relevance
        ]
        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)

        for sort_option in sort_options:
            print(f"\n--- 排序: {sort_option} ---")
            result = self.test_search(query, sort=sort_option, limit=5)
            if result["success"]:
                items = result["items"]
                cited_counts = [self._cited_by(item) for item in items]
                print(f"被引用次数统计: {cited_counts}")
                if cited_counts:
                    print(f"平均被引用次数: {sum(cited_counts) / len(cited_counts):.2f}")
                    print(f"最大被引用次数: {max(cited_counts)}")
                    # Re-rank locally so the top three are by citations
                    # whatever order the API returned.
                    ranked = sorted(items, key=self._cited_by, reverse=True)
                    print("按引用量排序的前3个结果:")
                    for rank, item in enumerate(ranked[:3], 1):
                        title = self._short_title(item, 50)
                        print(f" {rank}. {title} (被引用: {self._cited_by(item)})")
            else:
                print(f"排序选项 {sort_option} 失败")
            time.sleep(1)  # pause between requests to avoid hammering the API

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries and print their citation counts."""
        test_queries = [
            "machine learning",
            "CRISPR",
            "cryo-electron microscopy",
            "artificial intelligence",
            "deep learning",
        ]
        print("\n=== 测试不同查询 ===")
        print("=" * 60)

        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sort="cited_by_count", limit=3)
            if result["success"]:
                cited_counts = [self._cited_by(item) for item in result["items"]]
                print(f"被引用次数: {cited_counts}")
            else:
                print(f"查询 {query} 失败")
            time.sleep(1)  # pause between requests to avoid hammering the API

    def test_work_details(self, work_id: str) -> None:
        """Fetch one work by ID and print its citation-related fields."""
        print("\n=== 测试工作详情查找 ===")
        print(f"Work ID: {work_id}")
        print("-" * 50)

        url = f"{self.base_url}/{work_id}"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"工作详情查找失败: {str(e)}")
            return

        print(f"标题: {data.get('title', 'N/A')}")
        print(f"被引用次数: {data.get('cited_by_count', 0)}")
        print(f"参考文献数: {data.get('referenced_works_count', 0)}")
        print(f"OpenAlex ID: {data.get('id', 'N/A')}")

        # Dump every top-level field whose name mentions citations or
        # references, so the raw values can be inspected.
        print("\n引用量相关字段:")
        for key, value in data.items():
            lowered = key.lower()
            if 'cite' in lowered or 'reference' in lowered:
                print(f" {key}: {value}")

    def compare_with_crossref(self, query: str) -> None:
        """Print citation counts for *query* from OpenAlex and from Crossref.

        OpenAlex results come from ``test_search`` sorted by citations; the
        Crossref results are the five most relevant hits, re-sorted locally by
        ``is-referenced-by-count``.  The comparison section is printed only
        when OpenAlex succeeded and Crossref returned at least one item.
        """
        print("\n=== 与Crossref对比测试 ===")
        print(f"查询: {query}")
        print("=" * 60)

        print("\n--- OpenAlex结果 ---")
        openalex_result = self.test_search(query, sort="cited_by_count", limit=5)
        openalex_citations: List[int] = []
        if openalex_result["success"]:
            openalex_citations = [
                self._cited_by(item) for item in openalex_result["items"]
            ]
            print(f"OpenAlex引用量: {openalex_citations}")
            if openalex_citations:
                print(f"OpenAlex平均引用量: {sum(openalex_citations) / len(openalex_citations):.2f}")
                print(f"OpenAlex最大引用量: {max(openalex_citations)}")

        print("\n--- Crossref结果 ---")
        # Bound before the request so the comparison below can run safely
        # even when the Crossref call fails.
        crossref_sorted_citations: List[int] = []
        crossref_params = {
            "query": query,
            "rows": 5,
            "sort": "relevance",
            "order": "desc",
            "select": "DOI,title,is-referenced-by-count",
        }
        try:
            crossref_response = requests.get(
                "https://api.crossref.org/works",
                params=crossref_params,
                headers=self.headers,
                timeout=self.timeout,
            )
            crossref_response.raise_for_status()
            crossref_data = crossref_response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Crossref对比测试失败: {str(e)}")
        else:
            crossref_items = (crossref_data.get("message") or {}).get("items") or []
            crossref_sorted_citations = sorted(
                (item.get('is-referenced-by-count') or 0 for item in crossref_items),
                reverse=True,
            )
            print(f"Crossref引用量: {crossref_sorted_citations}")
            if crossref_sorted_citations:
                print(f"Crossref平均引用量: {sum(crossref_sorted_citations) / len(crossref_sorted_citations):.2f}")
                print(f"Crossref最大引用量: {max(crossref_sorted_citations)}")

        if openalex_result["success"] and crossref_sorted_citations:
            print("\n--- 对比分析 ---")
            openalex_max = max(openalex_citations) if openalex_citations else 0
            crossref_max = max(crossref_sorted_citations)
            print(f"OpenAlex最大引用量: {openalex_max}")
            print(f"Crossref最大引用量: {crossref_max}")
            print(f"引用量差异: {abs(openalex_max - crossref_max)}")
            if openalex_max > crossref_max:
                print("✅ OpenAlex引用量更高")
            elif crossref_max > openalex_max:
                print("✅ Crossref引用量更高")
            else:
                print("📊 两者引用量相同")
def main():
    """Run the interactive menu loop until the user chooses to exit."""
    tester = OpenAlexTester()
    print("OpenAlex API 测试工具")
    print("=" * 60)

    menu_lines = (
        "\n请选择测试选项:",
        "1. 测试单个查询",
        "2. 测试不同排序选项",
        "3. 测试不同查询",
        "4. 测试工作详情查找",
        "5. 与Crossref对比测试",
        "6. 退出",
    )
    query_prompt = "请输入查询内容: "
    # Menu choice -> (prompt shown to the user, handler given the answer).
    # Options 3 and 6 take no input and are handled separately below.
    prompted_actions = {
        "1": (query_prompt, tester.test_search),
        "2": (query_prompt, tester.test_different_sort_options),
        "4": ("请输入OpenAlex Work ID: ", tester.test_work_details),
        "5": (query_prompt, tester.compare_with_crossref),
    }

    while True:
        for line in menu_lines:
            print(line)
        choice = input("\n请输入选项 (1-6): ").strip()

        if choice == "6":
            print("退出测试工具")
            break
        if choice == "3":
            tester.test_different_queries()
            continue

        action = prompted_actions.get(choice)
        if action is None:
            print("无效选项,请重新选择")
            continue

        prompt, handler = action
        answer = input(prompt).strip()
        # An empty answer silently returns to the menu.
        if answer:
            handler(answer)


if __name__ == "__main__":
    main()