new_recommendation / test_openalex_api.py
wujian123's picture
Upload all project files
3c6b551
#!/usr/bin/env python3
"""
OpenAlex API 测试脚本
测试OpenAlex API的引用量数据和检索功能
"""
import requests
import json
import time
from typing import Dict, Any, List
class OpenAlexTester:
    """Interactive smoke tester for the OpenAlex ``/works`` API.

    Each public ``test_*`` / ``compare_*`` method issues live HTTP requests
    through ``requests`` and prints a human-readable report to stdout.
    """

    # Work fields requested through the ``select`` query parameter.
    # ``citation_count`` is deliberately NOT listed: it is not an attribute
    # of the OpenAlex Work object (the citation total is ``cited_by_count``),
    # and asking for an unknown field makes the API reject the request.
    _SELECT_FIELDS = (
        "id",
        "title",
        "publication_year",
        "type",
        "open_access",
        "abstract_inverted_index",
        "cited_by_count",
        "referenced_works_count",
        "authorships",
        "primary_location",
    )

    def __init__(self):
        """Set the works endpoint, the request timeout and the HTTP headers."""
        self.base_url = "https://api.openalex.org/works"
        self.timeout = 30  # seconds, passed to every requests.get call
        self.headers = {
            'User-Agent': 'AcademicReviewerSystem/1.0 (mailto:test@example.com)'
        }

    def _build_search_params(self, query: str, limit: int, sort: str) -> Dict[str, Any]:
        """Return the query-string parameters for a ``/works`` search."""
        return {
            "search": query,
            "per-page": limit,
            "sort": sort,
            "select": ",".join(self._SELECT_FIELDS),
        }

    @staticmethod
    def _summarize_work(item: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the printable fields of one work record.

        A key that is present with a ``null`` value (``title``,
        ``primary_location``, ``source``, ``author``) is treated the same as
        a missing key, so one sparse record cannot abort the whole report.

        Returns:
            Dict with keys ``title`` (truncated to 80 chars + "..."),
            ``cited_by_count``, ``citation_count``,
            ``referenced_works_count``, ``publication_year``, ``journal``,
            ``authors`` (at most the first three display names) and ``id``.
        """
        title = item.get('title') or 'N/A'
        if len(title) > 80:
            title = title[:80] + "..."

        primary_location = item.get('primary_location') or {}
        source = primary_location.get('source') or {}

        authors: List[str] = []
        for authorship in (item.get('authorships') or [])[:3]:
            author = authorship.get('author') or {}
            display_name = author.get('display_name', '')
            if display_name:
                authors.append(display_name)

        return {
            "title": title,
            "cited_by_count": item.get('cited_by_count', 0),
            # Kept only so the report layout is unchanged; the field is not
            # requested from the API, so this is expected to stay 0.
            "citation_count": item.get('citation_count', 0),
            "referenced_works_count": item.get('referenced_works_count', 0),
            "publication_year": item.get('publication_year', 'N/A'),
            "journal": source.get('display_name', 'N/A'),
            "authors": authors,
            "id": item.get('id', 'N/A'),
        }

    @staticmethod
    def _citation_counts(items: List[Dict[str, Any]], key: str) -> List[int]:
        """Return ``item[key]`` for every item, mapping missing/null to 0."""
        return [item.get(key) or 0 for item in items]

    def test_search(self, query: str, limit: int = 10, sort: str = "cited_by_count") -> Dict[str, Any]:
        """Run one search against ``/works`` and print the first five hits.

        Args:
            query: Full-text search string.
            limit: Page size sent as ``per-page``.
            sort: OpenAlex sort key. The API sorts ascending unless the key
                carries a ``:desc`` suffix (e.g. ``"cited_by_count:desc"``).

        Returns:
            ``{"success": True, "total_results", "items", "params"}`` on
            success, or ``{"success": False, "error", "params"}`` when the
            request or response handling raised.
        """
        print("\n=== 测试OpenAlex API ===")
        print(f"查询: {query}")
        print(f"排序: {sort}")
        print(f"限制: {limit}")
        print("-" * 50)

        params = self._build_search_params(query, limit, sort)
        try:
            response = requests.get(
                self.base_url,
                params=params,
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            data = response.json()
            items = data.get("results") or []
            total_results = (data.get("meta") or {}).get("count", 0)

            print(f"总命中数: {total_results}")
            print(f"返回结果数: {len(items)}")
            print()

            # Show only the first few results.
            for i, item in enumerate(items[:5], 1):
                work = self._summarize_work(item)
                print(f"结果 {i}:")
                print(f" 标题: {work['title']}")
                print(f" 被引用次数: {work['cited_by_count']}")
                print(f" 引用次数: {work['citation_count']}")
                print(f" 参考文献数: {work['referenced_works_count']}")
                print(f" 期刊: {work['journal']}")
                print(f" 年份: {work['publication_year']}")
                print(f" 作者: {', '.join(work['authors'])}")
                print(f" OpenAlex ID: {work['id']}")
                print()

            return {
                "success": True,
                "total_results": total_results,
                "items": items,
                "params": params,
            }
        except Exception as e:
            # Deliberately broad: this is an interactive tool and callers
            # rely on the "success" flag instead of catching exceptions.
            print(f"API调用失败: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "params": params,
            }

    def test_different_sort_options(self, query: str) -> None:
        """Run ``query`` once per sort option and print citation statistics."""
        # OpenAlex sorts ascending by default; ":desc" puts the most cited,
        # newest and most relevant works first.
        sort_options = [
            "cited_by_count:desc",    # by number of citing works
            "publication_date:desc",  # by publication date
            "relevance_score:desc",   # by search relevance
        ]
        print("\n=== 测试不同排序选项 ===")
        print(f"查询: {query}")
        print("=" * 60)

        for sort_option in sort_options:
            print(f"\n--- 排序: {sort_option} ---")
            result = self.test_search(query, sort=sort_option, limit=5)
            if result["success"]:
                items = result["items"]
                cited_counts = self._citation_counts(items, 'cited_by_count')
                print(f"被引用次数统计: {cited_counts}")
                if cited_counts:
                    print(f"平均被引用次数: {sum(cited_counts) / len(cited_counts):.2f}")
                    print(f"最大被引用次数: {max(cited_counts)}")

                # Re-rank locally by citations, whatever order the API used.
                sorted_items = sorted(
                    items,
                    key=lambda x: x.get('cited_by_count') or 0,
                    reverse=True,
                )
                print("按引用量排序的前3个结果:")
                for i, item in enumerate(sorted_items[:3], 1):
                    title = item.get('title') or 'N/A'
                    if len(title) > 50:
                        title = title[:50] + "..."
                    cited_count = item.get('cited_by_count', 0)
                    print(f" {i}. {title} (被引用: {cited_count})")
            else:
                print(f"排序选项 {sort_option} 失败")
            time.sleep(1)  # avoid sending requests too quickly

    def test_different_queries(self) -> None:
        """Run a fixed list of sample queries, most-cited works first."""
        test_queries = [
            "machine learning",
            "CRISPR",
            "cryo-electron microscopy",
            "artificial intelligence",
            "deep learning",
        ]
        print("\n=== 测试不同查询 ===")
        print("=" * 60)

        for query in test_queries:
            print(f"\n--- 查询: {query} ---")
            result = self.test_search(query, sort="cited_by_count:desc", limit=3)
            if result["success"]:
                cited_counts = self._citation_counts(result["items"], 'cited_by_count')
                print(f"被引用次数: {cited_counts}")
            else:
                print(f"查询 {query} 失败")
            time.sleep(1)  # avoid sending requests too quickly

    def test_work_details(self, work_id: str) -> None:
        """Fetch a single work by ID and print its citation-related fields."""
        print("\n=== 测试工作详情查找 ===")
        print(f"Work ID: {work_id}")
        print("-" * 50)

        url = f"{self.base_url}/{work_id}"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            title = data.get('title', 'N/A')
            cited_by_count = data.get('cited_by_count', 0)
            citation_count = data.get('citation_count', 0)
            referenced_works_count = data.get('referenced_works_count', 0)
            print(f"标题: {title}")
            print(f"被引用次数: {cited_by_count}")
            print(f"引用次数: {citation_count}")
            print(f"参考文献数: {referenced_works_count}")
            print(f"OpenAlex ID: {data.get('id', 'N/A')}")

            # Dump every top-level field whose name mentions citations or
            # references, to show what the API actually exposes.
            print("\n引用量相关字段:")
            for key, value in data.items():
                lowered = key.lower()
                if 'cite' in lowered or 'reference' in lowered:
                    print(f" {key}: {value}")
        except Exception as e:
            print(f"工作详情查找失败: {str(e)}")

    def compare_with_crossref(self, query: str) -> None:
        """Compare citation counts for ``query`` between OpenAlex and Crossref.

        Prints each service's counts, average and maximum, then which service
        reports the higher maximum. The comparison is skipped when OpenAlex
        failed or Crossref returned nothing (including a failed request).
        """
        print("\n=== 与Crossref对比测试 ===")
        print(f"查询: {query}")
        print("=" * 60)

        # Both lists are bound up front so the final comparison can never hit
        # an unbound name when one of the two services fails.
        openalex_citations: List[int] = []
        crossref_sorted_citations: List[int] = []

        # OpenAlex side.
        print("\n--- OpenAlex结果 ---")
        openalex_result = self.test_search(query, sort="cited_by_count:desc", limit=5)
        if openalex_result["success"]:
            openalex_citations = self._citation_counts(
                openalex_result["items"], 'cited_by_count'
            )
            print(f"OpenAlex引用量: {openalex_citations}")
            if openalex_citations:
                print(f"OpenAlex平均引用量: {sum(openalex_citations) / len(openalex_citations):.2f}")
                print(f"OpenAlex最大引用量: {max(openalex_citations)}")

        # Crossref side (simplified).
        print("\n--- Crossref结果 ---")
        try:
            crossref_url = "https://api.crossref.org/works"
            crossref_params = {
                "query": query,
                "rows": 5,
                "sort": "relevance",
                "order": "desc",
                "select": "DOI,title,is-referenced-by-count",
            }
            crossref_response = requests.get(
                crossref_url,
                params=crossref_params,
                headers=self.headers,
                timeout=self.timeout,
            )
            crossref_response.raise_for_status()
            crossref_data = crossref_response.json()
            crossref_items = crossref_data.get("message", {}).get("items", [])

            # Crossref was asked for relevance order; rank by citations here.
            crossref_sorted_citations = sorted(
                self._citation_counts(crossref_items, 'is-referenced-by-count'),
                reverse=True,
            )
            print(f"Crossref引用量: {crossref_sorted_citations}")
            if crossref_sorted_citations:
                print(f"Crossref平均引用量: {sum(crossref_sorted_citations) / len(crossref_sorted_citations):.2f}")
                print(f"Crossref最大引用量: {max(crossref_sorted_citations)}")
        except Exception as e:
            print(f"Crossref对比测试失败: {str(e)}")

        # Side-by-side analysis.
        if openalex_result["success"] and crossref_sorted_citations:
            print("\n--- 对比分析 ---")
            openalex_max = max(openalex_citations) if openalex_citations else 0
            crossref_max = max(crossref_sorted_citations)
            print(f"OpenAlex最大引用量: {openalex_max}")
            print(f"Crossref最大引用量: {crossref_max}")
            print(f"引用量差异: {abs(openalex_max - crossref_max)}")
            if openalex_max > crossref_max:
                print("✅ OpenAlex引用量更高")
            elif crossref_max > openalex_max:
                print("✅ Crossref引用量更高")
            else:
                print("📊 两者引用量相同")
def main():
    """Run the interactive menu loop of the OpenAlex test tool.

    Repeatedly prints the menu, reads a choice from stdin and dispatches to
    the matching ``OpenAlexTester`` method. The loop ends on option 6, on
    end-of-input (Ctrl-D / exhausted stdin) or on Ctrl-C, so the tool exits
    cleanly instead of dying with a traceback.
    """
    tester = OpenAlexTester()
    print("OpenAlex API 测试工具")
    print("=" * 60)

    menu_lines = (
        "\n请选择测试选项:",
        "1. 测试单个查询",
        "2. 测试不同排序选项",
        "3. 测试不同查询",
        "4. 测试工作详情查找",
        "5. 与Crossref对比测试",
        "6. 退出",
    )

    while True:
        for line in menu_lines:
            print(line)

        try:
            choice = input("\n请输入选项 (1-6): ").strip()
            if choice == "1":
                query = input("请输入查询内容: ").strip()
                if query:
                    # OpenAlex sorts ascending by default; request the most
                    # cited works first, which is what this tool inspects.
                    tester.test_search(query, sort="cited_by_count:desc")
            elif choice == "2":
                query = input("请输入查询内容: ").strip()
                if query:
                    tester.test_different_sort_options(query)
            elif choice == "3":
                tester.test_different_queries()
            elif choice == "4":
                work_id = input("请输入OpenAlex Work ID: ").strip()
                if work_id:
                    tester.test_work_details(work_id)
            elif choice == "5":
                query = input("请输入查询内容: ").strip()
                if query:
                    tester.compare_with_crossref(query)
            elif choice == "6":
                print("退出测试工具")
                break
            else:
                print("无效选项,请重新选择")
        except (EOFError, KeyboardInterrupt):
            # No more input, or the user interrupted: leave the menu quietly.
            print()
            print("退出测试工具")
            break
# Start the interactive menu only when executed as a script, not on import.
if __name__ == "__main__":
    main()