Spaces:

wujian123
/

new_recommendation

Sleeping

File size: 3,383 Bytes

3c6b551

#!/usr/bin/env python3
"""
测试集成代码中的OpenAlex排序问题
"""

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from reviewer_recommendation.searcher import OpenAlexSearcher, DynamicAcademicSearcher
from reviewer_recommendation.models import PaperInfo

def _print_citation_stats(results, prefix):
    """Print the leading citation counts and the maximum for *results*.

    Args:
        results: Sequence of dict-like search hits; each hit is read via
            ``.get('citedByCount')``.
        prefix: Text placed in front of both printed labels (indentation for
            the direct searcher checks, a channel name for the dynamic ones).

    Returns:
        The largest citation count found, or ``None`` when *results* is
        empty (in which case nothing is printed).
    """
    if not results:
        return None

    # A missing key and an explicit None are both counted as 0, so max()
    # never has to compare None against an int.
    counts = [hit.get('citedByCount') or 0 for hit in results]
    top = max(counts)
    print(f"{prefix}引用量: {counts[:5]}")
    print(f"{prefix}最大引用量: {top}")
    return top


def test_integration_sorting():
    """Print a citation-count comparison of the OpenAlex search paths.

    The function first calls ``OpenAlexSearcher.search`` directly, once with
    ``sort_by_citations=True`` and once with ``False``, and prints the first
    five citation counts plus the maximum for each call.  It then runs
    ``DynamicAcademicSearcher.search_with_dynamic_queries`` with one query and
    prints the same statistics for both returned channels, flagging the case
    where channel 1 has a lower maximum citation count than channel 2.

    Nothing is asserted and nothing is returned: this is a print-based
    diagnostic.  Exceptions from the dynamic searcher are caught and reported
    with a traceback; exceptions from the direct searcher calls propagate.

    NOTE(review): the searchers are project classes not visible here; their
    calls presumably hit the live OpenAlex API -- confirm before running this
    in an offline or CI environment.
    """
    print("=== 测试集成代码中的OpenAlex排序 ===")

    # The direct search below uses the paper title as its query, so the
    # literal is defined once and shared.
    query = "Molecular and Structural Biology Cryo-EM"

    # Paper metadata used as input for the dynamic searcher.
    paper = PaperInfo(
        title=query,
        abstract="Test abstract for cryo-EM research",
        keywords=["cryo-EM", "structural biology", "molecular biology"],
    )

    # The dynamic searcher wraps the same OpenAlex searcher instance that is
    # exercised directly below.
    openalex_searcher = OpenAlexSearcher(limit=10)
    dynamic_searcher = DynamicAcademicSearcher(openalex_searcher=openalex_searcher)

    print(f"测试论文: {paper.title}")
    print("=" * 60)

    # --- Direct calls to OpenAlexSearcher -------------------------------
    print("\n--- 直接测试OpenAlexSearcher ---")

    direct_cases = (
        ("1. 按引用量排序:", True),
        ("\n2. 按相关性排序:", False),
    )
    for heading, by_citations in direct_cases:
        print(heading)
        hits = openalex_searcher.search(query, sort_by_citations=by_citations)
        _print_citation_stats(hits, "   ")

    # --- DynamicAcademicSearcher ----------------------------------------
    print("\n--- 测试DynamicAcademicSearcher ---")

    try:
        channel1_results, channel2_results = dynamic_searcher.search_with_dynamic_queries(
            paper, num_queries=1
        )

        print(f"通道1结果数量: {len(channel1_results)}")
        max1 = _print_citation_stats(channel1_results, "通道1")

        print(f"通道2结果数量: {len(channel2_results)}")
        max2 = _print_citation_stats(channel2_results, "通道2")

        # Compare the channels only when both produced results; the maxima
        # computed above are reused rather than recalculated.
        if max1 is not None and max2 is not None:
            print("\n--- 对比分析 ---")
            print(f"通道1最大引用量: {max1}")
            print(f"通道2最大引用量: {max2}")

            if max1 < max2:
                print("❌ 问题确认：通道1的引用量反而更低！")
            else:
                print("✅ 通道1工作正常")

    except Exception as e:
        # Deliberately broad: this is a diagnostic run, so any failure is
        # reported with its traceback instead of aborting the script.
        print(f"DynamicAcademicSearcher测试失败: {str(e)}")
        import traceback
        traceback.print_exc()

# Run the sorting diagnostic only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_integration_sorting()