import gradio as gr
from google import genai
from google.genai import types
import PyPDF2
import os
import json
import re
import io
from datetime import datetime
from huggingface_hub import HfApi, create_repo, upload_file, list_repo_files
import pandas as pd
from pathlib import Path
import tempfile
import shutil
try:
import pdfplumber
PDFPLUMBER_AVAILABLE = True
except ImportError:
PDFPLUMBER_AVAILABLE = False
# Disable Gradio analytics
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
# Gemini API configuration
GEMINI_API_KEY = os.getenv("GEMINI_API")
HF_TOKEN = os.getenv("HF_TOKEN")
DATASET_NAME = "agi-novel-leaderboard"
GLOBAL_DATASET = "fantaxy/novel-evaluations"
ADMIN_USERNAME = "fantaxy"
# Language content dictionary
LANGUAGE_CONTENT = {
"en": {
"title": "🏆 AGI Turing Test Leaderboard: Novel Creation",
"guide_tab": "📖 GUIDE",
"purpose_title": "🎯 Purpose",
"purpose_desc": """This system evaluates whether **AGI (Artificial General Intelligence) can create novels at a level equivalent to human authors** through a comprehensive Turing test.""",
"why_title": "🌟 Why Novel Creation?",
"why_desc": """### 1. Narrative Generation as Integrated Stress Test
* Long-form fiction requires **long-term memory, complex plotting, emotional expression, ethical filtering, and originality** simultaneously
* These multiple sub-abilities are difficult to verify simultaneously through other single tasks
### 2. Direct Comparison with Human Culture
* **Social validation channels** like literary awards and reader reviews already exist, allowing intuitive performance ranking
* Novel creation represents the pinnacle of linguistic and creative capabilities
### 3. AGI Community Consensus
* The latest AGI evaluation community considers **"language and creative ability"** as the core indicator of human-level intelligence
* With the emergence of benchmarks like WebNovelBench and EQ-Bench Longform, the ability to consistently and creatively complete works of hundreds of thousands of words has become the representative test of AGI difficulty""",
"criteria_title": "🔍 Evaluation Criteria",
"criteria_desc": """- **Literary Completion**: Objective evaluation from Nobel Prize level (9.1 points) to draft level (0.1 points)
- **Creative Persistence**: Ability to create long-form works over 5,000 words (0.1 point bonus per 1,000 words, max 0.9 points)
- **Comprehensive Score**: Base score + Volume bonus = Maximum 10 points
- **Evaluation AI**: Using Gemini 2.5 Pro model
- **Plagiarism Check**: Human-written works will receive 0 points (except admin samples)""",
"login_required": "### ❌ Login Required!\n\nPlease click the 'Sign in with Hugging Face' button at the top to login.\n\n",
"leaderboard_tab": "🏆 Leaderboard",
"submit_tab": "📝 Submit Work",
"history_tab": "📚 My Submission History",
"leaderboard_header": """
🌟 AGI Literary Creation Capability Leaderboard 🌟
Ranking of AIs with human-level novel creation abilities
""",
"simple_leaderboard_header": """
🏆 Top AI Novel Rankings
""",
"refresh_btn": "🔄 Refresh Leaderboard",
"evaluate_btn": "🔍 Start Evaluation",
"history_btn": "🔄 Refresh History",
"upload_label": "📄 Upload PDF File",
"llm_url_label": "🔗 LLM Service URL (Optional)",
"llm_url_placeholder": "Enter the URL of LLM service used to generate this work",
"is_human_sample_label": "📚 Human Sample (Admin Only)",
"result_label": "### 📋 Evaluation results will be displayed here\n\n🔐 **Login required!**\n\nPlease click 'Sign in with Hugging Face' button at the top to login.",
"score_system": """### 📊 Scoring System
- **Base Score**: 0.1-10 points (Literary quality evaluation)
- **Bonus Score**: Up to 0.9 points (0.1 points per 1,000 words over 5,000)
- **Final Score**: Base + Bonus = Maximum 10 points
- **Plagiarism = 0 points**: Human-written works detected as plagiarism receive 0 points""",
"grade_criteria": """### 🏅 Grade Criteria
- **10.0 points**: Perfect literary achievement ✨
- **9.0+ points**: Nobel Prize level creative ability
- **8.0+ points**: World literature classic level
- **7.0+ points**: Bestselling author level
- **5.0+ points**: Professional writer level
- **3.0+ points**: Amateur writer level
- **Below 3.0**: Draft level
- **0 points**: Plagiarism or human-written work""",
"requirements": """### 📋 Minimum Requirements
- **Minimum 5,000 words** (required)
- **Approximately 7-8+ pages** (A4 standard)
- Complete works beyond short stories
- Synopsis or summaries not accepted
- **Must be AI-generated** (human works = 0 points)""",
"bonus_system": """### 🎁 Bonus Points
- 0.1 points per 1,000 words over 5,000
- Maximum 0.9 additional points
- Example: 13,000 words = +0.8 bonus points""",
"warning": """
⚠️ Important Notice
Works under 5,000 words will be rejected.
Human-written or plagiarized works will receive 0 points automatically.
AGI test evaluates long-form creation ability. For novels generated with a single prompt, demonstrating AGI minimum/recommended level requires consistent performance of 5.1-6.1 points or higher. Scores of 7.1+ indicate 'ASI (Artificial Superintelligence)' Stage 1, while 8.1+ represents true 'ASI' stage entry.
""",
"evaluation_scale": """### 📌 Evaluation Scale
| Score | Level | Example |
|-------|-------|---------|
| **10.0** | Perfect (Flawless achievement) | All elements perfect |
| **9.1** | Nobel Prize level | *One Hundred Years of Solitude* |
| **8.1** | World literature classic | *Anna Karenina* |
| **7.1** | Global bestseller | *Harry Potter* |
| **6.1** | International literary award | *The Vegetarian* |
| **5.1** | Academy Award screenplay | *Parasite* |
| **4.1** | Commercial success | *Squid Game* |
| **3.1** | Popular domestic work | Local bestsellers |
| **2.1** | General commercial | Genre fiction |
| **1.1** | Web novel | Platform originals |
| **0.1** | Draft | Beginner work |
| **0** | Plagiarism/Human work | Detected non-AI content |""",
"submitter": "### 👤 Submitter: ",
"work_info": "📊 Work Info: ",
"pages": " pages, ",
"words": " words\n",
"volume_bonus": "📈 Volume Bonus: +",
"points": " points (words over 5,000)\n",
"evaluator": "🤖 Evaluation AI: Gemini 2.5 Pro\n\n",
"min_words_error": """### ⚠️ Cannot Evaluate: Insufficient Length
**Current Work Info:**
- 📄 Pages: {pages} pages
- 📝 Words: {words:,} words
**Minimum Requirements:**
- 📝 **5,000+ words** (current: {words:,} words)
- 📄 **~7-8+ pages** (A4 standard)
**AGI Turing Test Standards:**
- Sufficient length is required to evaluate human-level novel creation ability
- Please submit completed works of novella length or longer
Words needed: **{needed:,} words**""",
"plagiarism_detected": """### 🚫 Evaluation Result: PLAGIARISM DETECTED
**Final Score: 0 points**
This work has been identified as:
- Human-written content
- Plagiarized from existing literature
- Not generated by AI
AGI Turing Test evaluates AI's ability to create original novels.
Please submit only AI-generated content.""",
"final_score_title": "### 🏆 Final Score Calculation\n",
"base_score": "- **Base Evaluation Score**: ",
"bonus_score": "- **Volume Bonus**: +",
"final_score": "- **Final Score**: **",
"points_detail": " points (0.1 per 1,000 words, max 0.9)\n",
"max_10": "** (Maximum 10 points)\n\n---\n\n",
"save_success": "✅ ",
"save_error": "⚠️ ",
"rank": "Rank",
"author_id": "Author ID",
"llm_service": "LLM Service",
"final_score_col": "Final Score",
"word_count": "Word Count",
"work_title": "Work Title",
"submit_date": "Submit Date",
"human_sample": "Type",
"download": "Download",
"view_eval": "View",
"history_headers": ["Date/Time", "Filename", "Final Score", "Word Count", "Type", "Evaluation Summary"],
"history_label": "My Submissions (Recent 10)",
"view_evaluation": "View Evaluation",
"download_pdf": "Download PDF",
"close": "Close",
"admin_only": "Admin only feature",
"human_sample_badge": "📚 Human Sample",
"ai_generated_badge": "🤖 AI Generated",
"quick_submit_title": "📝 Quick Submit",
"submit_instructions": "Upload your AI-generated novel (PDF, min 5,000 words) for evaluation"
},
"ko": {
"title": "🏆 AGI 튜링테스트 리더보드: 장편소설 창작",
"guide_tab": "📖 가이드",
"purpose_title": "🎯 목적",
"purpose_desc": """이 시스템은 **AGI(인공일반지능)가 인간 작가와 동등한 수준의 장편소설을 창작할 수 있는지**를 평가하는 튜링테스트입니다.""",
"why_title": "🌟 왜 소설 창작인가?",
"why_desc": """### 1. 서사 생성이 통합 스트레스 테스트
* 장편 소설은 **장기 기억, 복합 플롯, 감정 표현, 윤리 필터, 독창성**을 한 번에 요구합니다
* 이러한 다중 하위 능력은 다른 단일 태스크로는 동시에 검증하기 어렵습니다
### 2. 인간 문화로 직접 비교 가능
* 문학상이나 독자 평가 같은 **사회적 검증 채널**이 이미 존재해 성능을 직관적으로 순위화할 수 있습니다
* 소설 창작은 언어적·창의적 능력의 정점을 나타냅니다
### 3. AGI 커뮤니티 합의
* 최신 AGI 평가 커뮤니티는 **"언어·창작 능력"**을 인간 수준 지능의 핵심 지표로 봅니다
* WebNovelBench·EQ-Bench Longform 등 장편·창작 전용 벤치마크가 등장하면서, 한 모델이 수십만 단어짜리 작품을 얼마나 일관적·창의적으로 완성하느냐가 AGI 난이도의 대표 시험으로 굳어지는 추세입니다""",
"criteria_title": "🔍 평가 기준",
"criteria_desc": """- **문학적 완성도**: 노벨문학상 수준(9.1점)부터 습작 수준(0.1점)까지의 객관적 평가
- **창작 지속성**: 5,000단어 이상의 장편 창작 능력 (1,000단어당 0.1점 보너스, 최대 0.9점)
- **종합 평가**: 기본 점수 + 분량 보너스 = 최대 10점
- **평가 AI**: Gemini 2.5 Pro 모델 사용
- **표절 검사**: 인간이 작성한 작품은 0점 처리 (관리자 샘플 제외)""",
"login_required": "### ❌ 로그인이 필요합니다!\n\n상단의 'Sign in with Hugging Face' 버튼을 클릭하여 로그인해주세요.\n\n",
"leaderboard_tab": "🏆 리더보드",
"submit_tab": "📝 작품 제출",
"history_tab": "📚 내 평가 내역",
"leaderboard_header": """
🌟 AGI 문학 창작 능력 리더보드 🌟
인간 수준의 장편소설 창작 능력을 갖춘 AI들의 순위
""",
"simple_leaderboard_header": """
🏆 최고의 AI 소설 순위
""",
"refresh_btn": "🔄 리더보드 새로고침",
"evaluate_btn": "🔍 평가 시작",
"history_btn": "🔄 내역 새로고침",
"upload_label": "📄 PDF 파일 업로드",
"llm_url_label": "🔗 LLM 서비스 URL (선택사항)",
"llm_url_placeholder": "이 작품을 생성한 LLM 서비스의 URL을 입력하세요",
"is_human_sample_label": "📚 휴먼 샘플 (관리자 전용)",
"result_label": "### 📋 평가 결과가 여기에 표시됩니다\n\n🔐 **로그인이 필요합니다!**\n\n상단의 'Sign in with Hugging Face' 버튼을 클릭하여 로그인 후 이용해주세요.",
"score_system": """### 📊 점수 체계 설명
- **기본 점수**: 0.1-10점 (문학적 완성도 평가)
- **보너스 점수**: 최대 0.9점 (5,000단어 초과 시 1,000단어당 0.1점)
- **최종 점수**: 기본 + 보너스 = 최대 10점
- **표절 = 0점**: 인간이 작성한 작품으로 판명 시 0점 처리""",
"grade_criteria": """### 🏅 등급 기준
- **10.0점**: 완벽한 문학적 성취 (만점) ✨
- **9.0점 이상**: 노벨문학상 급 창작 능력
- **8.0점 이상**: 세계 문학 고전 수준
- **7.0점 이상**: 베스트셀러 작가 수준
- **5.0점 이상**: 프로 작가 수준
- **3.0점 이상**: 아마추어 작가 수준
- **3.0점 미만**: 습작 수준
- **0점**: 표절 또는 인간 작성 작품""",
"requirements": """### 📋 최소 분량 요구사항
- **최소 5,000단어 이상** (필수)
- **약 7-8페이지 이상** (A4 기준)
- 단편소설 이상의 완성된 작품
- 시놉시스나 요약본 불가
- **AI가 생성한 작품만 가능** (인간 작품 = 0점)""",
"bonus_system": """### 🎁 보너스 점수
- 5,000단어 초과 시 1,000단어당 0.1점
- 최대 0.9점까지 추가 가능
- 예: 13,000단어 = +0.8점 보너스""",
"warning": """
⚠️ 주의사항
5,000단어 미만의 작품은 평가가 거부됩니다.
인간이 작성했거나 표절한 작품은 자동으로 0점 처리됩니다.
AGI 테스트는 장편 창작 능력을 평가합니다. 단 한번의 프롬프트만으로 생성된 중편 이상 소설에 대한 평가시 AGI의 최소/권고 수준은 5.1점 ~ 6.1점 이상을 지속 유지하는 생성 능력을 입증해야 합니다. 7.1점 이상의 경우 'ASI(초인공지능)' 1단계로 평가할 수 있으며, 8.1점 이상부터는 진정한 'ASI' 단계 진입을 의미합니다.
""",
"evaluation_scale": """### 📌 평가 척도
| 점수 | 수준 | 예시 |
|------|------|------|
| **10점** | 만점 (완벽한 문학적 성취) | 모든 요소가 완벽한 작품 |
| **9.1점** | 노벨문학상 수준 | 『백년 동안의 고독』 |
| **8.1점** | 세계 문학사 고전 | 『안나 카레니나』 |
| **7.1점** | 세계적 베스트셀러 | 『해리포터』 |
| **6.1점** | 국제 문학상 수상작 | 『채식주의자』 |
| **5.1점** | 아카데미 각본상 | 『기생충』 |
| **4.1점** | 상업적 성공작 | 『오징어 게임』 |
| **3.1점** | 국내 인기작 | 『82년생 김지영』 |
| **2.1점** | 일반 상업 작품 | 장르 소설 |
| **1.1점** | 웹소설 | 웹 플랫폼 작품 |
| **0.1점** | 습작 | 초보 작가 작품 |
| **0점** | 표절/인간 작품 | 비AI 콘텐츠 감지 |""",
"submitter": "### 👤 제출자: ",
"work_info": "📊 작품 정보: ",
"pages": "페이지, ",
"words": "단어\n",
"volume_bonus": "📈 분량 보너스: +",
"points": "점 (5000단어 초과분)\n",
"evaluator": "🤖 평가 AI: Gemini 2.5 Pro\n\n",
"min_words_error": """### ⚠️ 평가 불가: 작품 분량 부족
**현재 작품 정보:**
- 📄 페이지 수: {pages}페이지
- 📝 단어 수: {words:,}단어
**최소 요구사항:**
- 📝 **5,000단어 이상** (현재: {words:,}단어)
- 📄 **약 7-8페이지 이상** (A4 기준)
**AGI 튜링테스트 기준:**
- 인간 수준의 장편소설 창작 능력을 평가하기 위해서는 충분한 분량이 필요합니다
- 단편소설이나 중편소설 이상의 완성된 작품을 제출해주세요
부족한 단어 수: **{needed:,}단어**""",
"plagiarism_detected": """### 🚫 평가 결과: 표절 감지
**최종 점수: 0점**
이 작품은 다음으로 식별되었습니다:
- 인간이 작성한 콘텐츠
- 기존 문학 작품에서 표절
- AI가 생성하지 않음
AGI 튜링테스트는 AI의 독창적인 소설 창작 능력을 평가합니다.
AI가 생성한 콘텐츠만 제출해주세요.""",
"final_score_title": "### 🏆 최종 점수 산정\n",
"base_score": "- **기본 평가 점수**: ",
"bonus_score": "- **분량 보너스**: +",
"final_score": "- **최종 점수**: **",
"points_detail": "점 (1000단어당 0.1점, 최대 0.9점)\n",
"max_10": "점** (최대 10점)\n\n---\n\n",
"save_success": "✅ ",
"save_error": "⚠️ ",
"rank": "순위",
"author_id": "작성자 ID",
"llm_service": "LLM 서비스",
"final_score_col": "최종점수",
"word_count": "단어수",
"work_title": "작품명",
"submit_date": "제출일시",
"human_sample": "유형",
"download": "다운로드",
"view_eval": "평가보기",
"history_headers": ["날짜/시간", "파일명", "최종점수", "단어수", "유형", "평가 요약"],
"history_label": "나의 제출 내역 (최근 10개)",
"view_evaluation": "평가 보기",
"download_pdf": "PDF 다운로드",
"close": "닫기",
"admin_only": "관리자 전용 기능",
"human_sample_badge": "📚 휴먼 샘플",
"ai_generated_badge": "🤖 AI 생성",
"quick_submit_title": "📝 빠른 제출",
"submit_instructions": "AI가 생성한 소설(PDF, 최소 5,000단어)을 업로드하여 평가를 받으세요"
}
}
# Evaluation criteria in both languages
EVALUATION_CRITERIA = {
"en": """
📌 **10 points - Perfect Score (Flawless literary achievement)**
* Impeccable level in all evaluation elements.
* Creative work that surpasses the highest level of human works.
📌 **9.1 points - Nobel Prize in Literature level**
* Deals with deep philosophical insights and universal humanity.
* Example: Gabriel García Márquez "One Hundred Years of Solitude"
📌 **8.1 points - World literature classic level**
* Works that are continuously read and studied across time and culture.
* Example: Tolstoy "Anna Karenina", Hemingway "The Old Man and the Sea"
📌 **7.1 points - Global bestselling literary work level**
* Works with both literary merit and commercial appeal with worldwide influence and recognition.
* Example: "Harry Potter" series, "The Lord of the Rings", "The Alchemist"
📌 **6.1 points - Prestigious international literary award winner level**
* Works that have won international literary awards such as the Booker Prize, Pulitzer Prize, Prix Goncourt.
* Example: "The Vegetarian" (Han Kang, Man Booker Prize), "The Road" (Cormac McCarthy, Pulitzer Prize)
📌 **5.1 points - Academy Award for Best Screenplay/Adapted Screenplay level**
* Scripts recognized for excellent story composition, character expression, and philosophical messages.
* Example: "Parasite" (Bong Joon-ho & Han Jin-won), "Eternal Sunshine of the Spotless Mind" (Charlie Kaufman)
📌 **4.1 points - Commercially successful film/drama screenplay level**
* Scripts focused on popularity rather than artistic merit, achieving box office success and public empathy.
* Example: "Squid Game" (Hwang Dong-hyuk), "Avengers" series
📌 **3.1 points - Domestically popular general novel and drama level**
* Works with stable popularity among the public without major social impact.
* Example: Popular domestic bestsellers, weekend drama scripts
📌 **2.1 points - General commercial genre novel and drama script level**
* Entertainment-focused rather than literary value, for mild commercial consumption.
* Example: Most general mystery/romance novels, light weekend drama scripts
📌 **1.1 points - Popular web novel and web drama level**
* Works composed for quick consumption, light and interest-oriented.
* Example: General popular works on web novel platforms
📌 **0.1 points - Aspiring writer/student draft level**
* Basic level story composition, style, character description with low completion.
📌 **0 points - Plagiarism or Human-written work**
* Works detected as written by humans, not AI-generated
* Direct plagiarism from existing literature
""",
"ko": """
📌 **10점 - 만점 (완벽한 문학적 성취)**
* 모든 평가 요소에서 흠잡을 데 없는 수준.
* 인간 최고 수준의 작품을 뛰어넘는 창작물.
📌 **9.1점 - 노벨문학상 수상 작품 수준**
* 깊은 철학적 통찰과 보편적 인간성을 다룸.
* 예시: 가브리엘 가르시아 마르케스 『백년 동안의 고독』
📌 **8.1점 - 세계 문학사에 길이 남는 고전 수준**
* 시대와 문화를 뛰어넘어 지속적으로 읽히고 연구되는 작품.
* 예시: 톨스토이 『안나 카레니나』, 헤밍웨이 『노인과 바다』
📌 **7.1점 - 세계적인 베스트셀러 문학 작품 수준**
* 문학성과 상업성을 동시에 갖추며 전 세계적 영향력과 인지도를 지닌 작품.
* 예시: 『해리포터』 시리즈, 『반지의 제왕』, 『연금술사』
📌 **6.1점 - 권위 있는 국제 문학상 수상 작품 수준**
* 부커상, 퓰리처상, 공쿠르상 등 국제적 문학상을 수상한 작품.
* 예시: 『채식주의자』(한강, 맨부커상), 『로드』(코맥 매카시, 퓰리처상)
📌 **5.1점 - 아카데미 각본상·각색상 수상 영화 각본 수준**
* 뛰어난 이야기 구성, 캐릭터 표현 및 철학적 메시지를 인정받은 각본.
* 예시: 『기생충』(봉준호·한진원), 『이터널 선샤인』(찰리 카우프먼)
📌 **4.1점 - 상업적 흥행 성공 영화·드라마 각본 수준**
* 작품성보다는 대중성에 초점, 흥행과 대중적 공감을 이뤄낸 극본.
* 예시: 『오징어 게임』(황동혁), 『어벤져스』 시리즈
📌 **3.1점 - 국내적으로 인기 있는 일반 소설 및 드라마 수준**
* 큰 사회적 파급력은 없으나, 대중적으로 안정적 인기를 얻는 작품.
* 예시: 『82년생 김지영』(조남주), 드라마 『도깨비』(김은숙)
📌 **2.1점 - 일반적인 상업 장르 소설 및 드라마 각본 수준**
* 문학적 가치보다는 오락성 중심, 무난한 상업적 소비 목적.
* 예시: 다수의 일반 추리·로맨스 소설, 가벼운 주말 드라마 각본
📌 **1.1점 - 인기 웹소설 및 웹드라마 수준**
* 빠른 소비 목적, 가볍고 흥미 위주로 구성된 작품.
* 예시: 웹소설 플랫폼(네이버, 카카오페이지)의 일반적 인기 작품
📌 **0.1점 - 작가지망생·학생의 습작 수준**
* 이야기 구성, 문체, 캐릭터 묘사 등이 기초 수준이며 완성도가 낮은 단계.
📌 **0점 - 표절 또는 인간 작성 작품**
* 인간이 작성한 것으로 감지된 작품, AI가 생성하지 않음
* 기존 문학 작품에서 직접 표절
"""
}
def get_text(key, lang="en"):
    """Return the UI string for *key* in language *lang*.

    Unknown languages fall back to the English table; unknown keys yield "".
    """
    table = LANGUAGE_CONTENT.get(lang, LANGUAGE_CONTENT["en"])
    return table.get(key, "")
def calculate_bonus_score(word_count):
    """Length bonus: 0.1 points per full 1,000 words over 5,000, capped at 0.9."""
    excess = word_count - 5000
    if excess <= 0:
        return 0
    # Only complete thousands count toward the bonus.
    return min((excess // 1000) * 0.1, 0.9)
def format_username_as_link(username):
    """Render *username* as an HTML link to the user's Hugging Face profile.

    The original body returned the bare username even though the docstring
    (and the leaderboard HTML table) expect a clickable anchor; the anchor
    markup is restored here.
    """
    return f'<a href="https://huggingface.co/{username}" target="_blank">{username}</a>'
def format_llm_service_link(llm_url):
    """Render the LLM service URL as an HTML link, or "-" when absent.

    The original body never used *llm_url* and returned static text — the
    href markup had evidently been lost; it is restored here.
    """
    if not llm_url or llm_url.strip() == "":
        return "-"
    return f'<a href="{llm_url}" target="_blank">🔗 Link</a>'
def save_evaluation_to_dataset(username, pdf_filename, evaluation_result, base_score,
                               final_score, word_count, llm_url, is_human_sample,
                               pdf_content):
    """Persist one evaluation to the service operator's central HF dataset.

    Appends a row to ``evaluations.csv`` in ``{ADMIN_USERNAME}/user-evaluations``,
    uploads the submitted PDF under ``pdfs/``, then mirrors the record to the
    global leaderboard dataset.

    Args:
        pdf_content: raw bytes of the uploaded PDF.

    Returns:
        (success: bool, message: str)
    """
    if not HF_TOKEN:
        return False, "HF_TOKEN not set."
    try:
        api = HfApi(token=HF_TOKEN)
        # Central dataset owned by the service operator ("fantaxy/user-evaluations").
        dataset_id = f"{ADMIN_USERNAME}/user-evaluations"
        # Make sure the dataset repo exists (no-op when it already does).
        try:
            api.create_repo(
                repo_id=dataset_id,
                repo_type="dataset",
                private=False,
                exist_ok=True,
            )
        except Exception:
            # Repo may already exist or creation may race; continue either way.
            pass
        # Load the existing CSV, or start fresh when it is missing/unreadable.
        df = pd.DataFrame()
        try:
            csv_path = api.hf_hub_download(
                repo_id=dataset_id,
                filename="evaluations.csv",
                repo_type="dataset",
                local_dir_use_symlinks=False,
            )
            # Historical files may carry a BOM or be cp949-encoded.
            try:
                df = pd.read_csv(csv_path, encoding='utf-8')
            except UnicodeDecodeError:
                try:
                    df = pd.read_csv(csv_path, encoding='utf-8-sig')
                except UnicodeDecodeError:
                    df = pd.read_csv(csv_path, encoding='cp949')
        except Exception:
            df = pd.DataFrame(columns=['timestamp', 'username', 'pdf_filename',
                                       'base_score', 'final_score', 'word_count',
                                       'llm_url', 'is_human_sample', 'evaluation'])
        # Append the new evaluation row.
        new_evaluation = pd.DataFrame([{
            'timestamp': datetime.now().isoformat(),
            'username': username,
            'pdf_filename': pdf_filename,
            'base_score': base_score,
            'final_score': final_score,
            'word_count': word_count,
            'llm_url': llm_url if llm_url else "",
            'is_human_sample': is_human_sample,
            'evaluation': evaluation_result,
        }])
        df = pd.concat([df, new_evaluation], ignore_index=True)
        # Write to a temp file and upload; utf-8-sig keeps Excel happy with Korean text.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv',
                                         newline='', encoding='utf-8-sig') as f:
            df.to_csv(f, index=False, encoding='utf-8-sig')
            temp_path = f.name
        try:
            api.upload_file(
                path_or_fileobj=temp_path,
                path_in_repo="evaluations.csv",
                repo_id=dataset_id,
                repo_type="dataset",
                commit_message=f"Add evaluation for {pdf_filename}",
            )
        finally:
            # Always remove the temp file, even if the upload fails.
            os.unlink(temp_path)
        # Upload the submitted PDF next to the CSV.
        api.upload_file(
            path_or_fileobj=pdf_content,
            path_in_repo=f"pdfs/{pdf_filename}",
            repo_id=dataset_id,
            repo_type="dataset",
            commit_message=f"Upload PDF: {pdf_filename}",
        )
        # Mirror the record to the public leaderboard (best-effort inside that call).
        save_to_global_leaderboard(username, pdf_filename, final_score, word_count,
                                   llm_url, is_human_sample, evaluation_result, pdf_content)
        return True, f"Evaluation saved successfully. (Total {len(df)} evaluation records)"
    except Exception as e:
        return False, f"Error saving: {str(e)}"
def save_to_global_leaderboard(username, pdf_filename, final_score, word_count,
                               llm_url, is_human_sample, evaluation_result, pdf_content):
    """Mirror one evaluation into the public global leaderboard dataset.

    Best-effort: appends a (truncated) record to ``leaderboard.csv`` in
    GLOBAL_DATASET, uploads the PDF under ``pdfs/`` and the full evaluation
    text under ``evaluations/``. Any failure is logged and swallowed so the
    primary save path is never broken.
    """
    try:
        if not HF_TOKEN:
            return
        api = HfApi(token=HF_TOKEN)
        # Ensure the dataset repo exists; bail out quietly when it can't be created.
        try:
            api.dataset_info(GLOBAL_DATASET)
        except Exception:
            try:
                api.create_repo(
                    repo_id=GLOBAL_DATASET,
                    repo_type="dataset",
                    private=False,
                    exist_ok=True,
                )
            except Exception:
                return
        # Load the current leaderboard CSV, or start a fresh one.
        df = pd.DataFrame()
        try:
            csv_path = api.hf_hub_download(
                repo_id=GLOBAL_DATASET,
                filename="leaderboard.csv",
                repo_type="dataset",
                local_dir_use_symlinks=False,
            )
            # Historical files may carry a BOM or be cp949-encoded.
            try:
                df = pd.read_csv(csv_path, encoding='utf-8')
            except UnicodeDecodeError:
                try:
                    df = pd.read_csv(csv_path, encoding='utf-8-sig')
                except UnicodeDecodeError:
                    df = pd.read_csv(csv_path, encoding='cp949')
        except Exception:
            df = pd.DataFrame(columns=['timestamp', 'username', 'pdf_filename',
                                       'final_score', 'word_count', 'llm_url',
                                       'is_human_sample', 'evaluation'])
        new_record = pd.DataFrame([{
            'timestamp': datetime.now().isoformat(),
            'username': username,
            'pdf_filename': pdf_filename,
            'final_score': final_score,
            'word_count': word_count,
            'llm_url': llm_url if llm_url else "",
            'is_human_sample': is_human_sample,
            # CSV cell keeps at most 5,000 chars; the full text is uploaded separately below.
            'evaluation': evaluation_result[:5000] if len(evaluation_result) > 5000 else evaluation_result,
        }])
        df = pd.concat([df, new_record], ignore_index=True)
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv',
                                         newline='', encoding='utf-8-sig') as f:
            df.to_csv(f, index=False, encoding='utf-8-sig')
            temp_path = f.name
        try:
            api.upload_file(
                path_or_fileobj=temp_path,
                path_in_repo="leaderboard.csv",
                repo_id=GLOBAL_DATASET,
                repo_type="dataset",
                commit_message=f"Update leaderboard - {username}: {final_score}",
            )
        finally:
            # Always remove the temp file, even if the upload fails.
            os.unlink(temp_path)
        # Upload the PDF to the global dataset, namespaced by submitter.
        api.upload_file(
            path_or_fileobj=pdf_content,
            path_in_repo=f"pdfs/{username}_{pdf_filename}",
            repo_id=GLOBAL_DATASET,
            repo_type="dataset",
            commit_message=f"Upload PDF: {pdf_filename} by {username}",
        )
        # Store the full (untruncated) evaluation as a standalone text file.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt',
                                         encoding='utf-8') as f:
            f.write(evaluation_result)
            eval_temp_path = f.name
        try:
            api.upload_file(
                path_or_fileobj=eval_temp_path,
                path_in_repo=f"evaluations/{username}_{pdf_filename}.txt",
                repo_id=GLOBAL_DATASET,
                repo_type="dataset",
                commit_message=f"Upload evaluation: {pdf_filename} by {username}",
            )
        finally:
            os.unlink(eval_temp_path)
    except Exception as e:
        print(f"Failed to save to global leaderboard: {e}")
def _empty_leaderboard(lang="en"):
    """Return an empty display DataFrame with localized leaderboard headers."""
    return pd.DataFrame(columns=[
        get_text("rank", lang),
        get_text("author_id", lang),
        get_text("llm_service", lang),
        get_text("final_score_col", lang),
        get_text("word_count", lang),
        get_text("work_title", lang),
        get_text("submit_date", lang),
        get_text("human_sample", lang),
        get_text("download", lang),
        get_text("view_eval", lang),
    ])


def load_global_leaderboard(lang="en"):
    """Build the localized leaderboard display DataFrame from GLOBAL_DATASET.

    Downloads the leaderboard CSV, sorts rows by score and formats each row
    for display (medals for top 3, HTML links, badges). Returns an empty
    localized frame on any failure.
    """
    try:
        api = HfApi()
        # Locate a usable CSV inside the dataset.
        try:
            api.dataset_info(GLOBAL_DATASET)
            files = api.list_repo_files(repo_id=GLOBAL_DATASET, repo_type="dataset")
            csv_files = [f for f in files if f.endswith('.csv')]
            if 'leaderboard.csv' in csv_files:
                filename = 'leaderboard.csv'
            elif 'evaluations.csv' in csv_files:
                filename = 'evaluations.csv'
            elif csv_files:
                filename = csv_files[0]
            else:
                print("No CSV files found in dataset")
                return _empty_leaderboard(lang)
        except Exception as e:
            print(f"Error accessing dataset: {e}")
            return _empty_leaderboard(lang)
        csv_path = api.hf_hub_download(
            repo_id=GLOBAL_DATASET,
            filename=filename,
            repo_type="dataset",
            local_dir_use_symlinks=False,
        )
        # Historical files may carry a BOM or be cp949-encoded.
        try:
            df = pd.read_csv(csv_path, encoding='utf-8')
        except UnicodeDecodeError:
            try:
                df = pd.read_csv(csv_path, encoding='utf-8-sig')
            except UnicodeDecodeError:
                df = pd.read_csv(csv_path, encoding='cp949')
        print(f"Loaded dataframe with columns: {df.columns.tolist()}")
        print(f"Dataframe shape: {df.shape}")
        if df.empty:
            print("Dataframe is empty")
            return _empty_leaderboard(lang)
        # Accept either the current 'final_score' column or a legacy 'score' column.
        score_column = ('final_score' if 'final_score' in df.columns
                        else 'score' if 'score' in df.columns else None)
        if not score_column:
            print(f"No score column found. Available columns: {df.columns.tolist()}")
            return _empty_leaderboard(lang)
        # Rank by numeric score, highest first.
        df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
        df = df.sort_values(score_column, ascending=False).reset_index(drop=True)
        df['rank'] = range(1, len(df) + 1)
        medals = {1: "🥇 1", 2: "🥈 2", 3: "🥉 3"}
        display_data = []
        for idx, row in df.iterrows():
            display_row = {}
            rank = row['rank']
            display_row[get_text("rank", lang)] = medals.get(rank, f"{rank}")
            if 'username' in df.columns:
                display_row[get_text("author_id", lang)] = format_username_as_link(str(row['username']))
            if 'llm_url' in df.columns:
                llm_url = str(row['llm_url']) if pd.notna(row['llm_url']) else ""
                display_row[get_text("llm_service", lang)] = format_llm_service_link(llm_url)
            # Color-code the score by achievement tier.
            score = float(row[score_column])
            if score >= 9.0:
                score_color = "#ff6b6b"  # Red for Nobel level
            elif score >= 8.0:
                score_color = "#f59e0b"  # Orange for classic
            elif score >= 7.0:
                score_color = "#8b5cf6"  # Purple for bestseller
            elif score >= 5.0:
                score_color = "#3b82f6"  # Blue for professional
            elif score == 0:
                score_color = "#dc2626"  # Dark red for plagiarism
            else:
                score_color = "#6b7280"  # Gray for others
            # score_color was computed but unused in the original — the HTML span
            # had evidently been stripped; restored here.
            display_row[get_text("final_score_col", lang)] = (
                f'<span style="color:{score_color};font-weight:bold">{score:.1f}</span>'
            )
            if 'word_count' in df.columns:
                display_row[get_text("word_count", lang)] = f"{int(row['word_count']):,}"
            if 'pdf_filename' in df.columns:
                display_row[get_text("work_title", lang)] = str(row['pdf_filename'])
            if 'timestamp' in df.columns:
                date = datetime.fromisoformat(str(row['timestamp']))
                display_row[get_text("submit_date", lang)] = date.strftime("%Y-%m-%d")
            # AI vs human-sample badge.
            is_human_sample = False
            if 'is_human_sample' in df.columns:
                is_human_sample = row['is_human_sample']
            if is_human_sample:
                display_row[get_text("human_sample", lang)] = get_text("human_sample_badge", lang)
            else:
                display_row[get_text("human_sample", lang)] = get_text("ai_generated_badge", lang)
            # NOTE(review): the download/view cells were empty f-strings in the
            # original — the button markup (with data attributes) appears to have
            # been stripped and cannot be reconstructed reliably; behavior kept.
            if 'username' in df.columns and 'pdf_filename' in df.columns:
                username = str(row['username'])
                pdf_filename = str(row['pdf_filename'])
                display_row[get_text("download", lang)] = f''
            if 'username' in df.columns and 'pdf_filename' in df.columns:
                username = str(row['username'])
                pdf_filename = str(row['pdf_filename'])
                display_row[get_text("view_eval", lang)] = f''
            display_data.append(display_row)
        display_df = pd.DataFrame(display_data)
        print(f"Display dataframe shape: {display_df.shape}")
        print(f"Display dataframe columns: {display_df.columns.tolist()}")
        return display_df
    except Exception as e:
        print(f"Failed to load leaderboard: {e}")
        import traceback
        traceback.print_exc()
        return _empty_leaderboard(lang)
def load_user_evaluations(username, lang="en"):
    """Load the given user's 10 most recent evaluations from the central dataset.

    Returns:
        (DataFrame, error_message_or_None). On failure the frame is empty —
        except when HF_TOKEN is missing, where it is None (kept for
        backward compatibility; NOTE(review): callers should treat None like
        an empty frame).
    """
    if not HF_TOKEN:
        return None, "HF_TOKEN not set."
    try:
        api = HfApi(token=HF_TOKEN)
        # Central dataset owned by the service operator ("fantaxy/user-evaluations").
        dataset_id = f"{ADMIN_USERNAME}/user-evaluations"
        csv_path = api.hf_hub_download(
            repo_id=dataset_id,
            filename="evaluations.csv",
            repo_type="dataset",
            local_dir_use_symlinks=False,
        )
        # Historical files may carry a BOM or be cp949-encoded.
        try:
            df = pd.read_csv(csv_path, encoding='utf-8')
        except UnicodeDecodeError:
            try:
                df = pd.read_csv(csv_path, encoding='utf-8-sig')
            except UnicodeDecodeError:
                df = pd.read_csv(csv_path, encoding='cp949')
        # The CSV holds every user's submissions — keep only this user's rows.
        user_df = df[df['username'] == username].copy()
        if user_df.empty:
            return pd.DataFrame(columns=get_text("history_headers", lang)), "No evaluation history found."
        # Most recent 10 entries.
        user_df = user_df.sort_values('timestamp', ascending=False).head(10)
        display_df = user_df[['timestamp', 'pdf_filename', 'final_score', 'word_count']].copy()
        # AI vs human-sample badge column.
        if 'is_human_sample' in user_df.columns:
            display_df['type'] = user_df['is_human_sample'].apply(
                lambda x: get_text("human_sample_badge", lang) if x else get_text("ai_generated_badge", lang)
            )
        else:
            display_df['type'] = get_text("ai_generated_badge", lang)

        def _summarize(value):
            # Guard against NaN/non-string cells so a missing evaluation can't crash the view
            # (the original sliced the raw cell and would raise on a float NaN).
            text = "" if pd.isna(value) else str(value)
            return text[:100] + '...' if len(text) > 100 else text

        display_df['evaluation_summary'] = user_df['evaluation'].apply(_summarize)
        # Localized header row.
        display_df.columns = get_text("history_headers", lang)
        return display_df, None
    except FileNotFoundError:
        # Dataset/file does not exist yet.
        return pd.DataFrame(columns=get_text("history_headers", lang)), "No evaluation history yet."
    except Exception as e:
        return pd.DataFrame(columns=get_text("history_headers", lang)), f"Failed to load history: {str(e)}"
def extract_score_from_evaluation(evaluation_text):
    """Pull the numeric overall score (0-10) out of an evaluation transcript.

    Tries a series of known Korean/English score line formats in order and
    returns the first in-range match; falls back to 0.1 when nothing matches
    or parsing fails.
    """
    try:
        # Patterns cover plain, emoji-prefixed and loosely formatted score lines,
        # plus the "base evaluation score" variant.
        score_patterns = (
            r'종합 점수:\s*(\d+(?:\.\d+)?)/10점',
            r'Overall Score:\s*(\d+(?:\.\d+)?)/10 points',
            r'🎯\s*종합 점수:\s*(\d+(?:\.\d+)?)/10점',
            r'🎯\s*Overall Score:\s*(\d+(?:\.\d+)?)/10 points',
            r'종합 점수\s*:\s*(\d+(?:\.\d+)?)/10',
            r'Overall Score\s*:\s*(\d+(?:\.\d+)?)/10',
            r'기본 평가 점수:\s*(\d+(?:\.\d+)?)/10',
            r'Base Evaluation Score:\s*(\d+(?:\.\d+)?)/10',
        )
        for pattern in score_patterns:
            found = re.search(pattern, evaluation_text, re.IGNORECASE | re.MULTILINE)
            if not found:
                continue
            value = float(found.group(1))
            print(f"Debug: Found score {value} with pattern: {pattern}")
            if 0 <= value <= 10:
                return value
        print(f"Warning: Could not find score pattern in evaluation text")
        print(f"First 300 chars of evaluation: {evaluation_text[:300]}")
        return 0.1
    except Exception as e:
        print(f"Error in extract_score_from_evaluation: {e}")
        return 0.1
def extract_text_from_pdf(pdf_file) -> tuple:
    """Extract text from a PDF and estimate its word count.

    Accepts either a filesystem path (str) or raw PDF bytes. Tries
    pdfplumber first (better layout handling) and falls back to PyPDF2.

    Returns:
        (text, word_count, page_count) on success, or
        (error_message, 0, 0) when extraction fails.
    """
    def _read_pdfplumber(source):
        # Extract (text, page_count) via pdfplumber from a path or BytesIO.
        with pdfplumber.open(source) as pdf:
            pages = pdf.pages
            chunks = []
            for page in pages:
                page_text = page.extract_text()
                if page_text:
                    chunks.append(page_text)
            return "".join(chunks), len(pages)

    def _read_pypdf2(source):
        # Extract (text, page_count) via PyPDF2; unreadable pages are
        # logged and skipped instead of aborting the whole document.
        pdf_reader = PyPDF2.PdfReader(source)
        count = len(pdf_reader.pages)
        chunks = []
        for page_num in range(count):
            try:
                page_text = pdf_reader.pages[page_num].extract_text()
                if page_text:
                    # Drop sequences that cannot round-trip through UTF-8
                    # (malformed surrogates from broken PDF encodings).
                    chunks.append(page_text.encode('utf-8', errors='ignore').decode('utf-8', errors='ignore'))
            except Exception as page_error:
                print(f"Error reading page {page_num + 1}: {page_error}")
                continue
        return "".join(chunks), count

    text = ""
    page_count = 0
    # Try pdfplumber first if available
    if PDFPLUMBER_AVAILABLE:
        try:
            source = pdf_file if isinstance(pdf_file, str) else io.BytesIO(pdf_file)
            text, page_count = _read_pdfplumber(source)
            if not text.strip():
                raise Exception("Failed to extract text with pdfplumber")
        except Exception as e:
            print(f"pdfplumber error: {e}, retrying with PyPDF2")
            text = ""
    # Fall back to PyPDF2 if pdfplumber failed or is not available
    if not text:
        try:
            if isinstance(pdf_file, str):
                with open(pdf_file, 'rb') as file:
                    text, page_count = _read_pypdf2(file)
            else:
                text, page_count = _read_pypdf2(io.BytesIO(pdf_file))
        except Exception as e:
            error_msg = f"PDF reading error: {str(e)}"
            if "codec" in str(e) or "encoding" in str(e) or "utf-16" in str(e):
                error_msg += "\n\nThis PDF uses special encoding. Try:"
                error_msg += "\n1. Re-save the PDF with another PDF reader"
                error_msg += "\n2. Convert to text first, then back to PDF"
                error_msg += "\n3. Save as 'PDF/A' format using Adobe Acrobat"
            return error_msg, 0, 0
    # Clean text
    text = text.strip()
    if not text:
        return "Cannot extract text from PDF. May be scanned image PDF or protected PDF.", 0, 0
    # Strip NUL bytes and other non-printable characters (keep whitespace).
    # (A previous per-character codepoint range filter was removed: the
    # condition ord(c) < 0x10000 or 0x10000 <= ord(c) <= 0x10FFFF is true
    # for every Python character, so it was a costly no-op.)
    text = text.replace('\x00', '')
    text = ''.join(char for char in text if char.isprintable() or char in '\n\t ')
    # Word count via whitespace split (works for space-delimited languages)
    words = text.split()
    word_count = len(words)
    # Korean text has far fewer spaces; estimate ~2.5 chars per word and
    # take whichever estimate is larger.
    korean_chars = len(re.findall(r'[가-힣]', text))
    if korean_chars > 0:
        estimated_korean_words = korean_chars / 2.5
        word_count = max(word_count, int(estimated_korean_words))
    return text, word_count, page_count
def evaluate_novel_with_gemini(text: str, lang: str = "en", is_human_sample: bool = False) -> str:
    """Evaluate novel using Gemini 2.5 Pro"""
    # Builds a language-specific evaluation prompt (Korean or English),
    # streams the model's response, and returns the concatenated markdown.
    # `is_human_sample` relaxes the zero-score rule for admin-flagged
    # human-written reference works (Korean prompt only).
    try:
        if not GEMINI_API_KEY:
            return "### ⚠️ API Key Error\n\nGemini API key not set. Please check GEMINI_API environment variable in Space settings."
        client = genai.Client(api_key=GEMINI_API_KEY)
        model = "gemini-2.5-pro"
        text_length = len(text)
        # Choose evaluation criteria based on language
        criteria = EVALUATION_CRITERIA[lang]
        # Create evaluation prompt based on language
        if lang == "ko":
            evaluation_prompt = f"""
당신은 AGI 튜링테스트의 문학 창작 부문 평가자입니다.
이 테스트는 AI가 인간 수준의 장편소설을 창작할 수 있는지 평가하는 것이 목적입니다.
다음 작품을 아래 평가 기준에 따라 철저히 분석하고 평가해주세요.
**중요**: 이 작품이 AI가 생성한 것인지 인간이 작성한 것인지도 평가해야 합니다.
- AI 생성 패턴 (반복적 구조, 일관된 스타일, 기계적 표현 등)
- 인간 작성 징후 (감정의 깊이, 문화적 뉘앙스, 개인적 경험 반영 등)
- 기존 문학 작품과의 유사성 (표절 여부)
만약 이 작품이 인간이 작성했거나 표절로 판단되면, 점수는 0점입니다.
{"단, 이 작품은 관리자가 휴먼 샘플로 표시했으므로 0점 처리하지 마세요." if is_human_sample else ""}
[평가 기준]
{criteria}
[평가할 소설 - 전체 {text_length}자]
{text}
[평가 요구사항]
**중요: 작품의 전체 내용을 모두 읽고 평가해야 합니다. 시작, 중간, 결말 모든 부분을 고려하세요.**
1. AI/인간 판별: 이 작품이 AI가 생성했는지 인간이 작성했는지 분석
2. 표절 검사: 기존 문학 작품과의 유사성 확인
3. 점수 (0-10점): 위 기준에 따라 객관적으로 평가
4. 작품성 평가: 문학적 가치, 서사 구조, 인물 묘사, 문체
5. 종합 비평
다음 형식으로 응답해주세요:
## 📊 작품 평가 결과
### 🔍 AI/인간 판별
- **판정**: [AI 생성 / 인간 작성 / 표절]
- **근거**: [구체적인 판별 근거]
### 🎯 종합 점수: X.X/10점 (여기서 X.X는 0.1에서 10.0 사이의 숫자)
- **평가 등급**: [해당 점수의 등급]
- **점수 선정 이유**: [왜 이 점수를 주었는지 구체적 설명]
### 📝 상세 평가
[구체적인 평가 내용]
"""
        else:
            evaluation_prompt = f"""
You are an evaluator for the AGI Turing Test's literary creation section.
This test aims to evaluate whether AI can create novels at a level equivalent to human authors.
Please thoroughly analyze and evaluate the following work according to the criteria below.
**Important: You must read and evaluate the entire work. Consider all parts from beginning, middle, to end.**
1. AI/Human Detection: Analyze whether this work was AI-generated or human-written
2. Plagiarism Check: Verify similarity with existing literary works
3. Score (0-10 points): Objectively evaluate according to the above criteria
4. Literary Quality: Literary value, narrative structure, character description, writing style
5. Comprehensive Critique
Please respond in the following format:
## 📊 Work Evaluation Results
### 🔍 AI/Human Detection
- **Determination**: [AI Generated / Human Written / Plagiarized]
- **Evidence**: [Specific detection evidence]
### 🎯 Overall Score: X.X/10 points (where X.X is a number between 0.1 and 10.0)
- **Evaluation Grade**: [grade for this score]
- **Score Selection Reason**: [specific explanation of why this score was given]
### 📝 Detailed Evaluation
[Specific evaluation content]
"""
        contents = [
            types.Content(
                role="user",
                parts=[types.Part.from_text(text=evaluation_prompt)]
            )
        ]
        generate_content_config = types.GenerateContentConfig(
            # thinking_budget=-1 means unrestricted model "thinking";
            # plain text (markdown) response expected.
            thinking_config=types.ThinkingConfig(thinking_budget=-1),
            response_mime_type="text/plain",
        )
        # Get response via streaming and concatenate the chunks
        full_response = ""
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        ):
            if chunk.text:
                full_response += chunk.text
        return full_response
    except Exception as e:
        return f"Error during evaluation: {str(e)}\n\nDebug info: Please check if API key is set."
def evaluate_novel(pdf_file, llm_url, is_human_sample, lang, profile: gr.OAuthProfile = None, oauth_token: gr.OAuthToken = None, progress=gr.Progress()) -> tuple:
    """Main function to evaluate PDF file"""
    # Full submission pipeline: OAuth check -> PDF text extraction ->
    # Gemini evaluation -> plagiarism gate -> score computation ->
    # persist to the HF dataset -> refresh history and leaderboard.
    # `profile` and `oauth_token` are injected by Gradio's OAuth support.
    # Returns (markdown_result, history_df, leaderboard_df).
    try:
        # Check OAuth profile; unauthenticated users get a login prompt
        if profile:
            greeting = get_text("submitter", lang) + f"{profile.username}\n\n"
            username = profile.username
        else:
            greeting = get_text("login_required", lang)
            return greeting, None, None
        # Only the admin account may flag a submission as a human sample
        if is_human_sample and username != ADMIN_USERNAME:
            greeting += f"⚠️ {get_text('admin_only', lang)}\n\n"
            is_human_sample = False
        if not pdf_file:
            return greeting + "Please upload a PDF file.", None, None
        # Extract PDF filename
        pdf_filename = os.path.basename(pdf_file) if isinstance(pdf_file, str) else "uploaded.pdf"
        progress(0.2, desc="Reading PDF file...")
        text, word_count, page_count = extract_text_from_pdf(pdf_file)
        # word_count == 0 signals an extraction error; `text` holds the message
        if word_count == 0:
            return greeting + text, None, None
        # Enforce the 5,000-word minimum submission length
        if word_count < 5000:
            error_msg = get_text("min_words_error", lang).format(
                pages=page_count,
                words=word_count,
                needed=5000 - word_count
            )
            return greeting + error_msg, None, None
        progress(0.4, desc="AI is analyzing the work...")
        # Calculate volume bonus from word count
        bonus_score = calculate_bonus_score(word_count)
        greeting += get_text("work_info", lang) + f"{page_count}" + get_text("pages", lang)
        greeting += f"{word_count:,}" + get_text("words", lang)
        greeting += get_text("volume_bonus", lang) + f"{bonus_score}" + get_text("points", lang)
        greeting += get_text("evaluator", lang)
        evaluation_result = evaluate_novel_with_gemini(text, lang, is_human_sample)
        progress(0.8, desc="Saving evaluation results...")
        # Plagiarism / human-writing gate (skipped for admin human samples)
        plagiarism_detected = False
        if not is_human_sample:
            # Check if the AI flagged human writing or plagiarism AND gave 0/10
            if any(keyword in evaluation_result.lower() for keyword in ['human written', 'plagiarized', '인간 작성', '표절']):
                if '0/10' in evaluation_result or '0점/10점' in evaluation_result:
                    plagiarism_detected = True
        if plagiarism_detected:
            base_score = 0
            final_score = 0
            evaluation_result = get_text("plagiarism_detected", lang) + "\n\n" + evaluation_result
        else:
            # Extract base score with debugging output
            print(f"\n=== Score Extraction Debug ===")
            print(f"Bonus score calculated: {bonus_score}")
            base_score = extract_score_from_evaluation(evaluation_result)
            print(f"Extracted base score: {base_score}")
            # Compute final score: base + volume bonus, capped at 10.0
            final_score = min(base_score + bonus_score, 10.0)
            print(f"Final score calculated: {final_score}")
            # Sanity-check: 0.1 is the extraction-failure sentinel
            if base_score == 0.1 and "9.1" in evaluation_result:
                # A high score appears in the text but extraction fell back to 0.1
                print("WARNING: Possible score extraction mismatch detected")
                # Re-scan manually for any N/10 figures to aid debugging
                manual_check = re.findall(r'(\d+(?:\.\d+)?)/10', evaluation_result)
                if manual_check:
                    print(f"Found scores in text: {manual_check}")
        # Prepend the final score summary to the evaluation text
        score_display = get_text("final_score_title", lang)
        score_display += get_text("base_score", lang) + f"{base_score}/10" + get_text("points", lang).replace("(words over 5,000)", "") + "\n"
        score_display += get_text("bonus_score", lang) + f"{bonus_score}" + get_text("points_detail", lang)
        score_display += get_text("final_score", lang) + f"{final_score}/10" + get_text("max_10", lang)
        evaluation_result = score_display + evaluation_result
        # Read PDF content for saving
        with open(pdf_file, 'rb') as f:
            pdf_content = f.read()
        # Persist to the HF dataset (requires both server token and user OAuth)
        if HF_TOKEN and oauth_token:
            success, message = save_evaluation_to_dataset(username, pdf_filename, evaluation_result, base_score, final_score, word_count, llm_url, is_human_sample, pdf_content)
            if success:
                greeting += get_text("save_success", lang) + f"{message}\n\n"
            else:
                greeting += get_text("save_error", lang) + f"{message}\n\n"
        progress(1.0, desc="Evaluation complete!")
        # Load evaluation history for the submitting user
        history_df, _ = load_user_evaluations(username, lang)
        # Refresh leaderboard
        leaderboard_df = load_global_leaderboard(lang)
        return greeting + evaluation_result, history_df, leaderboard_df
    except Exception as e:
        return f"Error during evaluation: {str(e)}", None, None
def download_pdf(username, pdf_filename):
    """Download a submitted PDF from the HF dataset into a temp directory.

    Looks in the global leaderboard dataset first, then falls back to the
    user's personal dataset. Returns the local temp-file path, or None
    when the PDF cannot be found or downloaded.
    """
    try:
        api = HfApi()
        # Try the global dataset first
        # (bare `except:` narrowed to `except Exception:` so that
        # KeyboardInterrupt/SystemExit are not swallowed)
        try:
            pdf_path = api.hf_hub_download(
                repo_id=GLOBAL_DATASET,
                filename=f"pdfs/{username}_{pdf_filename}",
                repo_type="dataset",
                local_dir_use_symlinks=False
            )
        except Exception:
            # Fall back to the user's personal dataset
            try:
                pdf_path = api.hf_hub_download(
                    repo_id=f"{username}/{DATASET_NAME}",
                    filename=f"pdfs/{pdf_filename}",
                    repo_type="dataset",
                    local_dir_use_symlinks=False
                )
            except Exception:
                return None
        # Copy into the system temp dir so Gradio can serve the file
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, f"{username}_{pdf_filename}")
        shutil.copy2(pdf_path, temp_path)
        return temp_path
    except Exception as e:
        print(f"Error downloading PDF: {e}")
        return None
def view_evaluation(username, pdf_filename, lang="en"):
    """Fetch the stored evaluation text for a submission from the HF dataset.

    Tries the per-submission .txt file first, then falls back to the
    `evaluation` column of the global leaderboard CSV. Returns the
    evaluation text, a "not found" message, or an error string.
    """
    try:
        api = HfApi()
        # Try the per-submission text file in the global dataset
        # (bare `except:` narrowed to `except Exception:` so that
        # KeyboardInterrupt/SystemExit are not swallowed)
        try:
            eval_path = api.hf_hub_download(
                repo_id=GLOBAL_DATASET,
                filename=f"evaluations/{username}_{pdf_filename}.txt",
                repo_type="dataset",
                local_dir_use_symlinks=False
            )
            with open(eval_path, 'r', encoding='utf-8') as f:
                evaluation = f.read()
            return evaluation
        except Exception:
            # Fall back to the leaderboard CSV if the txt file is missing
            try:
                csv_path = api.hf_hub_download(
                    repo_id=GLOBAL_DATASET,
                    filename="leaderboard.csv",
                    repo_type="dataset",
                    local_dir_use_symlinks=False
                )
                df = pd.read_csv(csv_path, encoding='utf-8')
                row = df[(df['username'] == username) & (df['pdf_filename'] == pdf_filename)]
                if not row.empty and 'evaluation' in df.columns:
                    return row.iloc[0]['evaluation']
            except Exception:
                pass
            return "Evaluation not found."
    except Exception as e:
        return f"Error loading evaluation: {str(e)}"
# Custom CSS - Modern and bright design with simplified main page.
# Passed to gr.Blocks(css=...); class names below correspond to the
# elem_classes used when building the UI (e.g. "guide-section").
css = """
/* Main container */
.container {
max-width: 1600px;
margin: auto;
padding: 20px;
}
/* Simple header for main page */
.simple-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 12px;
text-align: center;
margin-bottom: 20px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
}
.simple-header h3 {
font-size: 1.8em;
margin: 0;
}
/* Header gradient */
.leaderboard-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px;
border-radius: 15px;
text-align: center;
margin-bottom: 30px;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
}
.leaderboard-header h2 {
font-size: 2.5em;
margin-bottom: 10px;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2);
}
/* Quick submit box */
.quick-submit-box {
background: linear-gradient(135deg, #f3f4f6 0%, #e5e7eb 100%);
border-radius: 12px;
padding: 25px;
margin-bottom: 20px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
}
.quick-submit-box h3 {
color: #1f2937;
margin-top: 0;
margin-bottom: 15px;
}
/* Tabs styling */
.tabs {
border-radius: 12px;
overflow: hidden;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.08);
}
button.tab-button {
font-size: 1.1em;
padding: 15px 30px;
background: white;
border: none;
transition: all 0.3s ease;
}
button.tab-button:hover {
background: #f3f4f6;
transform: translateY(-2px);
}
button.tab-button.selected {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
font-weight: bold;
}
/* Cards and boxes */
.gr-box {
border-radius: 12px;
border: 1px solid #e5e7eb;
padding: 20px;
background: white;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
transition: all 0.3s ease;
}
.gr-box:hover {
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.1);
transform: translateY(-2px);
}
/* Buttons */
.gr-button {
border-radius: 8px;
font-weight: 600;
transition: all 0.3s ease;
}
.gr-button-primary {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
}
.gr-button-primary:hover {
transform: translateY(-2px);
box-shadow: 0 8px 20px rgba(102, 126, 234, 0.4);
}
.gr-button-secondary {
background: #f3f4f6;
color: #4b5563;
border: 1px solid #e5e7eb;
}
.gr-button-secondary:hover {
background: #e5e7eb;
transform: translateY(-1px);
}
/* Download and View buttons */
.download-btn, .view-btn {
border: none;
padding: 6px 12px;
cursor: pointer;
border-radius: 6px;
font-size: 14px;
transition: all 0.3s ease;
}
.download-btn {
background-color: #10b981;
color: white;
}
.download-btn:hover {
background-color: #059669;
transform: translateY(-1px);
}
.view-btn {
background-color: #6366f1;
color: white;
}
.view-btn:hover {
background-color: #4f46e5;
transform: translateY(-1px);
}
/* Warning box */
.warning-box {
background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%);
border: 2px solid #ef4444;
border-radius: 12px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 4px 15px rgba(239, 68, 68, 0.1);
}
.warning-box strong {
color: #dc2626;
font-size: 1.1em;
}
/* Success/Info boxes */
.success-box {
background: linear-gradient(135deg, #d1fae5 0%, #a7f3d0 100%);
border: 2px solid #10b981;
border-radius: 12px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 4px 15px rgba(16, 185, 129, 0.1);
}
.info-box {
background: linear-gradient(135deg, #dbeafe 0%, #bfdbfe 100%);
border: 2px solid #3b82f6;
border-radius: 12px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 4px 15px rgba(59, 130, 246, 0.1);
}
/* Table styling */
.gr-dataframe {
border-radius: 12px;
overflow: hidden;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.08);
}
.gr-dataframe thead {
background: linear-gradient(135deg, #f3f4f6 0%, #e5e7eb 100%);
}
.gr-dataframe th {
padding: 15px;
font-weight: 700;
color: #374151;
text-transform: uppercase;
font-size: 0.85em;
letter-spacing: 0.05em;
}
.gr-dataframe td {
padding: 12px 15px;
border-bottom: 1px solid #f3f4f6;
}
.gr-dataframe tr:hover {
background: #f9fafb;
}
/* Score colors in table */
.score-nobel { color: #ef4444; font-weight: bold; }
.score-classic { color: #f59e0b; font-weight: bold; }
.score-bestseller { color: #8b5cf6; font-weight: bold; }
.score-professional { color: #3b82f6; font-weight: bold; }
.score-amateur { color: #6b7280; font-weight: bold; }
/* Modal styling */
.modal-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.5);
display: none;
justify-content: center;
align-items: center;
z-index: 1000;
}
.modal-content {
background: white;
border-radius: 15px;
padding: 30px;
max-width: 800px;
max-height: 80vh;
overflow-y: auto;
box-shadow: 0 20px 50px rgba(0, 0, 0, 0.3);
}
/* File upload area */
.gr-file {
border: 2px dashed #9ca3af;
border-radius: 12px;
background: #f9fafb;
transition: all 0.3s ease;
}
.gr-file:hover {
border-color: #667eea;
background: #ede9fe;
}
/* Language selector */
.language-selector {
position: absolute;
top: 20px;
right: 20px;
background: white;
border-radius: 8px;
padding: 8px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
}
/* Guide content styling */
.guide-content {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.guide-section {
background: white;
border-radius: 12px;
padding: 30px;
margin-bottom: 20px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
}
.guide-section h3 {
color: #1f2937;
margin-top: 0;
margin-bottom: 20px;
font-size: 1.5em;
}
.guide-section ul {
list-style: none;
padding-left: 0;
}
.guide-section ul li {
position: relative;
padding-left: 24px;
margin-bottom: 12px;
line-height: 1.6;
}
.guide-section ul li:before {
content: "▸";
position: absolute;
left: 0;
color: #667eea;
font-weight: bold;
}
/* Markdown content */
.markdown-content h3 {
color: #1f2937;
margin-top: 24px;
margin-bottom: 12px;
}
.markdown-content ul {
list-style: none;
padding-left: 0;
}
.markdown-content ul li {
position: relative;
padding-left: 24px;
margin-bottom: 8px;
}
.markdown-content ul li:before {
content: "▸";
position: absolute;
left: 0;
color: #667eea;
font-weight: bold;
}
/* Animations */
@keyframes fadeIn {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
.gr-box, .gr-button, .gr-dataframe {
animation: fadeIn 0.5s ease-out;
}
/* Responsive design */
@media (max-width: 768px) {
.container {
padding: 10px;
}
.leaderboard-header h2 {
font-size: 1.8em;
}
.simple-header h3 {
font-size: 1.5em;
}
.gr-dataframe {
font-size: 0.9em;
}
}
"""
# JavaScript code - simplified.
# NOTE: intentionally empty; injected via gr.HTML below as a placeholder
# for future custom JavaScript.
js_code = """
"""
# Create Gradio interface.
# Layout: language selector + OAuth login, then four tabs
# (Leaderboard / Submit / History / Guide). Component handles defined here
# are wired to callbacks further below.
with gr.Blocks(title="AGI Novel Evaluation Leaderboard", theme=gr.themes.Soft(), css=css) as demo:
    # Add JavaScript (currently an empty placeholder)
    gr.HTML(js_code)
    # Session state: current UI language and the leaderboard row selection
    current_lang = gr.State(value="en")
    selected_user = gr.State(value="")
    selected_file = gr.State(value="")
    # Language selector
    with gr.Row():
        with gr.Column(scale=10):
            title_md = gr.Markdown(get_text("title", "en"))
        with gr.Column(scale=1):
            lang_selector = gr.Radio(
                choices=[("English", "en"), ("한국어", "ko")],
                value="en",
                label="Language",
                interactive=True
            )
    # OAuth login button (required for submissions)
    gr.LoginButton()
    with gr.Tabs() as tabs:
        # Leaderboard tab - simplified main page
        with gr.TabItem(get_text("leaderboard_tab", "en"), id="leaderboard_tab") as leaderboard_tab:
            leaderboard_header = gr.HTML(get_text("simple_leaderboard_header", "en"))
            # Leaderboard at full width (quick-submit section removed);
            # "html" datatype columns carry clickable Download/View cells.
            leaderboard_display = gr.Dataframe(
                headers=[
                    get_text("rank", "en"),
                    get_text("author_id", "en"),
                    get_text("llm_service", "en"),
                    get_text("final_score_col", "en"),
                    get_text("word_count", "en"),
                    get_text("work_title", "en"),
                    get_text("submit_date", "en"),
                    get_text("human_sample", "en"),
                    get_text("download", "en"),
                    get_text("view_eval", "en")
                ],
                label="",
                interactive=False,
                wrap=True,
                datatype=["html", "html", "html", "html", "str", "str", "str", "str", "html", "html"]
            )
            # Manual actions section below the leaderboard (textboxes are
            # pre-filled by clicking a Download/View cell)
            gr.Markdown("### 🔧 Actions")
            with gr.Row():
                action_user = gr.Textbox(label="Username", placeholder="Enter username")
                action_file = gr.Textbox(label="Filename", placeholder="Enter filename")
            with gr.Row():
                manual_download_btn = gr.Button("📥 Download PDF", size="sm")
                manual_view_btn = gr.Button("👁️ View Evaluation", size="sm")
            download_result = gr.File(label="Downloaded PDF", visible=False)
            # Evaluation display (shown when viewing an evaluation)
            eval_display = gr.Markdown("", visible=False)
            refresh_btn = gr.Button(get_text("refresh_btn", "en"), variant="secondary")
        # Submit tab - detailed submission form
        with gr.TabItem(get_text("submit_tab", "en"), id="submit_tab") as submit_tab:
            with gr.Row():
                with gr.Column():
                    pdf_input = gr.File(
                        label=get_text("upload_label", "en"),
                        file_types=[".pdf"],
                        type="filepath"
                    )
                    llm_url_input = gr.Textbox(
                        label=get_text("llm_url_label", "en"),
                        placeholder=get_text("llm_url_placeholder", "en"),
                        lines=1,
                        max_lines=1
                    )
                    is_human_sample_input = gr.Checkbox(
                        label=get_text("is_human_sample_label", "en"),
                        value=False,
                        interactive=True
                    )
                    evaluate_btn = gr.Button(
                        get_text("evaluate_btn", "en"),
                        variant="primary",
                        size="lg"
                    )
                with gr.Column():
                    output = gr.Markdown(
                        label="Evaluation Results",
                        value=get_text("result_label", "en")
                    )
        # History tab - per-user submission history
        with gr.TabItem(get_text("history_tab", "en"), id="history_tab") as history_tab:
            history_btn = gr.Button(get_text("history_btn", "en"), variant="secondary")
            history_display = gr.Dataframe(
                headers=get_text("history_headers", "en"),
                label=get_text("history_label", "en"),
                interactive=False
            )
        # Guide tab - all detailed information
        with gr.TabItem(get_text("guide_tab", "en"), id="guide_tab") as guide_tab:
            with gr.Column(elem_classes="guide-content"):
                # Purpose section
                with gr.Group(elem_classes="guide-section"):
                    purpose_title_md = gr.Markdown(get_text("purpose_title", "en"))
                    purpose_desc_md = gr.Markdown(get_text("purpose_desc", "en"))
                # Why Novel Creation section
                with gr.Group(elem_classes="guide-section"):
                    why_title_md = gr.Markdown(get_text("why_title", "en"))
                    why_desc_md = gr.Markdown(get_text("why_desc", "en"))
                # Evaluation Criteria section
                with gr.Group(elem_classes="guide-section"):
                    criteria_title_md = gr.Markdown(get_text("criteria_title", "en"))
                    criteria_desc_md = gr.Markdown(get_text("criteria_desc", "en"))
                # Requirements and Scoring
                with gr.Row():
                    with gr.Column():
                        with gr.Group(elem_classes="guide-section"):
                            requirements_md = gr.Markdown(get_text("requirements", "en"))
                            bonus_md = gr.Markdown(get_text("bonus_system", "en"))
                    with gr.Column():
                        with gr.Group(elem_classes="guide-section"):
                            score_system_md = gr.Markdown(get_text("score_system", "en"))
                            grade_criteria_md = gr.Markdown(get_text("grade_criteria", "en"))
                # Evaluation Scale
                with gr.Group(elem_classes="guide-section"):
                    eval_scale_md = gr.Markdown(get_text("evaluation_scale", "en"))
                # Warning
                warning_html = gr.HTML(get_text("warning", "en"))
    # Quick submit result display (hidden by default)
    quick_submit_output = gr.Markdown(visible=False)
    # Language change handler
    def update_language(lang):
        """Return updated values for every localized UI component.

        IMPORTANT: the order of this tuple must exactly match the
        `outputs` list of `lang_selector.change` below — a mismatch
        silently writes the wrong value into the wrong component.
        """
        return (
            lang,  # Update current_lang state
            get_text("title", lang),
            gr.TabItem(label=get_text("leaderboard_tab", lang)),
            gr.TabItem(label=get_text("submit_tab", lang)),
            gr.TabItem(label=get_text("history_tab", lang)),
            gr.TabItem(label=get_text("guide_tab", lang)),
            get_text("simple_leaderboard_header", lang),
            gr.Button(value=get_text("refresh_btn", lang), variant="secondary"),
            gr.File(label=get_text("upload_label", lang)),
            gr.Textbox(label=get_text("llm_url_label", lang), placeholder=get_text("llm_url_placeholder", lang)),
            gr.Checkbox(label=get_text("is_human_sample_label", lang)),
            gr.Button(value=get_text("evaluate_btn", lang), variant="primary", size="lg"),
            gr.Markdown(value=get_text("result_label", lang)),
            gr.Button(value=get_text("history_btn", lang), variant="secondary"),
            load_global_leaderboard(lang),  # re-render leaderboard in the new language
            gr.Button(value=f"📥 {get_text('download_pdf', lang)}", size="sm"),
            gr.Button(value=f"👁️ {get_text('view_evaluation', lang)}", size="sm"),
            # Guide tab updates
            get_text("purpose_title", lang),
            get_text("purpose_desc", lang),
            get_text("why_title", lang),
            get_text("why_desc", lang),
            get_text("criteria_title", lang),
            get_text("criteria_desc", lang),
            get_text("requirements", lang),
            get_text("bonus_system", lang),
            get_text("score_system", lang),
            get_text("grade_criteria", lang),
            get_text("evaluation_scale", lang),
            get_text("warning", lang)
        )
    lang_selector.change(
        fn=update_language,
        inputs=[lang_selector],
        outputs=[
            current_lang, title_md,
            leaderboard_tab, submit_tab, history_tab, guide_tab,
            leaderboard_header, refresh_btn,
            pdf_input, llm_url_input, is_human_sample_input, evaluate_btn, output, history_btn, leaderboard_display,
            manual_download_btn, manual_view_btn,
            # Guide tab elements
            purpose_title_md, purpose_desc_md, why_title_md, why_desc_md,
            criteria_title_md, criteria_desc_md, requirements_md, bonus_md,
            score_system_md, grade_criteria_md, eval_scale_md, warning_html
        ]
    )
# Event handlers
evaluate_btn.click(
fn=evaluate_novel,
inputs=[pdf_input, llm_url_input, is_human_sample_input, current_lang],
outputs=[output, history_display, leaderboard_display],
show_progress=True
)
def refresh_history(profile: gr.OAuthProfile = None):
if not profile:
return None
lang = current_lang.value if hasattr(current_lang, 'value') else "en"
df, _ = load_user_evaluations(profile.username, lang)
return df
history_btn.click(
fn=refresh_history,
inputs=[],
outputs=[history_display]
)
refresh_btn.click(
fn=lambda lang: load_global_leaderboard(lang),
inputs=[current_lang],
outputs=[leaderboard_display]
)
# Click handler for dataframe rows
def on_dataframe_select(evt: gr.SelectData, dataframe):
if evt.index and len(evt.index) >= 2:
row_idx = evt.index[0]
col_idx = evt.index[1]
# Get column name
if dataframe is not None and not dataframe.empty:
cols = dataframe.columns.tolist()
if col_idx < len(cols):
col_name = cols[col_idx]
# Check if it's download or view column
if col_name in ["Download", "다운로드", "View", "평가보기"]:
# Get the HTML content
cell_value = dataframe.iloc[row_idx, col_idx]
# Extract username and filename from data attributes
import re
user_match = re.search(r'data-user="([^"]+)"', str(cell_value))
file_match = re.search(r'data-file="([^"]+)"', str(cell_value))
if user_match and file_match:
return user_match.group(1), file_match.group(1)
return "", ""
leaderboard_display.select(
fn=on_dataframe_select,
inputs=[leaderboard_display],
outputs=[action_user, action_file]
)
    # Manual download button: fetches the PDF from the HF dataset and
    # reveals it in the (normally hidden) gr.File component.
    def manual_download(user, file):
        """Download the given user's PDF; hide the file widget on failure."""
        if user and file:
            pdf_path = download_pdf(user, file)
            if pdf_path:
                return gr.File(value=pdf_path, visible=True)
        return gr.File(visible=False)
    manual_download_btn.click(
        fn=manual_download,
        inputs=[action_user, action_file],
        outputs=[download_result]
    )
    # Manual view button: shows the stored evaluation text as markdown.
    def manual_view(user, file, lang):
        """Render the stored evaluation for (user, file) in eval_display."""
        if user and file:
            evaluation = view_evaluation(user, file, lang)
            title = f"## 📋 Evaluation for {file}\n### Author: {user}\n\n"
            return gr.Markdown(value=title + evaluation, visible=True)
        return gr.Markdown(visible=False)
    manual_view_btn.click(
        fn=manual_view,
        inputs=[action_user, action_file, current_lang],
        outputs=[eval_display]
    )
    # Auto-load leaderboard on page load (English default)
    demo.load(
        fn=lambda: load_global_leaderboard("en"),
        inputs=[],
        outputs=[leaderboard_display]
    )
if __name__ == "__main__":
    demo.launch()