"""
ЕГЭ Эксперт (EGE Expert) - API for grading essays and parsing exam tasks.
Combines the ruBERT-based essay checker and the FIPI task parser.
"""
|
|
import json
import logging
import os
import re
from typing import Optional, List, Dict

import torch
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
|
|
| |
# Read variables from a local .env file (if present) into the process
# environment before anything queries os.getenv().
load_dotenv()


# The ASGI application object; route handlers are registered on it below.
app = FastAPI(
    title="ЕГЭ Эксперт API",
    description="Проверка сочинений ЕГЭ + парсинг заданий ФИПИ",
    version="2.0.0",
)


# CORS is fully open: every origin, HTTP method and request header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
| |
| |
| |
|
|
# Hugging Face model identifier used for sentence embeddings.
MODEL_NAME = "DeepPavlov/rubert-base-cased-sentence"
# Both stay None until load_model() has run.
tokenizer = None
model = None
|
|
def load_model():
    """Load the ruBERT tokenizer and model into the module-level globals.

    The model is switched to evaluation mode. Progress is reported with
    ``print``. Any exception raised by ``from_pretrained`` propagates.
    """
    global tokenizer, model
    print("Loading ruBERT model...")
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModel.from_pretrained(MODEL_NAME)
    loaded_model.eval()
    # Publish both globals only after both loads succeeded, so a failure
    # never leaves a tokenizer set next to a missing model.
    tokenizer, model = loaded_tokenizer, loaded_model
    print("ruBERT loaded!")
|
|
@app.on_event("startup")
async def startup():
    """Load the ruBERT model when the application starts."""
    load_model()
|
|
| |
| |
| |
|
|
class EssayRequest(BaseModel):
    """Request body for ``POST /grade``."""

    # Full text of the essay to grade.
    essay: str
    # Optional source text the essay responds to; empty by default.
    source: Optional[str] = ""
|
|
class TaskRequest(BaseModel):
    """Request body for ``POST /parse``."""

    # Optional URL; empty by default.
    url: Optional[str] = ""
    # Forwarded to the parser as ``max_pages``.
    max_pages: int = 3
|
|
class SupabaseConfig(BaseModel):
    """Pair of Supabase connection settings (base URL and API key)."""

    supabase_url: str
    supabase_key: str
|
|
| |
| |
| |
|
|
def normalize(text: str) -> str:
    """Return *text* lower-cased, with "ё" replaced by "е" and outer whitespace removed."""
    return text.lower().replace("ё", "е").strip()
|
|
def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*."""
    # str.split() with no argument already ignores leading/trailing
    # whitespace and never yields empty strings.
    return len(text.split())
|
|
def get_paragraphs(text: str) -> list:
    """Split *text* on newlines and return the non-blank pieces, stripped."""
    return [p.strip() for p in re.split(r'\n+', text) if p.strip()]
|
|
def get_sentences(text: str) -> list:
    """Split *text* into sentences on runs of terminal punctuation.

    Terminators are ".", "!", "?" and the single-character ellipsis "…"
    (so "…" behaves the same as "..."). Empty fragments are dropped and the
    remaining ones are stripped of surrounding whitespace.
    """
    return [s.strip() for s in re.split(r'[.!?…]+', text) if s.strip()]
|
|
def get_embedding(text: str) -> torch.Tensor:
    """Return a mean-pooled ruBERT embedding for *text*.

    The text is tokenized (truncated to 512 tokens), passed through the
    model without gradient tracking, and the last hidden states are averaged
    over the positions selected by the attention mask.

    Raises:
        RuntimeError: if the module-level tokenizer/model are not loaded yet.
    """
    if tokenizer is None or model is None:
        raise RuntimeError("ruBERT model is not loaded; call load_model() first")

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    attention_mask = inputs["attention_mask"]
    # Broadcast the mask over the hidden dimension so masked-out positions
    # contribute nothing to the sum.
    mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    # The clamp guards against division by zero for an all-zero mask.
    embedding = torch.sum(token_embeddings * mask_expanded, 1) / torch.clamp(mask_expanded.sum(1), min=1e-9)
    # Only one text was encoded, so return the first (only) row.
    return embedding[0]
|
|
def cosine_similarity(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the cosine similarity of two 1-D tensors as a Python float."""
    # A leading batch dimension is added because F.cosine_similarity reduces
    # over dim=1 by default.
    return torch.nn.functional.cosine_similarity(a.unsqueeze(0), b.unsqueeze(0)).item()
|
|
| |
| |
| |
|
|
# Marker phrases searched for (as substrings of the normalized essay) by the
# check_k1 / check_k2 / check_k3 heuristics. They are written in the already
# normalized form: lower case, with "е" in place of "ё".
K1_PHRASES = ["проблем", "автор поднимает", "автор рассматривает", "текст посвящен"]
K2_EXAMPLE_PHRASES = ["например", "автор пишет", "автор описывает", "в тексте"]
K2_LINK_PHRASES = ["таким образом", "следовательно", "оба примера", "кроме того"]
K3_OPINION_PHRASES = ["я считаю", "я думаю", "по моему мнению", "я согласен"]
K3_ARG_PHRASES = ["потому что", "так как", "литература", "в романе", "в повести"]
|
|
def check_k1(essay: str, has_source: bool, relevance: float = 0.5) -> dict:
    """Score criterion K1 (0 or 1) with a phrase/relevance heuristic.

    Args:
        essay: Essay text.
        has_source: Whether a source text accompanies the essay.
        relevance: Essay/source similarity; only consulted when
            ``has_source`` is true.

    Returns:
        ``{"score": int, "comment": str}``. With a source, the score is 1 if
        any K1 marker phrase occurs or ``relevance`` exceeds 0.4; without a
        source, only the marker phrases count.
    """
    n = normalize(essay)
    has_marker = any(p in n for p in K1_PHRASES)

    if has_source:
        if has_marker or relevance > 0.4:
            return {"score": 1, "comment": "Позиция автора сформулирована."}
        return {"score": 0, "comment": "Позиция автора не сформулирована."}

    if has_marker:
        return {"score": 1, "comment": "Проблема сформулирована."}
    return {"score": 0, "comment": "Проблема не сформулирована."}
|
|
def check_k2(essay: str, has_source: bool) -> dict:
    """Score criterion K2 (0-3) by counting sentences that contain example markers.

    Args:
        essay: Essay text.
        has_source: Accepted for signature parity with ``check_k1``; it does
            not influence the score.

    Returns:
        ``{"score": int, "comment": str}``: 3 for at least two example
        sentences plus a linking phrase anywhere in the essay, 2 for at
        least two example sentences, 1 for one, 0 otherwise.
    """
    n = normalize(essay)
    sentences = get_sentences(essay)

    example_sentences = [s for s in sentences if any(p in normalize(s) for p in K2_EXAMPLE_PHRASES)]
    has_link = any(p in n for p in K2_LINK_PHRASES)
    example_count = len(example_sentences)

    if example_count >= 2 and has_link:
        return {"score": 3, "comment": "Два примера с пояснением и связью."}
    if example_count >= 2:
        return {"score": 2, "comment": "Два примера без связи."}
    if example_count >= 1:
        return {"score": 1, "comment": "Один пример."}
    return {"score": 0, "comment": "Нет примеров."}
|
|
def check_k3(essay: str) -> dict:
    """Score criterion K3 (0-2) from opinion and argument marker phrases.

    Returns:
        ``{"score": int, "comment": str}``: 2 when both an opinion phrase
        and an argument phrase occur, 1 when only an opinion phrase occurs,
        0 otherwise (an argument phrase alone scores 0).
    """
    n = normalize(essay)

    has_opinion = any(p in n for p in K3_OPINION_PHRASES)
    has_arg = any(p in n for p in K3_ARG_PHRASES)

    if has_opinion and has_arg:
        return {"score": 2, "comment": "Позиция выражена и обоснована."}
    if has_opinion:
        return {"score": 1, "comment": "Позиция выражена."}
    return {"score": 0, "comment": "Позиция не выражена."}
|
|
def check_k4(essay: str) -> dict:
    """Score criterion K4 (0 or 1) purely from essay length.

    Returns:
        ``{"score": int, "comment": str}``: 0 when the essay has fewer than
        50 words, 1 otherwise. No other analysis of the text is performed.
    """
    if count_words(essay) < 50:
        return {"score": 0, "comment": "Текст слишком короткий."}
    return {"score": 1, "comment": "Ошибок нет."}
|
|
def check_k5(essay: str) -> dict:
    """Score criterion K5 (0-2) from the number of paragraphs.

    Returns:
        ``{"score": int, "comment": str}``: 2 for five or more paragraphs,
        1 for three or four, 0 otherwise.
    """
    paragraph_count = len(get_paragraphs(essay))

    if paragraph_count >= 5:
        return {"score": 2, "comment": "Структура соблюдена."}
    if paragraph_count >= 3:
        return {"score": 1, "comment": "Структура частична."}
    return {"score": 0, "comment": "Нет абзацев."}
|
|
| |
| |
| |
|
|
@app.get("/")
async def root():
    """Return the service name, version and a list of available endpoints."""
    return {
        "message": "ЕГЭ Эксперт API",
        "version": "2.0.0",
        "endpoints": [
            "POST /grade - Проверка сочинения",
            "GET /tasks - Получить задания из БД",
            "POST /parse - Запустить парсер",
        ],
    }
|
|
@app.post("/grade")
async def grade_essay(request: EssayRequest):
    """Grade an essay against criteria K1-K5 and return the breakdown.

    Args:
        request: Essay text plus an optional source text.

    Returns:
        A dict with ``total_score``, ``max_score`` (9), ``percentage``,
        the per-criterion results under ``criteria`` and word / paragraph /
        sentence counts under ``stats``.
    """
    essay = request.essay
    source = request.source or ""
    # A source of 10 characters or fewer is treated as "no source supplied".
    has_source = len(source) > 10

    relevance = 0.5
    if has_source:
        try:
            emb_essay = get_embedding(essay[:512])
            emb_source = get_embedding(source[:512])
            relevance = cosine_similarity(emb_essay, emb_source)
        except Exception:
            # Grading stays best-effort, but the failure is logged instead
            # of being swallowed. Relevance drops to 0.0 because the neutral
            # 0.5 is above check_k1's 0.4 threshold and would award K1
            # without any evidence; K1 then relies on marker phrases only.
            logging.getLogger(__name__).exception(
                "Failed to compute essay/source relevance"
            )
            relevance = 0.0

    k1 = check_k1(essay, has_source, relevance)
    k2 = check_k2(essay, has_source)
    k3 = check_k3(essay)
    k4 = check_k4(essay)
    k5 = check_k5(essay)

    total = k1["score"] + k2["score"] + k3["score"] + k4["score"] + k5["score"]
    # Sum of the per-criterion maxima: 1 + 3 + 2 + 1 + 2.
    max_score = 9

    return {
        "total_score": total,
        "max_score": max_score,
        "percentage": round(total / max_score * 100),
        "criteria": {
            "k1": k1,
            "k2": k2,
            "k3": k3,
            "k4": k4,
            "k5": k5,
        },
        "stats": {
            "words": count_words(essay),
            "paragraphs": len(get_paragraphs(essay)),
            "sentences": len(get_sentences(essay)),
        },
    }
|
|
@app.get("/tasks")
async def get_tasks():
    """Fetch up to 100 tasks from the Supabase ``tasks`` table.

    Reads ``SUPABASE_URL`` and ``SUPABASE_KEY`` from the environment.

    Returns:
        ``{"count": int, "tasks": [...]}`` on HTTP 200, otherwise
        ``{"error": str, "tasks": []}``. Errors are reported in the body
        rather than raised.
    """
    supabase_url = os.getenv("SUPABASE_URL")
    supabase_key = os.getenv("SUPABASE_KEY")

    if not supabase_url or not supabase_key:
        return {"error": "Supabase не настроен", "tasks": []}

    # Tolerate a trailing slash in the configured URL (avoids "//rest/...").
    base_url = supabase_url.rstrip("/")

    try:
        # Imported inside the try so a missing dependency is reported in the
        # response body like any other failure.
        import requests

        # NOTE(review): requests.get() is a blocking call inside an async
        # handler; consider offloading it to a worker thread.
        response = requests.get(
            f"{base_url}/rest/v1/tasks?limit=100",
            headers={
                "apikey": supabase_key,
                "Authorization": f"Bearer {supabase_key}",
            },
            timeout=10,
        )

        if response.status_code == 200:
            tasks = response.json()
            return {"count": len(tasks), "tasks": tasks}
        return {"error": f"Ошибка {response.status_code}", "tasks": []}
    except Exception as e:
        return {"error": str(e), "tasks": []}
|
|
@app.post("/parse")
async def parse_tasks(request: TaskRequest):
    """Run the task parser and store every parsed task in Supabase.

    Reads ``SUPABASE_URL`` and ``SUPABASE_KEY`` from the environment, calls
    ``fipi_ai_scraper.parse_all_sources`` with ``request.max_pages`` and
    POSTs each returned task to the ``tasks`` table. ``request.url`` is not
    used here.

    Returns:
        ``{"message": str, "count": int}`` with the number of tasks stored
        (responses with status 200 or 201), ``{"message": str}`` when the
        parser returned nothing, or ``{"error": str}`` on missing
        configuration or any exception.
    """
    supabase_url = os.getenv("SUPABASE_URL")
    supabase_key = os.getenv("SUPABASE_KEY")

    if not supabase_url or not supabase_key:
        return {"error": "Supabase не настроен"}

    # Tolerate a trailing slash in the configured URL (avoids "//rest/...").
    base_url = supabase_url.rstrip("/")

    try:
        from fipi_ai_scraper import parse_all_sources

        # NOTE(review): the parser and the POSTs below are blocking calls
        # inside an async handler; consider offloading to a worker thread.
        tasks = parse_all_sources(max_pages=request.max_pages)

        if tasks:
            import requests

            saved = 0
            for task in tasks:
                resp = requests.post(
                    f"{base_url}/rest/v1/tasks",
                    headers={
                        "apikey": supabase_key,
                        "Authorization": f"Bearer {supabase_key}",
                        "Content-Type": "application/json",
                    },
                    json=task,
                    timeout=10,
                )
                if resp.status_code in (200, 201):
                    saved += 1

            return {"message": f"Сохранено {saved} заданий", "count": saved}
        return {"message": "Задания не найдены"}
    except Exception as e:
        return {"error": str(e)}
|
|
| |
| |
| |
|
|
# Script entry point: serve the app with uvicorn on all interfaces, port 7860.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|