Upload 37 files
Browse files- backend/__pycache__/db.cpython-39.pyc +0 -0
- backend/__pycache__/main.cpython-313.pyc +0 -0
- backend/__pycache__/main.cpython-39.pyc +0 -0
- backend/__pycache__/models.cpython-39.pyc +0 -0
- backend/__pycache__/post_router.cpython-39.pyc +0 -0
- backend/__pycache__/recommendWord.cpython-313.pyc +0 -0
- backend/__pycache__/recommendWord.cpython-39.pyc +0 -0
- backend/__pycache__/ref.cpython-39.pyc +0 -0
- backend/__pycache__/spellchecker.cpython-39.pyc +0 -0
- backend/__pycache__/test.cpython-313.pyc +0 -0
- backend/auth/__pycache__/routes.cpython-39.pyc +0 -0
- backend/auth/routes.py +175 -0
- backend/db.py +26 -0
- backend/hansepll.py +36 -0
- backend/hanspell/__init__.py +1 -0
- backend/hanspell/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/hanspell/__pycache__/__init__.cpython-313.pyc +0 -0
- backend/hanspell/__pycache__/__init__.cpython-39.pyc +0 -0
- backend/hanspell/__pycache__/constants.cpython-313.pyc +0 -0
- backend/hanspell/__pycache__/constants.cpython-39.pyc +0 -0
- backend/hanspell/__pycache__/response.cpython-313.pyc +0 -0
- backend/hanspell/__pycache__/response.cpython-39.pyc +0 -0
- backend/hanspell/__pycache__/spell_checker.cpython-312.pyc +0 -0
- backend/hanspell/__pycache__/spell_checker.cpython-313.pyc +0 -0
- backend/hanspell/__pycache__/spell_checker.cpython-39.pyc +0 -0
- backend/hanspell/constants.py +10 -0
- backend/hanspell/response.py +26 -0
- backend/hanspell/spell_checker.py +153 -0
- backend/main.py +131 -0
- backend/models.py +32 -0
- backend/post_router.py +119 -0
- backend/recommendWord.py +86 -0
- backend/ref.py +75 -0
- backend/requirements.txt +20 -0
- backend/spellchecker.py +191 -0
- backend/test.py +45 -0
- dockerfile +13 -0
backend/__pycache__/db.cpython-39.pyc
ADDED
|
Binary file (819 Bytes). View file
|
|
|
backend/__pycache__/main.cpython-313.pyc
ADDED
|
Binary file (2.45 kB). View file
|
|
|
backend/__pycache__/main.cpython-39.pyc
ADDED
|
Binary file (4.04 kB). View file
|
|
|
backend/__pycache__/models.cpython-39.pyc
ADDED
|
Binary file (1.35 kB). View file
|
|
|
backend/__pycache__/post_router.cpython-39.pyc
ADDED
|
Binary file (3.72 kB). View file
|
|
|
backend/__pycache__/recommendWord.cpython-313.pyc
ADDED
|
Binary file (2.06 kB). View file
|
|
|
backend/__pycache__/recommendWord.cpython-39.pyc
ADDED
|
Binary file (2.61 kB). View file
|
|
|
backend/__pycache__/ref.cpython-39.pyc
ADDED
|
Binary file (2.58 kB). View file
|
|
|
backend/__pycache__/spellchecker.cpython-39.pyc
ADDED
|
Binary file (4.17 kB). View file
|
|
|
backend/__pycache__/test.cpython-313.pyc
ADDED
|
Binary file (2.02 kB). View file
|
|
|
backend/auth/__pycache__/routes.cpython-39.pyc
ADDED
|
Binary file (4.82 kB). View file
|
|
|
backend/auth/routes.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException, Depends, Request, Response
|
| 2 |
+
from fastapi.responses import JSONResponse
|
| 3 |
+
from fastapi.security import OAuth2PasswordRequestForm
|
| 4 |
+
from pydantic import BaseModel, EmailStr
|
| 5 |
+
from datetime import timedelta, timezone, datetime
|
| 6 |
+
import jwt
|
| 7 |
+
from passlib.context import CryptContext
|
| 8 |
+
from db import get_db
|
| 9 |
+
from models import User
|
| 10 |
+
from sqlalchemy.orm import Session
|
| 11 |
+
|
| 12 |
+
router = APIRouter()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class UserSignup(BaseModel):
    """Request payload for the /signup endpoint."""

    user_name: str
    user_email: EmailStr  # validated as an e-mail address by pydantic
    password: str  # plaintext on the wire; bcrypt-hashed in signup() before storage
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# In-memory stores. NOTE(review): login() persists refresh tokens on the
# User row instead, so these dicts are effectively unused by the DB-backed
# flow — candidates for removal once confirmed nothing else reads them.
users_db = {}
refresh_tokens = {}

# Password hashing context (bcrypt), used by signup() and login().
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


# NOTE(review): the JWT signing key is hardcoded here and duplicated in
# post_router.py; it should be loaded from configuration/environment and
# shared from a single module.
SECRET_KEY = "52a6206f34a1c479da043cdeee17fd859a35e54978a6733a6a7ebadcbd11f0ca"
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 15
REFRESH_TOKEN_EXPIRE_DAYS = 7
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def create_token(data: dict, expires_delta: timedelta = timedelta(minutes=15)):
    """Sign *data* as a JWT whose 'exp' claim is now + *expires_delta* (UTC)."""
    claims = dict(data)
    claims["exp"] = datetime.now(timezone.utc) + expires_delta
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def verify_token(token: str):
    """Decode *token* and return the e-mail held in its 'sub' claim.

    Raises HTTPException(401) when the token is expired, malformed, or
    carries no 'sub' claim.
    """
    try:
        claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
        email = claims.get("sub")
        if email is None:
            raise HTTPException(status_code=401, detail="ν ν°μ μ΄λ©μΌ μμ")
        # A valid signature plus a present e-mail is treated as sufficient.
        return email
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="ν ν° λ§λ£")
    except jwt.PyJWTError:
        raise HTTPException(status_code=401, detail="ν ν° μ€λ₯")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_current_user(request: Request):
    """FastAPI dependency: authenticate via the `Authorization: Bearer` header.

    Returns the authenticated user's e-mail, or raises HTTPException(401)
    when the header is missing/malformed or the token does not verify.
    """
    auth_header = request.headers.get("Authorization")
    if not auth_header or not auth_header.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="μΈμ¦ ν€λ μμ")

    token = auth_header.split(" ")[1]
    # Fix: removed a debug print(token) — it wrote raw bearer tokens
    # (credentials) to stdout/server logs.
    user = verify_token(token)
    if not user:
        raise HTTPException(status_code=401, detail="μ¬μ©μ μμ")
    return user
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
@router.post("/refresh")
def refresh_token(request: Request, db: Session = Depends(get_db)):
    """Exchange a valid refresh-token cookie for a fresh access token.

    The presented cookie must decode with our key AND match the token
    persisted on the User row at login; otherwise 401 is returned.
    """
    client_refresh_token = request.cookies.get("refresh_token")
    if not client_refresh_token:
        raise HTTPException(status_code=401, detail="Refresh token missing")

    try:
        payload = jwt.decode(client_refresh_token, SECRET_KEY, algorithms=[ALGORITHM])
        user_email = payload.get("sub")
        if not user_email:
            raise HTTPException(status_code=401, detail="Invalid payload")

        # Compare against the server-side copy stored at login; a mismatch
        # suggests a rotated/stale token being replayed.
        user = db.query(User).filter(User.user_email == user_email).first()
        if not user or user.refresh_token != client_refresh_token:
            raise HTTPException(
                status_code=401, detail="Token mismatch or reused token"
            )

    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="Refresh token expired")
    except jwt.InvalidTokenError:
        raise HTTPException(status_code=401, detail="Invalid refresh token")

    # Issue a new short-lived access token for the same subject.
    new_access_token = create_token(
        data={"sub": user_email},
        expires_delta=timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES),
    )

    return JSONResponse(content={"access_token": new_access_token})
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@router.post("/logout")
def logout(request: Request, db: Session = Depends(get_db)):
    """Revoke the caller's refresh token server-side and clear the cookie.

    Fix: login() persists the refresh token on the User row (and /refresh
    validates against it), but this handler previously only purged the
    unused in-memory `refresh_tokens` dict — so logging out never actually
    revoked the token. It now clears the DB-stored copy as well. The added
    `db` parameter is a FastAPI dependency with a default, so the route's
    external interface is unchanged.
    """
    # 1. Read the refresh token from the cookie.
    refresh_token = request.cookies.get("refresh_token")
    if not refresh_token:
        raise HTTPException(status_code=400, detail="Refresh token missing")

    # 2. Remove the token from server-side storage.
    try:
        payload = jwt.decode(refresh_token, SECRET_KEY, algorithms=[ALGORITHM])
        user_email = payload.get("sub")
        # Authoritative store: the User row written by login().
        user = db.query(User).filter(User.user_email == user_email).first()
        if user is not None and user.refresh_token == refresh_token:
            user.refresh_token = None
            db.add(user)
            db.commit()
        # Legacy in-memory cleanup, kept for backward compatibility.
        if refresh_tokens.get(user_email) == refresh_token:
            del refresh_tokens[user_email]
    except jwt.PyJWTError:
        # Malformed/expired token: nothing to revoke; still clear the cookie.
        pass

    res = JSONResponse(content={"message": "Logged out successfully."})
    # 3. Remove the cookie from the client.
    res.delete_cookie("refresh_token")

    return res
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
@router.post("/login")
def login(
    form_data: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)
):
    """Authenticate with e-mail + password.

    Returns a bearer access token in the body and sets an HttpOnly
    refresh-token cookie. The refresh token is also persisted on the User
    row so /refresh and /logout can validate/revoke it.
    """
    db_user = db.query(User).filter(User.user_email == form_data.username).first()
    # Same message for "no such user" and "wrong password" so the response
    # does not reveal which accounts exist.
    if not db_user:
        raise HTTPException(status_code=400, detail="Invalid email or password")

    if not pwd_context.verify(form_data.password, db_user.password):
        raise HTTPException(status_code=400, detail="Invalid email or password")
    access_token = create_token(
        data={"sub": db_user.user_email},
        expires_delta=timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES),
    )
    refresh_token = create_token(
        data={"sub": db_user.user_email},
        expires_delta=timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS),
    )
    db_user.refresh_token = refresh_token
    db.add(db_user)
    db.commit()
    res = JSONResponse(
        content={
            "access_token": access_token,
            "token_type": "bearer",
        }
    )
    res.set_cookie(
        key="refresh_token",
        value=refresh_token,
        httponly=True,
        secure=False,  # dev only — must be True behind HTTPS in production
        samesite="Lax",
        # Fix: max_age was hardcoded to 1 day while the refresh token is
        # valid for REFRESH_TOKEN_EXPIRE_DAYS — the browser dropped the
        # cookie six days before the token expired. Keep the two in sync.
        max_age=60 * 60 * 24 * REFRESH_TOKEN_EXPIRE_DAYS,
    )
    return res
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
@router.post("/signup")
def signup(user: UserSignup, db: Session = Depends(get_db)):
    """Register a new account; the password is bcrypt-hashed before storage."""
    already_registered = (
        db.query(User).filter(User.user_email == user.user_email).first()
    )
    if already_registered:
        raise HTTPException(status_code=400, detail="Email already registered")

    db.add(
        User(
            user_email=user.user_email,
            user_name=user.user_name,
            password=pwd_context.hash(user.password),  # stored as a hash
        )
    )
    db.commit()
    return {"message": "User created successfully"}
|
backend/db.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# db.py
"""SQLAlchemy engine/session setup and the FastAPI session dependency."""

import os
import urllib.parse

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base

# Fix: DB credentials were hardcoded in source. They are now read from the
# environment, falling back to the previous values so existing local setups
# keep working unchanged. Move the fallbacks out before deploying.
DB_USER = os.environ.get("DB_USER", "root")
DB_PASSWORD = urllib.parse.quote_plus(os.environ.get("DB_PASSWORD", "wjdals2413@"))
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = os.environ.get("DB_PORT", "3306")
DB_NAME = os.environ.get("DB_NAME", "gradproject")

DATABASE_URL = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}?charset=utf8mb4"
# DATABASE_URL = "mysql://root:oIvlzNQSswVbBwNiTwFsLKdmAbFlLlDx@turntable.proxy.rlwy.net:44568/railway"
engine = create_engine(DATABASE_URL, echo=True)  # echo=True logs every SQL statement
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()


def get_db():
    """FastAPI dependency: yield a session and always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
|
backend/hansepll.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
class SpellChecker:
    """Korean spell checker backed by Naver search's checker endpoint.

    The endpoint URL and its `passportKey` are scraped lazily from the
    Naver search-results page on first use and cached on the instance.
    """

    def __init__(self):
        self.passport_key = None  # query credential scraped from Naver
        self.base_url = None      # checker endpoint URL scraped from Naver

    def fetch_passport_key(self):
        """Scrape the checker base URL and passport key from the search page.

        On success sets self.base_url and self.passport_key. On failure the
        attributes remain None and an error string is returned (return value
        kept for backward compatibility).
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Referer': 'https://search.naver.com/',
        }
        # Query string is the URL-encoded Korean for "spelling checker".
        response = requests.get("https://search.naver.com/search.naver?query=%EB%A7%9E%EC%B6%A9%EB%B2%95%20%EA%B2%80%EC%82%AC%EA%B8%B0", headers=headers)
        passport_key_match = re.search(r'(?<={new SpellingCheck\({API:{checker:").*?(?="},selector)', response.text)
        if not passport_key_match:
            return "Error: Unable to retrieve passport key"
        self.base_url, self.passport_key = passport_key_match.group(0).split("?passportKey=")

    def spell_check(self, text):
        """Send *text* to the checker and return the corrected, tag-free text.

        Fix: fetch_passport_key() signals failure by returning an error
        string, which this method previously discarded — the request then
        went out with passportKey=None. Fail loudly instead.
        """
        if self.passport_key is None or self.base_url is None:
            self.fetch_passport_key()
        if self.passport_key is None or self.base_url is None:
            raise RuntimeError("Unable to retrieve Naver passport key")
        payload = {
            'passportKey': self.passport_key,
            'where': 'nexearch',
            'color_blindness': 0,
            'q': text
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Referer': 'https://search.naver.com/',
        }
        result_response = requests.get(self.base_url, headers=headers, params=payload)
        return json.loads(result_response.text)['message']['result']['notag_html']
|
| 36 |
+
|
backend/hanspell/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__version__ = '1.1'
|
backend/hanspell/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (174 Bytes). View file
|
|
|
backend/hanspell/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (177 Bytes). View file
|
|
|
backend/hanspell/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (163 Bytes). View file
|
|
|
backend/hanspell/__pycache__/constants.cpython-313.pyc
ADDED
|
Binary file (606 Bytes). View file
|
|
|
backend/hanspell/__pycache__/constants.cpython-39.pyc
ADDED
|
Binary file (506 Bytes). View file
|
|
|
backend/hanspell/__pycache__/response.cpython-313.pyc
ADDED
|
Binary file (1.35 kB). View file
|
|
|
backend/hanspell/__pycache__/response.cpython-39.pyc
ADDED
|
Binary file (974 Bytes). View file
|
|
|
backend/hanspell/__pycache__/spell_checker.cpython-312.pyc
ADDED
|
Binary file (6 kB). View file
|
|
|
backend/hanspell/__pycache__/spell_checker.cpython-313.pyc
ADDED
|
Binary file (6.03 kB). View file
|
|
|
backend/hanspell/__pycache__/spell_checker.cpython-39.pyc
ADDED
|
Binary file (3.85 kB). View file
|
|
|
backend/hanspell/constants.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Naver speller proxy endpoint queried by spell_checker.check().
base_url = 'https://m.search.naver.com/p/csearch/ocontent/util/SpellerProxy'


class CheckResult:
    """Per-word classification codes produced by spell_checker.check().

    The codes mirror the <em class='…_text'> color markers in the Naver
    response: red→spelling, green→spacing, violet→ambiguous, blue→statistical.
    """

    PASSED = 0
    WRONG_SPELLING = 1
    WRONG_SPACING = 2
    AMBIGUOUS = 3
    STATISTICAL_CORRECTION = 4
|
| 10 |
+
|
backend/hanspell/response.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
from collections import namedtuple
|
| 3 |
+
|
| 4 |
+
# μ‘°μ¬μ μ΄λ―Έλ λ¨μ΄λ‘ μ²λ¦¬ν¨. λ§λ
ν μλ¨μ΄κ° μκ°μ΄ μ λμ..
|
| 5 |
+
# Result record of one spell check. (Original note: particles are treated
# as part of the preceding word, so sub-word boundaries are not marked.)
_checked = namedtuple('Checked',
                      ['result', 'original', 'checked', 'errors', 'words', 'time'])


class Checked(_checked):
    """Immutable spell-check result with dict-conversion helpers.

    Fields: result flag, original text, corrected text, error count,
    per-word classification mapping, and elapsed request time.
    """

    def __new__(cls, result=False, original='', checked='', errors=0, words=None, time=0.0):
        # Fix: the default was `words=[]`, a shared mutable default — every
        # default-constructed instance aliased the SAME list. Use a None
        # sentinel and build a fresh list per instance instead.
        if words is None:
            words = []
        return super(Checked, cls).__new__(
            cls, result, original, checked, errors, words, time)

    def as_dict(self):
        """Return the result as a plain dict (e.g. for JSON serialization)."""
        return {
            'result': self.result,
            'original': self.original,
            'checked': self.checked,
            'errors': self.errors,
            'words': self.words,
            'time': self.time,
        }

    def only_checked(self):
        """Return just the corrected text."""
        return self.checked
|
backend/hanspell/spell_checker.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Pythonμ© νκΈ λ§μΆ€λ² κ²μ¬ λͺ¨λ
|
| 4 |
+
"""
|
| 5 |
+
import re
|
| 6 |
+
import requests
|
| 7 |
+
import json
|
| 8 |
+
import time
|
| 9 |
+
import sys
|
| 10 |
+
from collections import OrderedDict
|
| 11 |
+
import xml.etree.ElementTree as ET
|
| 12 |
+
|
| 13 |
+
from . import __version__
|
| 14 |
+
from .response import Checked
|
| 15 |
+
from .constants import base_url
|
| 16 |
+
from .constants import CheckResult
|
| 17 |
+
|
| 18 |
+
_agent = requests.Session()
|
| 19 |
+
PY3 = sys.version_info[0] == 3
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_passport_key():
    """Fetch the `passportKey` from the Naver spell-checker search page.

    Loads the Naver search results for "λ€μ΄λ² λ§μΆ€λ² κ²μ¬κΈ°" and greps the
    HTML for a `passportKey=...` query parameter.

    Returns:
        The key string on success, or False when no key is found.
    """

    url = "https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=λ€μ΄λ²+λ§μΆ€λ²+κ²μ¬κΈ°"
    res = requests.get(url)

    html_text = res.text

    match = re.search(r'passportKey=([^&"}]+)', html_text)
    if match:
        passport_key = match.group(1)
        return passport_key
    else:
        return False
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def fix_spell_checker_py_code(file_path, passportKey):
    """Rewrite the `'passportKey': '...'` literal inside *file_path*.

    Reads the file, substitutes the freshly scraped key, and writes it back.
    """
    pattern = r"'passportKey': '.*'"
    replacement = f"'passportKey': '{passportKey}'"

    with open(file_path, 'r', encoding='utf-8') as src:
        patched = re.sub(pattern, replacement, src.read())

    with open(file_path, 'w', encoding='utf-8') as dst:
        dst.write(patched)
|
| 58 |
+
passport_key = get_passport_key()
|
| 59 |
+
|
| 60 |
+
def _remove_tags(text):
|
| 61 |
+
text = u'<content>{}</content>'.format(text).replace('<br>','')
|
| 62 |
+
if not PY3:
|
| 63 |
+
text = text.encode('utf-8')
|
| 64 |
+
|
| 65 |
+
result = ''.join(ET.fromstring(text).itertext())
|
| 66 |
+
|
| 67 |
+
return result
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def check(text):
    """Spell-check Korean *text* via the Naver speller.

    Accepts a string (returns a Checked) or a list of strings (returns a
    list of Checked, one per item). Texts over 500 characters are rejected
    with Checked(result=False).
    """
    if isinstance(text, list):
        result = []
        for item in text:
            checked = check(item)
            result.append(checked)
        return result

    # The service accepts at most 500 characters per request.
    if len(text) > 500:
        return Checked(result=False)

    payload = {
        "passportKey": passport_key,
        'color_blindness': '0',
        'q': text
    }

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
        'referer': 'https://search.naver.com/',
    }

    start_time = time.time()
    r = _agent.get(base_url, params=payload, headers=headers)
    passed_time = time.time() - start_time

    data = json.loads(r.text)
    html = data['message']['result']['html']
    result = {
        'result': True,
        'original': text,
        'checked': _remove_tags(html),
        'errors': data['message']['result']['errata_count'],
        'time': passed_time,
        'words': OrderedDict(),
    }

    # Replace Naver's <em class='…_text'> color markers with short sentinel
    # tags so per-word classification survives splitting on spaces.
    # (Original note: plain string replacement is used instead of
    # ElementTree iteration because the code is short enough not to need it.)
    html = html.replace('<em class=\'green_text\'>', '<green>') \
               .replace('<em class=\'red_text\'>', '<red>') \
               .replace('<em class=\'violet_text\'>', '<violet>') \
               .replace('<em class=\'blue_text\'>', '<blue>') \
               .replace('</em>', '<end>')
    items = html.split(' ')
    words = []
    tmp = ''
    for word in items:
        # Carry an open sentinel across spaces until its <end> closes it,
        # so multi-word corrections keep their classification.
        if tmp == '' and word[:1] == '<':
            pos = word.find('>') + 1
            tmp = word[:pos]
        elif tmp != '':
            word = u'{}{}'.format(tmp, word)

        if word[-5:] == '<end>':
            word = word.replace('<end>', '')
            tmp = ''

        words.append(word)

    # Map each sentinel prefix back to its CheckResult code.
    for word in words:
        check_result = CheckResult.PASSED
        if word[:5] == '<red>':
            check_result = CheckResult.WRONG_SPELLING
            word = word.replace('<red>', '')
        elif word[:7] == '<green>':
            check_result = CheckResult.WRONG_SPACING
            word = word.replace('<green>', '')
        elif word[:8] == '<violet>':
            check_result = CheckResult.AMBIGUOUS
            word = word.replace('<violet>', '')
        elif word[:6] == '<blue>':
            check_result = CheckResult.STATISTICAL_CORRECTION
            word = word.replace('<blue>', '')
        result['words'][word] = check_result

    result = Checked(**result)

    return result
|
backend/main.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, Depends
|
| 2 |
+
from fastapi.responses import JSONResponse
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from typing import List, Optional
|
| 5 |
+
from recommendWord import recommendWord
|
| 6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
+
from sentence_transformers import SentenceTransformer
|
| 8 |
+
from keybert import KeyBERT
|
| 9 |
+
from kiwipiepy import Kiwi
|
| 10 |
+
import pandas as pd
|
| 11 |
+
import faiss
|
| 12 |
+
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
| 13 |
+
from ref import refRecommend
|
| 14 |
+
from spellchecker import check
|
| 15 |
+
from auth.routes import router as auth_router
|
| 16 |
+
from auth.routes import get_current_user
|
| 17 |
+
from post_router import router as post_router
|
| 18 |
+
import os
|
| 19 |
+
import requests
|
| 20 |
+
|
| 21 |
+
FAISS_URL = (
|
| 22 |
+
"https://huggingface.co/datasets/uuuy5615/my_index/resolve/main/faiss_index.idx"
|
| 23 |
+
)
|
| 24 |
+
CSV_URL = "https://huggingface.co/datasets/uuuy5615/my_index/resolve/main/kci.csv"
|
| 25 |
+
|
| 26 |
+
FAISS_PATH = "faiss_index.idx"
|
| 27 |
+
CSV_PATH = "kci.csv"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def mask_by_position(sentence: str, start: int, end: int) -> str:
    """Replace sentence[start:end] with the literal "[MASK]" token."""
    prefix, suffix = sentence[:start], sentence[end:]
    return f"{prefix}[MASK]{suffix}"
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
if not os.path.exists(FAISS_PATH):
|
| 35 |
+
print("FAISS νμΌ λ€μ΄λ‘λ μ€...")
|
| 36 |
+
r = requests.get(FAISS_URL)
|
| 37 |
+
r.raise_for_status() # μ€ν¨ μ μλ¬ λ°μ
|
| 38 |
+
with open(FAISS_PATH, "wb") as f:
|
| 39 |
+
f.write(r.content)
|
| 40 |
+
print("FAISS λ€μ΄λ‘λ μλ£!")
|
| 41 |
+
|
| 42 |
+
# CSV νμΌ λ€μ΄λ‘λ
|
| 43 |
+
if not os.path.exists(CSV_PATH):
|
| 44 |
+
print("CSV νμΌ λ€μ΄λ‘λ μ€...")
|
| 45 |
+
r = requests.get(CSV_URL)
|
| 46 |
+
r.raise_for_status()
|
| 47 |
+
with open(CSV_PATH, "wb") as f:
|
| 48 |
+
f.write(r.content)
|
| 49 |
+
print("CSV λ€μ΄λ‘λ μλ£!")
|
| 50 |
+
|
| 51 |
+
# refrec
|
| 52 |
+
refModel = SentenceTransformer("jhgan/ko-sbert-nli")
|
| 53 |
+
kw_model = KeyBERT(refModel)
|
| 54 |
+
kiwi = Kiwi()
|
| 55 |
+
df = pd.read_csv("kci.csv", low_memory=False)
|
| 56 |
+
index = faiss.read_index("faiss_index.idx")
|
| 57 |
+
|
| 58 |
+
# wordrec
|
| 59 |
+
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
|
| 60 |
+
wordModel = AutoModelForMaskedLM.from_pretrained("klue/roberta-large")
|
| 61 |
+
|
| 62 |
+
app = FastAPI()
|
| 63 |
+
|
| 64 |
+
app.include_router(auth_router, prefix="/auth", tags=["auth"])
|
| 65 |
+
app.include_router(post_router, prefix="/post", tags=["post"])
|
| 66 |
+
|
| 67 |
+
app.add_middleware(
|
| 68 |
+
CORSMiddleware,
|
| 69 |
+
allow_origins=[
|
| 70 |
+
"http://127.0.0.1:5173",
|
| 71 |
+
"https://geulditbul.vercel.app",
|
| 72 |
+
], # React μ± μ£Όμ
|
| 73 |
+
allow_credentials=True,
|
| 74 |
+
allow_methods=["*"],
|
| 75 |
+
allow_headers=["*"],
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class SpellCheckRequest(BaseModel):
|
| 80 |
+
text: str
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
class Correction(BaseModel):
|
| 84 |
+
error: str
|
| 85 |
+
checked: str
|
| 86 |
+
position: Optional[int]
|
| 87 |
+
length: int
|
| 88 |
+
errortype: int
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
class SpellCheckResponse(BaseModel):
|
| 92 |
+
flag: int
|
| 93 |
+
original_text: str
|
| 94 |
+
checked_text: str
|
| 95 |
+
corrections: List[Correction]
|
| 96 |
+
time: float
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
@app.post("/api/spellcheck", response_model=SpellCheckResponse)
def api_spellcheck(req: SpellCheckRequest, _: dict = Depends(get_current_user)):
    """Run the spell checker on req.text (authenticated endpoint).

    check() returns a dict shaped like SpellCheckResponse; FastAPI validates
    it against the response_model.
    """
    return check(req.text)
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
@app.get("/model/WordRec")
async def runWordRec(
    user_sentence: str,
    MaskWord: str,
    start: int,
    end: int,
    _: dict = Depends(get_current_user),
):
    """Recommend replacement words for a span of *user_sentence*.

    The span is masked with "[MASK]" and fed to the MLM-based recommender.

    NOTE(review): start/end are shifted by -1 before masking, which implies
    the client sends 1-based positions — confirm against the frontend.
    """
    sentence = mask_by_position(user_sentence, start - 1, end - 1)
    rec_words = recommendWord(sentence, MaskWord, tokenizer, wordModel)

    data = {"Model": "WordRec", "masked_word": MaskWord, "rec_result": rec_words}

    return JSONResponse(content=data)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@app.get("/model/RefRec")
async def runRefRec(text: str, _: dict = Depends(get_current_user)):
    """Recommend related references (names + links) for *text*.

    Delegates to refRecommend() with the preloaded SBERT/KeyBERT models,
    the KCI dataframe, and the FAISS index built at startup.
    """
    name, link = refRecommend(refModel, kw_model, kiwi, text, df, index)
    data = {"Model": "RefRec", "name_result": name, "link_result": link}

    return JSONResponse(content=data)
|
backend/models.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models.py
|
| 2 |
+
|
| 3 |
+
from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey
|
| 4 |
+
from sqlalchemy.sql import func
|
| 5 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 6 |
+
|
| 7 |
+
Base = declarative_base()
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Post(Base):
    """A user-authored post (table `post`)."""

    __tablename__ = "post"

    post_id = Column(Integer, primary_key=True, index=True)
    title = Column(String(100), nullable=False)
    content = Column(Text, nullable=True)
    # Timestamps are maintained by the database server, not by the app.
    created_date = Column(DateTime(timezone=True), server_default=func.now())
    updated_date = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
    user_id = Column(Integer, ForeignKey("user.user_id"), nullable=False)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class User(Base):
    """An account (table `user`); e-mail is the unique login identifier."""

    __tablename__ = "user"

    user_id = Column(Integer, primary_key=True, index=True)
    user_email = Column(String(255), nullable=False, unique=True, index=True)
    user_name = Column(String(100))
    password = Column(String(255), nullable=False)  # bcrypt hash, never plaintext
    join_date = Column(DateTime, default=func.now())
    last_login_date = Column(DateTime, nullable=True)
    # Latest refresh JWT issued at login; /auth/refresh compares the cookie
    # against this server-side copy.
    refresh_token = Column(String(512), nullable=True)
|
backend/post_router.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
from models import Post, User
|
| 4 |
+
from db import get_db
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from typing import Optional
|
| 7 |
+
from fastapi.security import OAuth2PasswordBearer
|
| 8 |
+
import jwt
|
| 9 |
+
from auth.routes import get_current_user
|
| 10 |
+
|
| 11 |
+
class PostCreate(BaseModel):
    """Request body for creating a post."""

    title: str
    content: Optional[str] = None
|
| 14 |
+
|
| 15 |
+
class PostUpdate(BaseModel):
    """Request body for updating an existing post, identified by post_id."""

    post_id: int
    title: str
    content: Optional[str] = None
|
| 19 |
+
|
| 20 |
+
SECRET_KEY = "52a6206f34a1c479da043cdeee17fd859a35e54978a6733a6a7ebadcbd11f0ca"
|
| 21 |
+
ALGORITHM = "HS256"
|
| 22 |
+
|
| 23 |
+
router = APIRouter()
|
| 24 |
+
|
| 25 |
+
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login") # λ‘κ·ΈμΈ κ²½λ‘ μ€μ
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_auth_user(
    token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)
) -> User:
    """Resolve the bearer token to a User row.

    Decodes the JWT, reads the e-mail from the "sub" claim, and loads the
    matching user. Raises HTTP 401 on a bad/expired token, a missing claim,
    or an unknown user.
    """
    try:
        claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.PyJWTError:
        raise HTTPException(status_code=401, detail="Invalid token")

    email: str = claims.get("sub")
    if email is None:
        raise HTTPException(status_code=401, detail="Token payload invalid")

    account = db.query(User).filter(User.user_email == email).first()
    if account is None:
        raise HTTPException(status_code=401, detail="User not found")

    return account
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@router.post("/posts")
def create_post(
    post: PostCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_auth_user),
):
    """Persist a new post owned by the authenticated user."""
    record = Post(
        title=post.title, content=post.content, user_id=current_user.user_id
    )
    db.add(record)
    db.commit()
    db.refresh(record)  # load the generated primary key
    return {"message": "Post created", "post_id": record.post_id}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@router.get("/list")
def get_my_posts(
    current_user: User = Depends(get_auth_user), db: Session = Depends(get_db)
):
    """List the caller's posts as {post_id, title} summaries."""
    owned = db.query(Post).filter(Post.user_id == current_user.user_id).all()
    summaries = []
    for entry in owned:
        summaries.append({"post_id": entry.post_id, "title": entry.title})
    return summaries
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@router.get("/{post_id}")
def read_user_post(
    post_id: int,
    current_user: User = Depends(get_auth_user),
    db: Session = Depends(get_db),
):
    """Return one post, but only when it belongs to the caller."""
    found = (
        db.query(Post)
        .filter(Post.post_id == post_id, Post.user_id == current_user.user_id)
        .first()
    )
    # 404 for both "absent" and "owned by someone else" — no ownership leak.
    if found is None:
        raise HTTPException(status_code=404, detail="ν¬μ€νΈκ° μκ±°λ κΆνμ΄ μμ΅λλ€.")
    return {"post_id": found.post_id, "title": found.title, "content": found.content}
|
| 83 |
+
|
| 84 |
+
@router.put("/save")
def update_post(
    post: PostUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_auth_user),
):
    """Overwrite title/content of a post the caller owns."""
    target = (
        db.query(Post)
        .filter(Post.post_id == post.post_id, Post.user_id == current_user.user_id)
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Post not found or access denied")

    target.title = post.title
    target.content = post.content
    db.commit()
    db.refresh(target)

    return {"message": "Post updated successfully"}
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@router.delete("/{post_id}")
def delete_post(
    post_id: int, db: Session = Depends(get_db), user=Depends(get_auth_user)
):
    """Delete a post owned by the caller; 404 if absent or not theirs."""
    target = (
        db.query(Post)
        .filter(Post.post_id == post_id, Post.user_id == user.user_id)
        .first()
    )
    if target is None:
        raise HTTPException(status_code=404, detail="Post not found")
    db.delete(target)
    db.commit()
    return {"message": "Post deleted"}
|
backend/recommendWord.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
from urllib.parse import quote
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def get_synonyms_from_wordsisters(word: str) -> list[str]:
    """Fetch synonyms for *word* from the wordsisters AI endpoint.

    Returns an empty list on any network or parsing failure (best-effort).
    """
    encoded_word = quote(word)
    # BUG FIX: the request path must use the URL-encoded word. The original
    # interpolated the raw `word`, which broke any term containing non-ASCII
    # (e.g. Korean) or URL-reserved characters.
    url = f"https://wordsisters.com/api/ai/{encoded_word}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": f"https://wordsisters.com/search/{encoded_word}",
    }
    try:
        # Timeout added so a hung endpoint can't stall the caller forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        data = response.json()
        return data.get("result", {}).get("synonyms", [])
    except Exception as e:
        print(f"Error fetching synonyms: {e}")
        return []
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def extract_synonyms_from_html(html: str) -> list[str]:
    """Collect unique '.link_relate' texts from a Daum dictionary result page."""
    try:
        dom = BeautifulSoup(html, "html.parser")
        collected = []
        for anchor in dom.select(".link_relate"):
            label = anchor.get_text(strip=True)
            # Keep first occurrence only, preserving page order.
            if label and label not in collected:
                collected.append(label)
        print(f"Extracted synonyms: {collected}")
        return collected
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def get_synonyms_from_daum(word: str) -> list[str]:
    """Search dic.daum.net for *word* and return related terms.

    BUG FIX: the except branch previously fell through, so failures returned
    None despite the declared list[str] — callers iterating the result would
    crash. Now returns [] on failure, matching the twin helper in test.py.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
        params = {"q": word}

        response = requests.get(
            "https://dic.daum.net/search.do",
            params=params,
            headers=headers,
            timeout=10,  # don't hang on a slow endpoint
        )
        response.raise_for_status()

        return extract_synonyms_from_html(response.text)
    except Exception as e:
        print(f"Error fetching from Daum: {e}")
        return []
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def max_logit(tensor, symDict, tokenizer):
    """Return up to three entries of *symDict* found among the model's
    top-ranked vocabulary tokens, in rank order.

    tensor: result of torch.sort(..., descending=True) — tensor[1] holds the
    sorted token indices (best first).

    PERF FIX: the original ran a nested O(32000 * len(symDict)) scan and
    re-decoded the same token id once per inner comparison. Each id is now
    decoded exactly once and membership is an O(1) set lookup.
    """
    candidates = set(symDict)
    found = []
    indices = tensor[1][0]
    # Guard against index structures shorter than the assumed 32000-id vocab.
    for i in range(min(32000, len(indices))):
        token = str(tokenizer.decode(indices[i]))
        if token in candidates:
            found.append(token)
            if len(found) >= 3:
                break
    return found
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def recommendWord(user_sentence, MaskWord, tokenizer, model):
    """Suggest up to three synonyms of MaskWord that the masked-LM ranks
    highly for the [MASK] slot inside user_sentence."""
    encoded = tokenizer(user_sentence, return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded).logits
    # Index of the [MASK] token within the encoded sequence.
    mask_positions = (encoded.input_ids == tokenizer.mask_token_id)[0].nonzero(
        as_tuple=True
    )[0]
    synonyms = get_synonyms_from_wordsisters(MaskWord)
    ranked = torch.sort(logits[0, mask_positions], dim=-1, descending=True)
    return max_logit(ranked, synonyms, tokenizer)
|
| 85 |
+
|
| 86 |
+
|
backend/ref.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import faiss
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
import math
|
| 5 |
+
from keybert import KeyBERT
|
| 6 |
+
from kiwipiepy import Kiwi
|
| 7 |
+
import urllib.parse
|
| 8 |
+
import json
|
| 9 |
+
|
| 10 |
+
def safe_int(val):
    """Return int(val), or '' when val is None or NaN (a missing cell)."""
    missing = val is None or (isinstance(val, float) and math.isnan(val))
    return '' if missing else int(val)
|
| 17 |
+
|
| 18 |
+
def generate_dbpia_link(title):
    """Build a DBpia all-field search URL for the given paper title."""
    query = urllib.parse.quote(title)
    return f"https://www.dbpia.co.kr/search/topSearch?searchOption=all&query={query}"
|
| 23 |
+
|
| 24 |
+
def generate_reference(row):
    """Build an APA-style citation string plus a DBpia search link.

    row: dict-like paper record (e.g. a DataFrame row). Keys are the Korean
    CSV column names: author, publication year, paper title, journal name,
    volume, issue, start page, end page.
    Returns (reference_string, link).
    """
    vol = safe_int(row.get('κΆ'))
    issue = safe_int(row.get('νΈ'))
    start_page = safe_int(row.get('μμνμ΄μ§'))
    end_page = safe_int(row.get('λνμ΄μ§'))

    # Page range only when both endpoints are present; '' otherwise.
    pages = f"{start_page}-{end_page}" if start_page != '' and end_page != '' else ''

    # "Author. (Year). Title. Journal" — missing fields degrade to ''.
    ref = f"{row.get('μ μ', '')}. ({safe_int(row.get('λ°νλ'))}). {row.get('λΌλ¬Έλͺ(κ΅λ¬Έ)', '')}. {row.get('νμ μ§λͺ(κ΅λ¬Έ)', '')}"

    # Append volume/issue when available, e.g. ", 12(3)".
    if vol != '' or issue != '':
        issue_str = f"({issue})" if issue != '' else ''
        ref += f", {vol}{issue_str}"

    if pages:
        ref += f", {pages}."
    else:
        ref += "."

    # Link searches DBpia by the paper's (Korean) title.
    link = generate_dbpia_link(row.get('λΌλ¬Έλͺ(κ΅λ¬Έ)', ''))
    return (ref,link)
|
| 49 |
+
|
| 50 |
+
def refRecommend(model,kw_model,kiwi,text,df,index):
    """Recommend reference papers for *text*.

    Pipeline: extract nouns with Kiwi -> pick keywords with KeyBERT ->
    embed the top-2 keywords with the SentenceTransformer -> FAISS top-3
    nearest papers -> format each hit via generate_reference().

    Returns (name, link): parallel lists of citation strings and DBpia URLs.
    """
    nouns_list = []
    # Keep only noun tokens (Kiwi POS tags starting with 'NN').
    for sentence in kiwi.analyze(text):
        nouns = [token.form for token in sentence[0] if token.tag.startswith('NN')]
        if nouns:
            nouns_list.extend(nouns)
    result_text = ' '.join(nouns_list)

    keywords = kw_model.extract_keywords(result_text, keyphrase_ngram_range=(1, 1), stop_words=None, top_n=5)

    # NOTE(review): assumes at least two keywords were extracted —
    # keywords[1] raises IndexError on very short input; confirm upstream.
    query_vector = model.encode([keywords[0][0]+" "+keywords[1][0]])
    D, I = index.search(query_vector, k=3)  # k = number of nearest papers to return

    results = df.iloc[I[0]]  # I[0] holds the row indices of the top-k hits

    name = []
    link = []
    # Format each hit into a citation string plus a DBpia search link.
    for i, row in results.iterrows():
        name_result,link_result = generate_reference(row)
        print(name_result)
        name.append(name_result)
        link.append(link_result)
    return name,link
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
beautifulsoup4==4.13.4
|
| 2 |
+
faiss_cpu==1.11.0
|
| 3 |
+
fastapi==0.115.12
|
| 4 |
+
keybert==0.9.0
|
| 5 |
+
kiwipiepy==0.21.0
|
| 6 |
+
pandas==2.2.3
|
| 7 |
+
passlib==1.7.4
|
| 8 |
+
pydantic==2.11.5
|
| 9 |
+
PyJWT==2.10.1
|
| 10 |
+
Requests==2.32.3
|
| 11 |
+
sentence_transformers==4.1.0
|
| 12 |
+
SQLAlchemy==2.0.41
|
| 13 |
+
torch==2.7.0
|
| 14 |
+
transformers==4.51.3
|
| 15 |
+
uvicorn==0.34.3
|
| 16 |
+
mysqlclient
|
| 17 |
+
pydantic[email]
|
| 18 |
+
python-multipart
|
| 19 |
+
bcrypt
|
| 20 |
+
pymysql
|
backend/spellchecker.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import difflib
|
| 3 |
+
from hanspell import spell_checker
|
| 4 |
+
from hanspell.constants import CheckResult
|
| 5 |
+
from kiwipiepy import Kiwi
|
| 6 |
+
|
| 7 |
+
# Map hanspell CheckResult statuses to the integer error codes this
# module exposes in each correction's "errortype" field.
ERROR_TYPE_MAPPING = {
    CheckResult.PASSED: 0,  # word or phrase with no problem
    CheckResult.WRONG_SPELLING: 1,  # spelling error
    CheckResult.WRONG_SPACING: 2,  # spacing error
    CheckResult.AMBIGUOUS: 3,  # suspected non-standard word
    CheckResult.STATISTICAL_CORRECTION: 4,  # statistically corrected word
}
|
| 14 |
+
import difflib
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def update_corrections_by_error_text(original_text, checked_text, corrections):
    """Re-anchor each correction by locating its error text in the original.

    For every correction dict, find its "error" substring in original_text
    (falling back to the stored "position" when absent), then record the
    same-offset, same-length slice of checked_text as the corrected form.
    Returns new dicts; the input corrections are not mutated.
    """
    rebuilt = []

    for entry in corrections:
        wrong = entry["error"]
        at = original_text.find(wrong)

        if at == -1:
            # Couldn't locate the error text — trust the reported position.
            at = entry["position"]

        span = len(wrong)
        fixed = dict(entry)
        # Assume the same offset holds in the corrected text.
        fixed["checked"] = checked_text[at : at + span]
        fixed["position"] = at  # corrected anchor
        fixed["length"] = span
        rebuilt.append(fixed)

    return rebuilt
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def extract_phrase(text: str, position: int) -> str:
    """Return the space-delimited word surrounding index *position* in *text*.

    Out-of-range positions yield "".
    """
    if not 0 <= position < len(text):
        return ""

    # Walk left from position-1 until a space (or the string start).
    start = position - 1
    while start >= 0 and text[start] != " ":
        start -= 1

    # Walk right from position+1 until a space (or the string end).
    end = position + 1
    while end < len(text) and text[end] != " ":
        end += 1

    return text[start + 1 : end]
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_space_diffs(original: str, corrected: str):
    """Walk *original* and *corrected* in lockstep and emit a correction
    record for every spacing difference.

    Each record re-runs the spell checker on just the surrounding phrase
    (one network call per diff) to obtain its corrected form.
    length == -1 marks a deleted space, length == 1 an inserted space.
    NOTE(review): the two-cursor walk assumes the strings stay aligned
    outside of single-character edits — confirm for longer rewrites.
    """
    diffs = []
    orig_len = len(original)
    corr_len = len(corrected)
    o_idx = c_idx = 0

    while o_idx < orig_len and c_idx < corr_len:
        o_char = original[o_idx]
        c_char = corrected[c_idx]

        # Identical characters: advance both cursors.
        if o_char == c_char:
            o_idx += 1
            c_idx += 1
            continue
        # Space in the original but not in the correction -> delete_space.
        if o_char == " " and c_char != " ":
            error = extract_phrase(original, o_idx)
            check = spell_checker.check(error).as_dict()["checked"]
            diffs.append(
                {
                    "error": error,
                    "checked": check,
                    "position": o_idx,
                    "length": -1,
                    # NOTE(review): assumes CheckResult values are plain ints
                    # (2 == WRONG_SPACING) — verify hanspell.constants.
                    "errortype": ERROR_TYPE_MAPPING[2],
                }
            )
            o_idx += 1  # skip the removed space

        # Space in the correction but not in the original -> insert_space.
        elif c_char == " " and o_char != " ":
            # Assume the space is inserted right after the preceding character.
            error = extract_phrase(original, o_idx)
            check = spell_checker.check(error).as_dict()["checked"]
            diffs.append(
                {
                    "error": error,
                    "checked": check,
                    "position": o_idx,  # insertion point, original-text index
                    "length": 1,
                    "errortype": ERROR_TYPE_MAPPING[2],
                }
            )
            c_idx += 1  # skip the inserted space

        # Both differ but neither is a space (grammar fix): just advance.
        else:
            o_idx += 1
            c_idx += 1

    return diffs
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def check(text: str):
    """Spell-check *text* with hanspell and return a structured report.

    Returns a dict with:
      flag          — 0 when no corrections were made, 1 otherwise
      original_text — the input as echoed by the checker
      checked_text  — the fully corrected text
      corrections   — per-error records sorted by original-text position
      time          — the checker's reported elapsed time
    """
    ch_text = spell_checker.check(text)
    info = ch_text.as_dict()
    orig_text = info["original"]
    corr_text = info["checked"]
    time = info["time"]
    # flag: did the checker change anything at all?
    if orig_text == corr_text:
        flag = 0
    else:
        flag = 1
    print(info["words"])
    # Spacing-only diffs are handled separately by the lockstep walker.
    space = get_space_diffs(orig_text, corr_text)
    # 1) Build a character-level mapping corrected-index -> original-index.
    sm = difflib.SequenceMatcher(None, orig_text, corr_text)
    mapping = {}
    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        if tag == "equal":
            # Equal block: one-to-one mapping.
            for offset in range(i2 - i1):
                mapping[j1 + offset] = i1 + offset
        elif tag in ("replace", "insert"):
            # Replace/insert block: map every corrected char to the block start.
            for offset in range(j2 - j1):
                mapping[j1 + offset] = i1
    # 2) For each checker token, locate its original position and misspelled form.
    corrections = []
    for token, status in info["words"].items():
        if status == CheckResult.PASSED or status == CheckResult.WRONG_SPACING:
            continue

        # NOTE(review): find() returns the FIRST occurrence — repeated tokens
        # may anchor to the wrong instance.
        corr_pos = corr_text.find(token)

        if corr_pos != -1 and corr_pos in mapping:
            orig_pos = mapping[corr_pos]
            # Slice the original text to the token's length.
            error_word = orig_text[orig_pos : orig_pos + len(token)]
        else:
            orig_pos = None
            error_word = token
        length = len(error_word)
        corrections.append(
            {
                "error": error_word,
                "checked": token,
                "position": orig_pos,
                "length": length,
                "errortype": ERROR_TYPE_MAPPING[status],
            }
        )
    combined = corrections + space
    # NOTE(review): position can be None (unlocated token), which makes
    # sorted() raise TypeError on Python 3 — confirm this cannot occur.
    sorted_combined = sorted(combined, key=lambda x: x["position"])
    result = {
        "flag": flag,
        "original_text": info["original"],
        "checked_text": info["checked"],
        "corrections": sorted_combined,
        "time": time,
    }

    return result
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
if __name__ == "__main__":
|
| 176 |
+
sample = "λλ μ€λ μμΉ¨λ°₯μ λ¨Ήκ³ νκ΅ λ₯Ό κ°λ€.νκ΅ λ₯Ό μλ μΉκ΅¬λ€μ΄ λ§μΉλ§, μ€λμ λ³λ£¨ 보μ΄μ§ μμλ€. νκ΅μ 문ꡬμ μμ λ³Όνμ μλλ°, κ·Έ λ³Όνμ μν¬κ° μμ£Ό λ§λΌμ μμ£Ό λ°κΏμΌνλ€. νκ΅μμ νκ΅ νμ¬μ λν μκΈ°λ₯Ό λ€μλλ°, λ³λ£¨ κΈ°λλ μλλ€."
|
| 177 |
+
sample2 = "νλ κ΅μ‘μ λ¨μν μ§μμ μ λ¬νλ κ²μ λμ΄μ, νμμ μ μΈμ μ±μμ λͺ©νλ‘ νλ€. μ΄μ λ°λΌ μ μμ μ§μ§μ μ¬νμ± κ΅μ‘λ μ μ μ€μν΄μ§κ³ μμλ€. κ·Έλ¬λ μμ§λ λ§μ νκ΅μμλ μ£Όμ
μ κ΅μ‘μ΄ μ€μ¬μ΄ λμ΄, νμλ€μ΄ μ£Όλμ μΌλ‘ νμ΅ν κΈ°νκ° μ λ€. λν, κ΅μ¬λ€μ κ³Όλν νμ μ
λ¬΄λ‘ μΈν΄ μμ
μ€λΉμ μΆ©λΆν μκ°μ κ°μ§μ μκ³ , μ΄λ κ΅μ‘μ μ§ μ νλ‘ μ΄μ΄μ§ μ μλ°. μ§μμ μΈ κ΅μ¬ μ°μμ κ΅μ‘νκ²½ κ°μ μ΄ λ·λ°μΉ¨λμ΄μΌλ§ λ―Έλν κ΅μ‘μ΄ μ€νλ μ μμ¬ κ²μ΄λ€."
|
| 178 |
+
output = check(sample2)
|
| 179 |
+
print(json.dumps(output, ensure_ascii=False, indent=2))
|
| 180 |
+
print(sample2[79])
|
| 181 |
+
# "flag": λ¬Έμ₯μ λ§μΆ€λ² μ€λ₯κ° μλμ§μ μ¬λΆ(0: μμ/1: μμ)
|
| 182 |
+
# "original_text": μλ³Έ λ¬Έμ₯
|
| 183 |
+
# "checked_text": λ§μΆ€λ²μ΄ μμ λ λ¬Έμ₯
|
| 184 |
+
# "corrections"[
|
| 185 |
+
# {
|
| 186 |
+
# "error": λ§μΆ€λ²μ΄ νλ¦° λ¨μ΄
|
| 187 |
+
# "position": νλ¦° λ¨μ΄μ λ¬Έμ₯ λ΄ μμΉ(μμμ )
|
| 188 |
+
# "errortype": μ€λ₯ μ ν(1~4)
|
| 189 |
+
# },
|
| 190 |
+
# ]
|
| 191 |
+
# "time": μμ μκ°
|
backend/test.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
|
| 4 |
+
def get_synonyms_from_daum(word: str) -> list[str]:
    """Search dic.daum.net for *word* and return related terms.

    BUG FIX: the except branch previously returned None implicitly despite
    the declared list[str]; it now returns [] so callers can iterate safely.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        }
        params = {
            'q': word
        }

        response = requests.get(
            "https://dic.daum.net/search.do", params=params, headers=headers, timeout=10
        )
        response.raise_for_status()

        return extract_synonyms_from_html(response.text)
    except Exception as e:
        print(f"Error fetching from Daum: {e}")
        return []
|
| 19 |
+
|
| 20 |
+
def extract_synonyms_from_html(html: str) -> list[str]:
    """Collect unique '.link_relate' texts from a Daum dictionary page."""
    try:
        dom = BeautifulSoup(html, 'html.parser')
        collected = []
        for anchor in dom.select('.link_relate'):
            value = anchor.get_text(strip=True)
            # First occurrence only, in page order.
            if value and value not in collected:
                collected.append(value)
        print(f"Extracted synonyms: {collected}")
        return collected
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# μμ μ¬μ©
|
| 38 |
+
|
| 39 |
+
# word = "λ₯λ ₯"
|
| 40 |
+
# synonyms = get_synonyms_from_daum(word)
|
| 41 |
+
def mask_by_position(sentence: str, start: int, end: int) -> str:
    """Replace the 1-indexed [start, end) span of *sentence* with "[MASK]"."""
    prefix = sentence[: start - 1]
    suffix = sentence[end - 1 :]
    return prefix + "[MASK]" + suffix
|
| 43 |
+
|
| 44 |
+
# a = mask_by_position("κ°μκ° μ
무λ₯Ό νλ©΄μ κ°μΈμ λ₯λ ₯μΌλ‘ λͺ¨λ μΌμ ν΄κ²°ν μ μλ€λ©΄ μ λ§ κΈ°μ κ²μ
λλ€",17,19)
|
| 45 |
+
# print(a)
|
dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.9.13

WORKDIR /app

# Copy the entire backend folder into the image
COPY backend/ ./backend

# Install the dependencies pinned in backend/requirements.txt
RUN pip install --no-cache-dir -r backend/requirements.txt

# (Copy startup.py here if needed)

CMD ["bash", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port 7860"]
|