uuuy5615 committed on
Commit
5ea2b9d
Β·
verified Β·
1 Parent(s): b01fc98

Upload 37 files

Browse files
Files changed (37) hide show
  1. backend/__pycache__/db.cpython-39.pyc +0 -0
  2. backend/__pycache__/main.cpython-313.pyc +0 -0
  3. backend/__pycache__/main.cpython-39.pyc +0 -0
  4. backend/__pycache__/models.cpython-39.pyc +0 -0
  5. backend/__pycache__/post_router.cpython-39.pyc +0 -0
  6. backend/__pycache__/recommendWord.cpython-313.pyc +0 -0
  7. backend/__pycache__/recommendWord.cpython-39.pyc +0 -0
  8. backend/__pycache__/ref.cpython-39.pyc +0 -0
  9. backend/__pycache__/spellchecker.cpython-39.pyc +0 -0
  10. backend/__pycache__/test.cpython-313.pyc +0 -0
  11. backend/auth/__pycache__/routes.cpython-39.pyc +0 -0
  12. backend/auth/routes.py +175 -0
  13. backend/db.py +26 -0
  14. backend/hansepll.py +36 -0
  15. backend/hanspell/__init__.py +1 -0
  16. backend/hanspell/__pycache__/__init__.cpython-312.pyc +0 -0
  17. backend/hanspell/__pycache__/__init__.cpython-313.pyc +0 -0
  18. backend/hanspell/__pycache__/__init__.cpython-39.pyc +0 -0
  19. backend/hanspell/__pycache__/constants.cpython-313.pyc +0 -0
  20. backend/hanspell/__pycache__/constants.cpython-39.pyc +0 -0
  21. backend/hanspell/__pycache__/response.cpython-313.pyc +0 -0
  22. backend/hanspell/__pycache__/response.cpython-39.pyc +0 -0
  23. backend/hanspell/__pycache__/spell_checker.cpython-312.pyc +0 -0
  24. backend/hanspell/__pycache__/spell_checker.cpython-313.pyc +0 -0
  25. backend/hanspell/__pycache__/spell_checker.cpython-39.pyc +0 -0
  26. backend/hanspell/constants.py +10 -0
  27. backend/hanspell/response.py +26 -0
  28. backend/hanspell/spell_checker.py +153 -0
  29. backend/main.py +131 -0
  30. backend/models.py +32 -0
  31. backend/post_router.py +119 -0
  32. backend/recommendWord.py +86 -0
  33. backend/ref.py +75 -0
  34. backend/requirements.txt +20 -0
  35. backend/spellchecker.py +191 -0
  36. backend/test.py +45 -0
  37. dockerfile +13 -0
backend/__pycache__/db.cpython-39.pyc ADDED
Binary file (819 Bytes). View file
 
backend/__pycache__/main.cpython-313.pyc ADDED
Binary file (2.45 kB). View file
 
backend/__pycache__/main.cpython-39.pyc ADDED
Binary file (4.04 kB). View file
 
backend/__pycache__/models.cpython-39.pyc ADDED
Binary file (1.35 kB). View file
 
backend/__pycache__/post_router.cpython-39.pyc ADDED
Binary file (3.72 kB). View file
 
backend/__pycache__/recommendWord.cpython-313.pyc ADDED
Binary file (2.06 kB). View file
 
backend/__pycache__/recommendWord.cpython-39.pyc ADDED
Binary file (2.61 kB). View file
 
backend/__pycache__/ref.cpython-39.pyc ADDED
Binary file (2.58 kB). View file
 
backend/__pycache__/spellchecker.cpython-39.pyc ADDED
Binary file (4.17 kB). View file
 
backend/__pycache__/test.cpython-313.pyc ADDED
Binary file (2.02 kB). View file
 
backend/auth/__pycache__/routes.cpython-39.pyc ADDED
Binary file (4.82 kB). View file
 
backend/auth/routes.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Depends, Request, Response
2
+ from fastapi.responses import JSONResponse
3
+ from fastapi.security import OAuth2PasswordRequestForm
4
+ from pydantic import BaseModel, EmailStr
5
+ from datetime import timedelta, timezone, datetime
6
+ import jwt
7
+ from passlib.context import CryptContext
8
+ from db import get_db
9
+ from models import User
10
+ from sqlalchemy.orm import Session
11
+
12
+ router = APIRouter()
13
+
14
+
15
class UserSignup(BaseModel):
    """Request body for POST /auth/signup."""
    user_name: str
    user_email: EmailStr  # validated e-mail; used as the login identifier
    password: str  # plaintext in the request; bcrypt-hashed before storage (see signup)
+
20
+
21
# Legacy in-memory stores.  NOTE(review): login() persists refresh tokens on
# the User row instead; nothing in this file writes to `refresh_tokens` and
# `users_db` is unused — confirm before removing.
users_db = {}
refresh_tokens = {}
# bcrypt context shared by signup (hash) and login (verify).
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")


# SECURITY: the JWT signing key is hard-coded in source (and duplicated in
# post_router.py).  It should come from configuration/environment.
SECRET_KEY = "52a6206f34a1c479da043cdeee17fd859a35e54978a6733a6a7ebadcbd11f0ca"
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 15  # short-lived access token
REFRESH_TOKEN_EXPIRE_DAYS = 7     # refresh-token lifetime
30
+
31
+
32
def create_token(data: dict, expires_delta: timedelta = timedelta(minutes=15)):
    """Build a signed JWT carrying *data* plus an ``exp`` claim.

    The expiry is computed in UTC so comparisons are timezone-safe.
    """
    claims = dict(data)
    claims["exp"] = datetime.now(timezone.utc) + expires_delta
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
38
+
39
+
40
def verify_token(token: str):
    """Decode *token* and return the e-mail held in its ``sub`` claim.

    Raises HTTP 401 when the token is expired, malformed, or lacks a subject.
    """
    try:
        claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="토큰 만료")
    except jwt.PyJWTError:
        raise HTTPException(status_code=401, detail="토큰 였λ₯˜")

    email = claims.get("sub")
    if email is None:
        raise HTTPException(status_code=401, detail="토큰에 이메일 μ—†μŒ")
    # Valid signature + subject present -> token accepted.
    return email
52
+
53
+
54
def get_current_user(request: Request):
    """FastAPI dependency: extract and validate the Bearer token.

    Returns the authenticated user's e-mail (the JWT ``sub`` claim).
    Raises HTTP 401 when the header is missing/malformed or the token is bad.
    """
    auth_header = request.headers.get("Authorization")
    if not auth_header or not auth_header.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="인증 헀더 μ—†μŒ")

    # BUG FIX: the raw token was printed to stdout — a bearer credential
    # must never be logged.  Also removed the dead `if not user` branch:
    # verify_token() raises on every failure path, so a non-empty e-mail
    # is guaranteed here.
    token = auth_header.split(" ", 1)[1]
    return verify_token(token)
65
+
66
+
67
+ @router.post("/refresh")
68
+ def refresh_token(request: Request, db: Session = Depends(get_db)):
69
+ client_refresh_token = request.cookies.get("refresh_token")
70
+ if not client_refresh_token:
71
+ raise HTTPException(status_code=401, detail="Refresh token missing")
72
+
73
+ try:
74
+ payload = jwt.decode(client_refresh_token, SECRET_KEY, algorithms=[ALGORITHM])
75
+ user_email = payload.get("sub") # or email λ“±
76
+ if not user_email:
77
+ raise HTTPException(status_code=401, detail="Invalid payload")
78
+
79
+ # μ„œλ²„μ— μ €μž₯된 토큰과 비ꡐ
80
+ user = db.query(User).filter(User.user_email == user_email).first()
81
+ if not user or user.refresh_token != client_refresh_token:
82
+ raise HTTPException(
83
+ status_code=401, detail="Token mismatch or reused token"
84
+ )
85
+
86
+ except jwt.ExpiredSignatureError:
87
+ raise HTTPException(status_code=401, detail="Refresh token expired")
88
+ except jwt.InvalidTokenError:
89
+ raise HTTPException(status_code=401, detail="Invalid refresh token")
90
+
91
+ # μƒˆ access token λ°œκΈ‰
92
+ new_access_token = create_token(
93
+ data={"sub": user_email},
94
+ expires_delta=timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES),
95
+ )
96
+
97
+ return JSONResponse(content={"access_token": new_access_token})
98
+
99
+
100
+ @router.post("/logout")
101
+ def logout(request: Request):
102
+ # 1. μΏ ν‚€μ—μ„œ refresh_token κ°€μ Έμ˜€κΈ°
103
+ refresh_token = request.cookies.get("refresh_token")
104
+ if not refresh_token:
105
+ raise HTTPException(status_code=400, detail="Refresh token missing")
106
+
107
+ # 2. μ„œλ²„ μΈ‘ μ €μž₯μ†Œ(DB λ˜λŠ” in-memory)μ—μ„œ ν•΄λ‹Ή 토큰 μ‚­μ œ
108
+ # 예: refresh_token_dbλŠ” 토큰을 μ €μž₯ν•œ dict
109
+ try:
110
+ payload = jwt.decode(refresh_token, SECRET_KEY, algorithms=[ALGORITHM])
111
+ user_email = payload.get("sub")
112
+ if refresh_tokens.get(user_email) == refresh_token:
113
+ del refresh_tokens[user_email]
114
+ except jwt.PyJWTError:
115
+ pass
116
+ res = JSONResponse(content={"message": "Logged out successfully."})
117
+ # 3. ν΄λΌμ΄μ–ΈνŠΈ μΏ ν‚€μ—μ„œ 제거
118
+ res.delete_cookie("refresh_token")
119
+
120
+ return res
121
+
122
+
123
+ @router.post("/login")
124
+ def login(
125
+ form_data: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)
126
+ ):
127
+ db_user = db.query(User).filter(User.user_email == form_data.username).first()
128
+ if not db_user:
129
+ raise HTTPException(status_code=400, detail="Invalid email or password")
130
+
131
+ if not pwd_context.verify(form_data.password, db_user.password):
132
+ raise HTTPException(status_code=400, detail="Invalid email or password")
133
+ access_token = create_token(
134
+ data={"sub": db_user.user_email},
135
+ expires_delta=timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES),
136
+ )
137
+ refresh_token = create_token(
138
+ data={"sub": db_user.user_email},
139
+ expires_delta=timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS),
140
+ )
141
+ db_user.refresh_token = refresh_token
142
+ db.add(db_user)
143
+ db.commit()
144
+ res = JSONResponse(
145
+ content={
146
+ "access_token": access_token,
147
+ "token_type": "bearer",
148
+ }
149
+ )
150
+ res.set_cookie(
151
+ key="refresh_token",
152
+ value=refresh_token,
153
+ httponly=True,
154
+ secure=False, # 개발 ν™˜κ²½μ—μ„œλŠ” False μΆ”μ²œ
155
+ samesite="Lax",
156
+ max_age=60 * 60 * 24,
157
+ )
158
+ return res
159
+
160
+
161
+ @router.post("/signup")
162
+ def signup(user: UserSignup, db: Session = Depends(get_db)):
163
+ existing = db.query(User).filter(User.user_email == user.user_email).first()
164
+
165
+ if existing:
166
+ raise HTTPException(status_code=400, detail="Email already registered")
167
+ hashed_pw = pwd_context.hash(user.password)
168
+ new_user = User(
169
+ user_email=user.user_email,
170
+ user_name=user.user_name,
171
+ password=hashed_pw, # μ‹€μ œλ‘œλŠ” ν•΄μ‹œ ν›„ μ €μž₯
172
+ )
173
+ db.add(new_user)
174
+ db.commit()
175
+ return {"message": "User created successfully"}
backend/db.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# db.py
# SQLAlchemy engine/session setup for the MySQL backend.

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
import urllib.parse

DB_USER = "root"
# SECURITY: credentials are hard-coded in source — move to environment
# variables before deploying.  quote_plus escapes the '@' in the password
# so the connection URL stays parseable.
DB_PASSWORD = urllib.parse.quote_plus("wjdals2413@")
DB_HOST = "localhost"
DB_PORT = "3306"
DB_NAME = "gradproject"

DATABASE_URL = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}?charset=utf8mb4"
# DATABASE_URL = "mysql://root:oIvlzNQSswVbBwNiTwFsLKdmAbFlLlDx@turntable.proxy.rlwy.net:44568/railway"
# echo=True logs every SQL statement — useful in dev, noisy in production.
engine = create_engine(DATABASE_URL, echo=True)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base for ORM models.  NOTE(review): models.py declares its own
# separate Base, so the two metadatas are disjoint — confirm which is intended.
Base = declarative_base()


def get_db():
    """FastAPI dependency yielding a session that is always closed."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
backend/hansepll.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import json
import re

class SpellChecker:
    """Naver spell-checker client; scrapes the per-session passport key."""

    def __init__(self):
        # Both are fetched lazily on the first spell_check() call.
        self.passport_key = None
        self.base_url = None

    def fetch_passport_key(self):
        """Scrape the checker endpoint URL + passportKey from Naver search.

        NOTE(review): on failure this RETURNS an error string and leaves the
        attributes None instead of raising; spell_check() ignores the return
        value, so a failed fetch only surfaces later as a request against
        base_url=None.  Consider raising instead.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Referer': 'https://search.naver.com/',
        }
        response = requests.get("https://search.naver.com/search.naver?query=%EB%A7%9E%EC%B6%A9%EB%B2%95%20%EA%B2%80%EC%82%AC%EA%B8%B0", headers=headers)
        # The key sits in inline JS:
        #   {new SpellingCheck({API:{checker:"<url>?passportKey=<key>"},selector...
        passport_key_match = re.search(r'(?<={new SpellingCheck\({API:{checker:").*?(?="},selector)', response.text)
        if not passport_key_match:
            return "Error: Unable to retrieve passport key"
        self.base_url, self.passport_key = passport_key_match.group(0).split("?passportKey=")

    def spell_check(self, text):
        """Check *text* against the scraped endpoint; return the plain-text result."""
        if self.passport_key is None or self.base_url is None:
            self.fetch_passport_key()
        payload = {
            'passportKey': self.passport_key,
            'where': 'nexearch',
            'color_blindness': 0,
            'q': text
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Referer': 'https://search.naver.com/',
        }
        result_response = requests.get(self.base_url, headers=headers, params=payload)
        # notag_html is the corrected text with markup already stripped.
        return json.loads(result_response.text)['message']['result']['notag_html']
36
+
backend/hanspell/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ __version__ = '1.1'
backend/hanspell/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (174 Bytes). View file
 
backend/hanspell/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (177 Bytes). View file
 
backend/hanspell/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (163 Bytes). View file
 
backend/hanspell/__pycache__/constants.cpython-313.pyc ADDED
Binary file (606 Bytes). View file
 
backend/hanspell/__pycache__/constants.cpython-39.pyc ADDED
Binary file (506 Bytes). View file
 
backend/hanspell/__pycache__/response.cpython-313.pyc ADDED
Binary file (1.35 kB). View file
 
backend/hanspell/__pycache__/response.cpython-39.pyc ADDED
Binary file (974 Bytes). View file
 
backend/hanspell/__pycache__/spell_checker.cpython-312.pyc ADDED
Binary file (6 kB). View file
 
backend/hanspell/__pycache__/spell_checker.cpython-313.pyc ADDED
Binary file (6.03 kB). View file
 
backend/hanspell/__pycache__/spell_checker.cpython-39.pyc ADDED
Binary file (3.85 kB). View file
 
backend/hanspell/constants.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Naver spell-checker proxy endpoint used by spell_checker.check().
base_url = 'https://m.search.naver.com/p/csearch/ocontent/util/SpellerProxy'


class CheckResult:
    """Per-word verdict codes; they mirror the checker's highlight colors
    (see the tag mapping in spell_checker.check())."""
    PASSED = 0                  # no issue
    WRONG_SPELLING = 1          # red
    WRONG_SPACING = 2           # green
    AMBIGUOUS = 3               # violet
    STATISTICAL_CORRECTION = 4  # blue
10
+
backend/hanspell/response.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
from collections import namedtuple

# Result record of one spell-check run.  (Original author's note: particles
# and endings are counted as words too, for lack of a better term.)
_checked = namedtuple('Checked',
    ['result', 'original', 'checked', 'errors', 'words', 'time'])


class Checked(_checked):
    """Immutable spell-check result with dict/str convenience accessors."""

    def __new__(cls, result=False, original='', checked='', errors=0,
                words=None, time=0.0):
        # BUG FIX: the default used to be a mutable ``[]`` shared between
        # all calls; use None as the sentinel and build a fresh list.
        if words is None:
            words = []
        return super(Checked, cls).__new__(
            cls, result, original, checked, errors, words, time)

    def as_dict(self):
        """Return the result as a plain dict (e.g. for JSON responses)."""
        return {
            'result': self.result,
            'original': self.original,
            'checked': self.checked,
            'errors': self.errors,
            'words': self.words,
            'time': self.time,
        }

    def only_checked(self):
        """Return just the corrected text."""
        return self.checked
backend/hanspell/spell_checker.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Python용 ν•œκΈ€ λ§žμΆ€λ²• 검사 λͺ¨λ“ˆ
4
+ """
5
+ import re
6
+ import requests
7
+ import json
8
+ import time
9
+ import sys
10
+ from collections import OrderedDict
11
+ import xml.etree.ElementTree as ET
12
+
13
+ from . import __version__
14
+ from .response import Checked
15
+ from .constants import base_url
16
+ from .constants import CheckResult
17
+
18
+ _agent = requests.Session()
19
+ PY3 = sys.version_info[0] == 3
20
+
21
+
22
def get_passport_key():
    """Scrape the passportKey from the Naver spell-checker search page.

    Returns the key string, or False when it cannot be found in the HTML.
    """
    search_url = "https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=0&ie=utf8&query=넀이버+λ§žμΆ€λ²•+검사기"
    page = requests.get(search_url)

    found = re.search(r'passportKey=([^&"}]+)', page.text)
    return found.group(1) if found else False
42
+
43
+
44
def fix_spell_checker_py_code(file_path, passportKey):
    """Rewrite the ``'passportKey': '...'`` literal inside *file_path* in place."""
    key_pattern = r"'passportKey': '.*'"
    replacement = f"'passportKey': '{passportKey}'"

    with open(file_path, 'r', encoding='utf-8') as src:
        updated = re.sub(key_pattern, replacement, src.read())

    with open(file_path, 'w', encoding='utf-8') as dst:
        dst.write(updated)

    return
58
# Fetched once at import time; False when scraping failed, in which case
# check() sends passportKey=False — NOTE(review): no retry/refresh handling.
passport_key = get_passport_key()
59
+
60
+ def _remove_tags(text):
61
+ text = u'<content>{}</content>'.format(text).replace('<br>','')
62
+ if not PY3:
63
+ text = text.encode('utf-8')
64
+
65
+ result = ''.join(ET.fromstring(text).itertext())
66
+
67
+ return result
68
+
69
+
70
def check(text):
    """Spell-check a Korean sentence (or a list of sentences) via Naver.

    Returns a Checked record (or a list of them for list input).  Sentences
    longer than 500 characters are not sent and come back as
    ``Checked(result=False)``.
    """
    if isinstance(text, list):
        result = []
        for item in text:
            checked = check(item)
            result.append(checked)
        return result

    # The service accepts at most 500 characters per request.
    if len(text) > 500:
        return Checked(result=False)

    payload = {
        "passportKey": passport_key,
        'color_blindness': '0',
        'q': text
    }

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
        'referer': 'https://search.naver.com/',
    }

    start_time = time.time()
    r = _agent.get(base_url, params=payload, headers=headers)
    passed_time = time.time() - start_time

    data = json.loads(r.text)
    html = data['message']['result']['html']
    result = {
        'result': True,
        'original': text,
        'checked': _remove_tags(html),
        'errors': data['message']['result']['errata_count'],
        'time': passed_time,
        'words': OrderedDict(),
    }

    # Rewrite the highlight tags into short markers so words can be split on
    # spaces.  (Original note: ElementTree's iter() could do this more
    # cleanly, but plain string replacement is enough for this small routine.)
    html = html.replace('<em class=\'green_text\'>', '<green>') \
        .replace('<em class=\'red_text\'>', '<red>') \
        .replace('<em class=\'violet_text\'>', '<violet>') \
        .replace('<em class=\'blue_text\'>', '<blue>') \
        .replace('</em>', '<end>')
    items = html.split(' ')
    words = []
    tmp = ''
    for word in items:
        # An opening marker with no <end> in the same chunk means the
        # highlighted span covers several space-separated words; carry the
        # marker over via tmp until <end> is seen.
        if tmp == '' and word[:1] == '<':
            pos = word.find('>') + 1
            tmp = word[:pos]
        elif tmp != '':
            word = u'{}{}'.format(tmp, word)

        if word[-5:] == '<end>':
            word = word.replace('<end>', '')
            tmp = ''

        words.append(word)

    # Map each marker back to a CheckResult code and strip it.
    for word in words:
        check_result = CheckResult.PASSED
        if word[:5] == '<red>':
            check_result = CheckResult.WRONG_SPELLING
            word = word.replace('<red>', '')
        elif word[:7] == '<green>':
            check_result = CheckResult.WRONG_SPACING
            word = word.replace('<green>', '')
        elif word[:8] == '<violet>':
            check_result = CheckResult.AMBIGUOUS
            word = word.replace('<violet>', '')
        elif word[:6] == '<blue>':
            check_result = CheckResult.STATISTICAL_CORRECTION
            word = word.replace('<blue>', '')
        result['words'][word] = check_result

    result = Checked(**result)

    return result
backend/main.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Depends
2
+ from fastapi.responses import JSONResponse
3
+ from pydantic import BaseModel
4
+ from typing import List, Optional
5
+ from recommendWord import recommendWord
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from sentence_transformers import SentenceTransformer
8
+ from keybert import KeyBERT
9
+ from kiwipiepy import Kiwi
10
+ import pandas as pd
11
+ import faiss
12
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
13
+ from ref import refRecommend
14
+ from spellchecker import check
15
+ from auth.routes import router as auth_router
16
+ from auth.routes import get_current_user
17
+ from post_router import router as post_router
18
+ import os
19
+ import requests
20
+
21
# Prebuilt FAISS index + paper-metadata CSV hosted on the Hugging Face Hub;
# downloaded at startup when missing and cached next to the app.
FAISS_URL = (
    "https://huggingface.co/datasets/uuuy5615/my_index/resolve/main/faiss_index.idx"
)
CSV_URL = "https://huggingface.co/datasets/uuuy5615/my_index/resolve/main/kci.csv"

FAISS_PATH = "faiss_index.idx"
CSV_PATH = "kci.csv"
28
+
29
+
30
def mask_by_position(sentence: str, start: int, end: int) -> str:
    """Replace sentence[start:end] with the literal "[MASK]" token."""
    prefix, suffix = sentence[:start], sentence[end:]
    return f"{prefix}[MASK]{suffix}"
32
+
33
+
34
# --- one-time startup: fetch the FAISS index if not cached locally ----------
if not os.path.exists(FAISS_PATH):
    print("FAISS 파일 λ‹€μš΄λ‘œλ“œ 쀑...")
    r = requests.get(FAISS_URL)
    r.raise_for_status()  # fail fast if the download is broken
    with open(FAISS_PATH, "wb") as f:
        f.write(r.content)
    print("FAISS λ‹€μš΄λ‘œλ“œ μ™„λ£Œ!")

# Same for the paper-metadata CSV.
if not os.path.exists(CSV_PATH):
    print("CSV 파일 λ‹€μš΄λ‘œλ“œ 쀑...")
    r = requests.get(CSV_URL)
    r.raise_for_status()
    with open(CSV_PATH, "wb") as f:
        f.write(r.content)
    print("CSV λ‹€μš΄λ‘œλ“œ μ™„λ£Œ!")

# Models for reference recommendation (sentence embeddings, keyword
# extraction, Korean POS tagging) plus the metadata frame and FAISS index.
refModel = SentenceTransformer("jhgan/ko-sbert-nli")
kw_model = KeyBERT(refModel)
kiwi = Kiwi()
df = pd.read_csv("kci.csv", low_memory=False)
index = faiss.read_index("faiss_index.idx")

# Masked-LM used for word recommendation.
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-large")
wordModel = AutoModelForMaskedLM.from_pretrained("klue/roberta-large")

app = FastAPI()

app.include_router(auth_router, prefix="/auth", tags=["auth"])
app.include_router(post_router, prefix="/post", tags=["post"])

app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://127.0.0.1:5173",
        "https://geulditbul.vercel.app",
    ],  # front-end origins allowed to send credentials
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
77
+
78
+
79
class SpellCheckRequest(BaseModel):
    """Body of POST /api/spellcheck."""
    text: str  # raw text to check


class Correction(BaseModel):
    """One correction reported by the spell checker.

    NOTE(review): field semantics inferred from names — spellchecker.check()
    is defined in spellchecker.py (not shown here); confirm against it.
    """
    error: str               # presumably the original (wrong) fragment
    checked: str             # presumably the suggested replacement
    position: Optional[int]  # offset of the fragment, if known
    length: int              # length of the fragment
    errortype: int           # category code — likely CheckResult-style; verify


class SpellCheckResponse(BaseModel):
    """Response of POST /api/spellcheck (validated from check()'s dict)."""
    flag: int
    original_text: str
    checked_text: str
    corrections: List[Correction]
    time: float  # seconds spent in the external checker — TODO confirm unit
97
+
98
+
99
+ @app.post("/api/spellcheck", response_model=SpellCheckResponse)
100
+ def api_spellcheck(req: SpellCheckRequest, _: dict = Depends(get_current_user)):
101
+ # checkλŠ” dictλ₯Ό λ°˜ν™˜
102
+ return check(req.text)
103
+
104
+
105
+ @app.get("/model/WordRec")
106
+ async def runWordRec(
107
+ user_sentence: str,
108
+ MaskWord: str,
109
+ start: int,
110
+ end: int,
111
+ _: dict = Depends(get_current_user),
112
+ ):
113
+
114
+ sentence = mask_by_position(user_sentence, start - 1, end - 1)
115
+ rec_words = recommendWord(sentence, MaskWord, tokenizer, wordModel)
116
+
117
+ # result = {"model_name":model_name, "masked_word": MaskWord, "rec_word" : rec_word}
118
+
119
+ # result1, result2, result3 = [rec_word[i:i+3] for i in range(0, len(rec_word), 3)]
120
+
121
+ data = {"Model": "WordRec", "masked_word": MaskWord, "rec_result": rec_words}
122
+
123
+ return JSONResponse(content=data)
124
+
125
+
126
+ @app.get("/model/RefRec")
127
+ async def runRefRec(text: str, _: dict = Depends(get_current_user)):
128
+ name, link = refRecommend(refModel, kw_model, kiwi, text, df, index)
129
+ data = {"Model": "RefRec", "name_result": name, "link_result": link}
130
+
131
+ return JSONResponse(content=data)
backend/models.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# models.py
# ORM table definitions.

from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey
from sqlalchemy.sql import func

# BUG FIX: these models previously declared their own declarative_base(),
# separate from the one in db.py, leaving the two metadatas disjoint (e.g.
# Base.metadata.create_all on either base would miss the other's tables).
# Share the single base from db.py; `Base` stays importable from this module.
from db import Base


class Post(Base):
    """A user-authored document."""
    __tablename__ = "post"

    post_id = Column(Integer, primary_key=True, index=True)
    title = Column(String(100), nullable=False)
    content = Column(Text, nullable=True)
    created_date = Column(DateTime(timezone=True), server_default=func.now())
    # Refreshed by the DB on every UPDATE.
    updated_date = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
    user_id = Column(Integer, ForeignKey("user.user_id"), nullable=False)


class User(Base):
    """An account; `refresh_token` stores the active server-side token."""
    __tablename__ = "user"

    user_id = Column(Integer, primary_key=True, index=True)
    user_email = Column(String(255), nullable=False, unique=True, index=True)
    user_name = Column(String(100))
    password = Column(String(255), nullable=False)  # bcrypt hash, never plaintext
    join_date = Column(DateTime, default=func.now())
    last_login_date = Column(DateTime, nullable=True)
    refresh_token = Column(String(512), nullable=True)
backend/post_router.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from sqlalchemy.orm import Session
3
+ from models import Post, User
4
+ from db import get_db
5
+ from pydantic import BaseModel
6
+ from typing import Optional
7
+ from fastapi.security import OAuth2PasswordBearer
8
+ import jwt
9
+ from auth.routes import get_current_user
10
+
11
class PostCreate(BaseModel):
    """Body of POST /post/posts."""
    title: str
    content: Optional[str] = None

class PostUpdate(BaseModel):
    """Body of PUT /post/save."""
    post_id: int
    title: str
    content: Optional[str] = None

# NOTE(review): duplicated from auth/routes.py — keep the two in sync or
# move to shared config; the key is hard-coded in source (security).
SECRET_KEY = "52a6206f34a1c479da043cdeee17fd859a35e54978a6733a6a7ebadcbd11f0ca"
ALGORITHM = "HS256"

router = APIRouter()

# Extracts the Bearer token; tokenUrl points at the login route.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login")
26
+
27
+
28
def get_auth_user(
    token: str = Depends(oauth2_scheme), db: Session = Depends(get_db)
) -> User:
    """Dependency: resolve the Bearer token to its User row, or raise 401."""
    try:
        claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.PyJWTError:
        raise HTTPException(status_code=401, detail="Invalid token")

    email: str = claims.get("sub")
    if email is None:
        raise HTTPException(status_code=401, detail="Token payload invalid")

    account = db.query(User).filter(User.user_email == email).first()
    if account is None:
        raise HTTPException(status_code=401, detail="User not found")

    return account
+
45
+
46
+ @router.post("/posts")
47
+ def create_post(
48
+ post: PostCreate,
49
+ db: Session = Depends(get_db),
50
+ current_user: User = Depends(get_auth_user),
51
+ ):
52
+ new_post = Post(
53
+ title=post.title, content=post.content, user_id=current_user.user_id
54
+ )
55
+ db.add(new_post)
56
+ db.commit()
57
+ db.refresh(new_post)
58
+ return {"message": "Post created", "post_id": new_post.post_id}
59
+
60
+
61
+ @router.get("/list")
62
+ def get_my_posts(
63
+ current_user: User = Depends(get_auth_user), db: Session = Depends(get_db)
64
+ ):
65
+ posts = db.query(Post).filter(Post.user_id == current_user.user_id).all()
66
+ return [{"post_id": post.post_id, "title": post.title} for post in posts]
67
+
68
+
69
+ @router.get("/{post_id}")
70
+ def read_user_post(
71
+ post_id: int,
72
+ current_user: User = Depends(get_auth_user),
73
+ db: Session = Depends(get_db),
74
+ ):
75
+ post = (
76
+ db.query(Post)
77
+ .filter(Post.post_id == post_id, Post.user_id == current_user.user_id)
78
+ .first()
79
+ )
80
+ if post is None:
81
+ raise HTTPException(status_code=404, detail="ν¬μŠ€νŠΈκ°€ μ—†κ±°λ‚˜ κΆŒν•œμ΄ μ—†μŠ΅λ‹ˆλ‹€.")
82
+ return {"post_id": post.post_id, "title": post.title, "content": post.content}
83
+
84
+ @router.put("/save")
85
+ def update_post(
86
+ post: PostUpdate,
87
+ db: Session = Depends(get_db),
88
+ current_user: User = Depends(get_auth_user),
89
+ ):
90
+ db_post = (
91
+ db.query(Post)
92
+ .filter(Post.post_id == post.post_id, Post.user_id == current_user.user_id)
93
+ .first()
94
+ )
95
+ if not db_post:
96
+ raise HTTPException(status_code=404, detail="Post not found or access denied")
97
+
98
+ db_post.title = post.title
99
+ db_post.content = post.content
100
+ db.commit()
101
+ db.refresh(db_post)
102
+
103
+ return {"message": "Post updated successfully"}
104
+
105
+
106
+ @router.delete("/{post_id}")
107
+ def delete_post(
108
+ post_id: int, db: Session = Depends(get_db), user=Depends(get_auth_user)
109
+ ):
110
+ post = (
111
+ db.query(Post)
112
+ .filter(Post.post_id == post_id, Post.user_id == user.user_id)
113
+ .first()
114
+ )
115
+ if not post:
116
+ raise HTTPException(status_code=404, detail="Post not found")
117
+ db.delete(post)
118
+ db.commit()
119
+ return {"message": "Post deleted"}
backend/recommendWord.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import quote
5
+
6
+
7
def get_synonyms_from_wordsisters(word: str) -> list[str]:
    """Fetch synonyms for *word* from the wordsisters API.

    Returns [] on any network/parse failure (best-effort).
    """
    encoded_word = quote(word)
    # BUG FIX: the raw word was interpolated into the API URL although an
    # encoded copy had been computed (and was used only in the Referer);
    # Korean input needs percent-encoding in the path.
    url = f"https://wordsisters.com/api/ai/{encoded_word}"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Referer": f"https://wordsisters.com/search/{encoded_word}",
    }
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()

        data = response.json()
        synonyms = data.get("result", {}).get("synonyms", [])
        return synonyms
    except Exception as e:
        print(f"Error fetching synonyms: {e}")
        return []
24
+
25
+
26
def extract_synonyms_from_html(html: str) -> list[str]:
    """Collect unique `.link_relate` texts from a Daum dictionary result page."""
    try:
        parsed = BeautifulSoup(html, "html.parser")
        seen: list[str] = []

        for node in parsed.select(".link_relate"):
            label = node.get_text(strip=True)
            if label and label not in seen:
                seen.append(label)

        print(f"Extracted synonyms: {seen}")
        return seen
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []
41
+
42
+
43
def get_synonyms_from_daum(word: str) -> list[str]:
    """Scrape Daum dictionary search results for synonyms of *word*.

    Returns [] on failure (best-effort).
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
        params = {"q": word}

        response = requests.get(
            "https://dic.daum.net/search.do", params=params, headers=headers
        )
        response.raise_for_status()

        return extract_synonyms_from_html(response.text)
    except Exception as e:
        print(f"Error fetching from Daum: {e}")
        # BUG FIX: the error path previously fell through and returned None,
        # violating the list[str] annotation and breaking iterating callers.
        return []
56
+
57
+
58
def max_logit(tensor, symDict, tokenizer):
    """Pick up to three synonyms from *symDict* in model-preference order.

    *tensor* is the (values, indices) pair returned by ``torch.sort`` on the
    mask-position logits with descending=True, so ``tensor[1][0]`` lists
    token ids from most to least likely.  Each decoded token that is also a
    synonym is kept, stopping after three matches.

    Fixes over the original: the vocabulary size is taken from the sorted
    tensor instead of a hard-coded 32000, membership is an O(1) set lookup
    instead of a linear scan over symDict per token, and the unused ``stop``
    flag was dropped.
    """
    synonyms = set(symDict)
    found = []
    for token_id in tensor[1][0]:
        token = str(tokenizer.decode(token_id))
        if token in synonyms:
            found.append(token)
            if len(found) >= 3:
                break
    return found
72
+
73
+
74
def recommendWord(user_sentence, MaskWord, tokenizer, model):
    """Suggest context-fitting synonyms for the mask token in the sentence.

    Runs the masked-LM over *user_sentence* (which must already contain the
    tokenizer's mask token), ranks the vocabulary at the mask position, and
    keeps only candidates that are also synonyms of *MaskWord* according to
    the wordsisters API.  Returns at most three words (see max_logit).
    """
    inputs = tokenizer(user_sentence, return_tensors="pt")
    with torch.no_grad():  # inference only — no gradients needed
        logits = model(**inputs).logits
    # Position of the mask token inside the encoded sequence.
    mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(
        as_tuple=True
    )[0]
    symDict = get_synonyms_from_wordsisters(MaskWord)
    # Sort the vocabulary logits at the mask position, most likely first.
    ts = torch.sort(logits[0, mask_token_index], dim=-1, descending=True)
    found = max_logit(ts, symDict, tokenizer)
    return found
85
+
86
+
backend/ref.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import pandas as pd
3
+ from sentence_transformers import SentenceTransformer
4
+ import math
5
+ from keybert import KeyBERT
6
+ from kiwipiepy import Kiwi
7
+ import urllib.parse
8
+ import json
9
+
10
def safe_int(val):
    """Convert *val* to int; return '' when it is None or NaN (missing cell)."""
    missing = val is None or (isinstance(val, float) and math.isnan(val))
    return '' if missing else int(val)
17
+
18
def generate_dbpia_link(title):
    """Build a DBpia keyword-search URL for the given paper title."""
    query = urllib.parse.quote(title)
    return f"https://www.dbpia.co.kr/search/topSearch?searchOption=all&query={query}"
23
+
24
def generate_reference(row):
    """Format one paper record as an APA-style citation plus a DBpia link.

    row: mapping-like paper metadata (e.g. one DataFrame row) with Korean
        column names for author, year, title, journal, volume, issue, pages.
    Returns a (citation_string, dbpia_search_url) tuple.
    """
    volume = safe_int(row.get('ꢌ'))
    number = safe_int(row.get('호'))
    first_page = safe_int(row.get('μ‹œμž‘νŽ˜μ΄μ§€'))
    last_page = safe_int(row.get('λνŽ˜μ΄μ§€'))

    author = row.get('μ €μž', '')
    title = row.get('λ…Όλ¬Έλͺ…(κ΅­λ¬Έ)', '')
    journal = row.get('ν•™μˆ μ§€λͺ…(κ΅­λ¬Έ)', '')

    citation = f"{author}. ({safe_int(row.get('λ°œν–‰λ…„'))}). {title}. {journal}"

    # Append ", vol(issue)" only when at least one of them is present.
    if volume != '' or number != '':
        citation += f", {volume}"
        if number != '':
            citation += f"({number})"

    # Append page range only when both endpoints are known.
    if first_page != '' and last_page != '':
        citation += f", {first_page}-{last_page}."
    else:
        citation += "."

    return (citation, generate_dbpia_link(title))
49
+
50
def refRecommend(model,kw_model,kiwi,text,df,index):
    """Recommend reference papers similar to *text*.

    model: SentenceTransformer used to embed the search query.
    kw_model: KeyBERT instance for keyword extraction.
    kiwi: Kiwi morphological analyzer, used to keep only nouns.
    text: the user's draft text.
    df: DataFrame of paper metadata, row-aligned with *index*.
    index: FAISS index over the paper embeddings.
    Returns (names, links): parallel lists of citation strings and DBpia URLs
    for the top-3 nearest papers.
    """
    # Collect every noun (NN* POS tags) from the morphological analysis.
    nouns_list = []
    for sentence in kiwi.analyze(text):
        nouns = [token.form for token in sentence[0] if token.tag.startswith('NN')]
        if nouns:
            nouns_list.extend(nouns)
    result_text = ' '.join(nouns_list)

    # Extract the top-5 single-word keywords from the noun-only text.
    keywords = kw_model.extract_keywords(result_text, keyphrase_ngram_range=(1, 1), stop_words=None, top_n=5)

    # Embed the two strongest keywords joined by a space and query FAISS.
    # NOTE(review): raises IndexError when fewer than 2 keywords are found.
    query_vector = model.encode([keywords[0][0]+" "+keywords[1][0]])
    D, I = index.search(query_vector, k=3)  # k = number of neighbours to retrieve

    results = df.iloc[I[0]]  # I[0] holds the row indices of the top-k hits

    name = []
    link = []
    # Build citation text and DBpia link for every recommended paper.
    for i, row in results.iterrows():
        name_result,link_result = generate_reference(row)
        print(name_result)
        name.append(name_result)
        link.append(link_result)
    return name,link
backend/requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ beautifulsoup4==4.13.4
2
+ faiss_cpu==1.11.0
3
+ fastapi==0.115.12
4
+ keybert==0.9.0
5
+ kiwipiepy==0.21.0
6
+ pandas==2.2.3
7
+ passlib==1.7.4
8
+ pydantic==2.11.5
9
+ PyJWT==2.10.1
10
+ Requests==2.32.3
11
+ sentence_transformers==4.1.0
12
+ SQLAlchemy==2.0.41
13
+ torch==2.7.0
14
+ transformers==4.51.3
15
+ uvicorn==0.34.3
16
+ mysqlclient
17
+ pydantic[email]
18
+ python-multipart
19
+ bcrypt
20
+ pymysql
backend/spellchecker.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import difflib
3
+ from hanspell import spell_checker
4
+ from hanspell.constants import CheckResult
5
+ from kiwipiepy import Kiwi
6
+
7
# Maps hanspell CheckResult categories to the integer error codes exposed by
# this module's API responses.
ERROR_TYPE_MAPPING = {
    CheckResult.PASSED: 0,  # word/phrase with no problem
    CheckResult.WRONG_SPELLING: 1,  # word/phrase with a spelling error
    CheckResult.WRONG_SPACING: 2,  # word/phrase with a spacing error
    CheckResult.AMBIGUOUS: 3,  # word/phrase suspected to be non-standard
    CheckResult.STATISTICAL_CORRECTION: 4,  # word/phrase corrected statistically
}
# NOTE(review): difflib is already imported at the top of this file; this
# second import is redundant (harmless, but could be removed).
import difflib
15
+
16
+
17
def update_corrections_by_error_text(original_text, checked_text, corrections):
    """Re-anchor each correction entry by locating its error text.

    For every correction, find where its ``error`` string occurs in
    *original_text* (falling back to the stored ``position`` when not found),
    then estimate the corrected text as the same-length slice of
    *checked_text* at that offset. Returns a new list of updated dicts;
    the input dicts are not mutated.
    """
    result = []
    for item in corrections:
        wrong = item["error"]
        pos = original_text.find(wrong)
        if pos == -1:
            # Error text not found: fall back to the position recorded earlier.
            pos = item["position"]

        span = len(wrong)
        entry = dict(item)
        # Same-offset slice of the corrected text approximates the fix.
        entry["checked"] = checked_text[pos : pos + span]
        entry["position"] = pos  # re-anchored position
        entry["length"] = span
        result.append(entry)

    return result
39
+
40
+
41
def extract_phrase(text: str, position: int) -> str:
    """Return the space-delimited word/phrase surrounding *position*.

    Expands left and right from *position* until a space (or a text edge)
    is reached. Returns "" when *position* is outside the text.
    """
    if not (0 <= position < len(text)):
        return ""

    # Last space strictly before position (or -1) marks the left boundary.
    start = text.rfind(" ", 0, position) + 1
    # First space strictly after position (or end of text) marks the right one.
    end = text.find(" ", position + 1)
    if end == -1:
        end = len(text)

    return text[start:end]
56
+
57
+
58
def get_space_diffs(original: str, corrected: str):
    """Walk *original* and *corrected* in lockstep and record spacing diffs.

    Compares the two strings character by character with two cursors and
    emits one entry per place where a space exists in one string but not
    the other. Each entry re-checks the surrounding phrase through the
    hanspell service (network call) to obtain its corrected form.
    Returns a list of dicts shaped like the entries in check()'s
    "corrections" output (length -1 = space deleted, 1 = space inserted).
    """
    diffs = []
    orig_len = len(original)
    corr_len = len(corrected)
    o_idx = c_idx = 0

    while o_idx < orig_len and c_idx < corr_len:
        o_char = original[o_idx]
        c_char = corrected[c_idx]

        # Identical characters: advance both cursors.
        if o_char == c_char:
            o_idx += 1
            c_idx += 1
            continue
        # Space in the original but not in the correction -> space deleted.
        if o_char == " " and c_char != " ":
            error = extract_phrase(original, o_idx)
            check = spell_checker.check(error).as_dict()["checked"]
            diffs.append(
                {
                    "error": error,
                    "checked": check,
                    "position": o_idx,
                    "length": -1,
                    # NOTE(review): ERROR_TYPE_MAPPING is keyed by CheckResult
                    # constants; indexing with the literal 2 assumes
                    # CheckResult.WRONG_SPACING == 2 — confirm.
                    "errortype": ERROR_TYPE_MAPPING[2],
                }
            )
            o_idx += 1  # skip the space in the original

        # Space in the correction but not in the original -> space inserted.
        elif c_char == " " and o_char != " ":
            # The space is assumed to be inserted after the preceding char.
            error = extract_phrase(original, o_idx)
            check = spell_checker.check(error).as_dict()["checked"]
            diffs.append(
                {
                    "error": error,
                    "checked": check,
                    "position": o_idx,  # insertion point in the original
                    "length": 1,
                    "errortype": ERROR_TYPE_MAPPING[2],
                }
            )
            c_idx += 1  # skip the space in the correction

        # Both differ but neither is a space (grammar fix, etc.): skip both.
        else:
            o_idx += 1
            c_idx += 1

    return diffs
110
+
111
+
112
def check(text: str):
    """Run the hanspell checker on *text* and return a structured report.

    Returns a dict with: "flag" (0 = no change, 1 = corrections made),
    "original_text", "checked_text", "corrections" (list of per-error dicts,
    spelling and spacing combined, sorted by position), and "time"
    (elapsed time reported by the checker). Performs a network call.
    """
    ch_text = spell_checker.check(text)
    info = ch_text.as_dict()
    orig_text = info["original"]
    corr_text = info["checked"]
    time = info["time"]
    if orig_text == corr_text:
        flag = 0
    else:
        flag = 1
    print(info["words"])
    space = get_space_diffs(orig_text, corr_text)
    # 1) Build a character-level mapping from corrected -> original offsets.
    sm = difflib.SequenceMatcher(None, orig_text, corr_text)
    mapping = {}
    for tag, i1, i2, j1, j2 in sm.get_opcodes():
        if tag == "equal":
            # Matching block: one-to-one offset mapping.
            for offset in range(i2 - i1):
                mapping[j1 + offset] = i1 + offset
        elif tag in ("replace", "insert"):
            # Replace/insert block: map every corrected char to the block start.
            for offset in range(j2 - j1):
                mapping[j1 + offset] = i1
    # 2) For each flagged token, recover its position and original spelling.
    corrections = []
    for token, status in info["words"].items():
        if status == CheckResult.PASSED or status == CheckResult.WRONG_SPACING:
            continue

        corr_pos = corr_text.find(token)

        if corr_pos != -1 and corr_pos in mapping:
            orig_pos = mapping[corr_pos]
            # Slice the original text at the mapped offset, token-length wide.
            error_word = orig_text[orig_pos : orig_pos + len(token)]
        else:
            # NOTE(review): orig_pos stays None here; sorted() below compares
            # positions and will raise TypeError if this branch fires — verify.
            orig_pos = None
            error_word = token
        length = len(error_word)
        corrections.append(
            {
                "error": error_word,
                "checked": token,
                "position": orig_pos,
                "length": length,
                "errortype": ERROR_TYPE_MAPPING[status],
            }
        )
    combined = corrections + space

    sorted_combined = sorted(combined, key=lambda x: x["position"])
    result = {
        "flag": flag,
        "original_text": info["original"],
        "checked_text": info["checked"],
        "corrections": sorted_combined,
        "time": time,
    }

    return result
173
+
174
+
175
if __name__ == "__main__":
    # Demo: run the checker on two deliberately misspelled Korean samples.
    sample = "λ‚˜λŠ” 였늘 μ•„μΉ¨λ°₯을 λ¨Ήκ³  학ꡐ λ₯Ό κ°”λ‹€.학ꡐ λ₯Ό μ•„λŠ” μΉœκ΅¬λ“€μ΄ 많치만, μ˜€λŠ˜μ€ 별루 보이지 μ•Šμ•˜λ‹€. ν•™κ΅μ•ž λ¬Έκ΅¬μ μ—μ„œ λ³ΌνŽœμ„ μƒ€λŠ”λ°, κ·Έ λ³ΌνŽœμ€ μž‰ν¬κ°€ 자주 λ§λΌμ„œ 자주 λ°”κΏ”μ•Όν•œλ‹€. ν•™κ΅μ—μ„œ 학ꡐ 행사에 λŒ€ν•œ μ–˜κΈ°λ₯Ό λ“€μ—ˆλŠ”λ°, 별루 κΈ°λŒ€λŠ” μ•ˆλœλ‹€."
    sample2 = "ν˜„λŒ€ κ΅μœ‘μ€ λ‹¨μˆœνžˆ 지식을 μ „λ‹¬ν•˜λŠ” 것을 λ„˜μ–΄μ„œ, ν•™μƒμ˜ 전인적 μ„±μž˜μ„ λͺ©ν‘œλ‘œ ν•œλ‹€. 이에 따라 μ •μ„œμ  지지와 μ‚¬νšŒμ„± κ΅μœ‘λ„ 점점 μ€‘μš”ν•΄μ§€κ³  μžˆμžˆλ‹€. κ·ΈλŸ¬λ‚˜ 아직도 λ§Žμ€ ν•™κ΅μ—μ„œλŠ” μ£Όμž…μ‹ ꡐ윑이 쀑심이 λ˜μ–΄, 학생듀이 μ£Όλ„μ μœΌλ‘œ ν•™μŠ΅ν•  κΈ°νšŒκ°€ 적닀. λ˜ν•œ, κ΅μ‚¬λ“€μ˜ κ³Όλ„ν•œ ν–‰μ •μ—…λ¬΄λ‘œ 인해 μˆ˜μ—… 쀀비에 μΆ©λΆ„ν•œ μ‹œκ°„μ„ κ°€μ§ˆμˆ˜ μ—†κ³ , μ΄λŠ” ꡐ윑의 질 μ €ν•˜λ‘œ μ΄μ–΄μ§ˆ 수 μžˆλ”°. 지속적인 ꡐ사 μ—°μˆ˜μ™€ κ΅μœ‘ν™˜κ²½ κ°œμ„ μ΄ λ’·λ°›μΉ¨λ˜μ–΄μ•Όλ§Œ λ―Έλž˜ν˜• ꡐ윑이 μ‹€ν˜„λ  수 있슬 것이닀."
    output = check(sample2)
    print(json.dumps(output, ensure_ascii=False, indent=2))
    print(sample2[79])
    # Output schema:
    # "flag": whether the sentence contains any error (0: none / 1: present)
    # "original_text": the original sentence
    # "checked_text": the corrected sentence
    # "corrections": [
    #   {
    #     "error": the misspelled word
    #     "position": start offset of the word within the sentence
    #     "errortype": error category (1-4)
    #   },
    # ]
    # "time": elapsed time
backend/test.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
def get_synonyms_from_daum(word: str) -> list[str]:
    """Fetch candidate synonyms for *word* from the Daum dictionary search page.

    word: the Korean word to look up.
    Returns a list of related words; returns [] on any network/HTTP failure
    so callers can always iterate the result.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        }
        params = {
            'q': word
        }

        response = requests.get("https://dic.daum.net/search.do", params=params, headers=headers)
        response.raise_for_status()

        return extract_synonyms_from_html(response.text)
    except Exception as e:
        print(f"Error fetching from Daum: {e}")
        # Bug fix: the except path previously fell through and returned None,
        # contradicting the declared list[str] return type.
        return []
19
+
20
def extract_synonyms_from_html(html: str) -> list[str]:
    """Parse a Daum dictionary result page and collect related-word entries.

    Returns the unique texts of all '.link_relate' nodes, in document order;
    returns [] when parsing fails.
    """
    try:
        document = BeautifulSoup(html, 'html.parser')
        collected: list[str] = []
        for node in document.select('.link_relate'):
            word = node.get_text(strip=True)
            # keep first occurrence only, preserving page order
            if word and word not in collected:
                collected.append(word)
        print(f"Extracted synonyms: {collected}")
        return collected
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return []
35
+
36
+
37
+ # μ˜ˆμ‹œ μ‚¬μš©
38
+
39
+ # word = "λŠ₯λ ₯"
40
+ # synonyms = get_synonyms_from_daum(word)
41
def mask_by_position(sentence: str, start: int, end: int) -> str:
    """Replace a span of *sentence* with "[MASK]".

    start/end are treated as 1-based offsets: characters at 1-based
    positions start .. end-1 are replaced.
    """
    prefix = sentence[: start - 1]
    suffix = sentence[end - 1 :]
    return prefix + "[MASK]" + suffix
43
+
44
+ # a = mask_by_position("κ°μžκ°€ 업무λ₯Ό ν•˜λ©΄μ„œ 개인의 λŠ₯λ ₯으둜 λͺ¨λ“  일을 ν•΄κ²°ν•  수 μžˆλ‹€λ©΄ 정말 기쁠 κ²ƒμž…λ‹ˆλ‹€",17,19)
45
+ # print(a)
dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9.13

WORKDIR /app

# Copy the entire backend folder into the image
COPY backend/ ./backend

# Install dependencies from the requirements.txt inside backend
RUN pip install --no-cache-dir -r backend/requirements.txt

# (Copy startup.py here if needed)

CMD ["bash", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port 7860"]