diff --git a/.env b/.env new file mode 100644 index 0000000000000000000000000000000000000000..5375d038a6cd805eb3dbc5922c86f6040454c902 --- /dev/null +++ b/.env @@ -0,0 +1,17 @@ +# app +PORT=8001 + +# database +DB_HOST=127.0.0.1 +DB_PORT=3306 +DB_USER=root +DB_PASSWORD=password +DB_DATABASE=mini-project +POOL_SIZE=8 +MAX_OVERFLOW=32 +POOL_RECYCLE=64 + +# jwt +JWT_EXPIRATION_DELTA=24 +JWT_ALGORITHM=HS256 +JWT_SECRET=key123456 \ No newline at end of file diff --git a/__pycache__/env.cpython-313.pyc b/__pycache__/env.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b46d51ceca01c3b84b9c1050f86e9e60874a5ff Binary files /dev/null and b/__pycache__/env.cpython-313.pyc differ diff --git a/__pycache__/main.cpython-313.pyc b/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5e62836fb8409c02795d4796b56d1def4c34281 Binary files /dev/null and b/__pycache__/main.cpython-313.pyc differ diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000000000000000000000000000000000000..ceead3016def49bcbedb2cfeed48599b01e2fd98 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,147 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = %(here)s/alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. for multiple paths, the path separator +# is defined by "path_separator" below. +prepend_sys_path = . + + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the tzdata library which can be installed by adding +# `alembic[tz]` to the pip requirements. +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to /versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "path_separator" +# below. +# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions + +# path_separator; This indicates what character is used to split lists of file +# paths, including version_locations and prepend_sys_path within configparser +# files such as alembic.ini. +# The default rendered in new alembic.ini files is "os", which uses os.pathsep +# to provide os-dependent path splitting. +# +# Note that in order to support legacy alembic.ini files, this default does NOT +# take place if path_separator is not present in alembic.ini. 
If this +# option is omitted entirely, fallback logic is as follows: +# +# 1. Parsing of the version_locations option falls back to using the legacy +# "version_path_separator" key, which if absent then falls back to the legacy +# behavior of splitting on spaces and/or commas. +# 2. Parsing of the prepend_sys_path option falls back to the legacy +# behavior of splitting on spaces, commas, or colons. +# +# Valid values for path_separator are: +# +# path_separator = : +# path_separator = ; +# path_separator = space +# path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# database URL. This is consumed by the user-maintained env.py script only. +# other means of configuring database URLs may be customized within the env.py +# file. +sqlalchemy.url = mysql+pymysql://root:password@localhost:3306/mini-project + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# hooks = ruff +# ruff.type = module +# ruff.module = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Alternatively, use the exec runner to execute a binary found on your PATH +# hooks = ruff +# ruff.type = exec +# ruff.executable = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration. This is also consumed by the user-maintained +# env.py script only. +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/README b/alembic/README new file mode 100644 index 0000000000000000000000000000000000000000..98e4f9c44effe479ed38c66ba922e7bcc672916f --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. 
\ No newline at end of file diff --git a/alembic/__pycache__/env.cpython-313.pyc b/alembic/__pycache__/env.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ff38637f2111ae08575b5bac105da63d3f58f2a Binary files /dev/null and b/alembic/__pycache__/env.cpython-313.pyc differ diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000000000000000000000000000000000000..5992154a25d1ca87b8ebb6f91eadaf5a9b64bb3d --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,87 @@ +from logging.config import fileConfig + +from sqlalchemy import engine_from_config +from sqlalchemy import pool + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata + +from src.models.base import BaseModel +from src.models.user import User +from src.models.question import Question +from src.models.comment import Comment +from src.models.rating import Rating +from src.models.choice import Choice + +target_metadata = BaseModel.metadata + + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000000000000000000000000000000000000..11016301e749297acb67822efc7974ee53c905c6 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,28 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
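Reviewer note: both .env and alembic.ini carry the same MySQL credentials, and alembic.ini hardcodes sqlalchemy.url. One way to keep the URL in a single place is to let alembic/env.py build it from the environment and override the ini value. A minimal sketch, assuming the project-root env.py config module added in this diff is importable and that the snippet sits after `config = context.config` in alembic/env.py:

    from env import config as app_config   # the project-root env.py added in this diff

    db = app_config["db"]
    # `config` is the Alembic Config object created earlier in alembic/env.py
    config.set_main_option(
        "sqlalchemy.url",
        f"mysql+pymysql://{db['user']}:{db['password']}@{db['host']}:{db['port']}/{db['database']}",
    )

With this in place, the sqlalchemy.url line in alembic.ini can be left blank or removed, so the password lives only in .env.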
+revision: str = ${repr(up_revision)} +down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + """Upgrade schema.""" + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + """Downgrade schema.""" + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/7531e2b3a772_init_database.py b/alembic/versions/7531e2b3a772_init_database.py new file mode 100644 index 0000000000000000000000000000000000000000..1b0f09fad16ddf6117cea0205111c8c8f6588773 --- /dev/null +++ b/alembic/versions/7531e2b3a772_init_database.py @@ -0,0 +1,100 @@ +"""init database + +Revision ID: 7531e2b3a772 +Revises: +Create Date: 2025-10-14 00:42:13.032368 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '7531e2b3a772' +down_revision: Union[str, Sequence[str], None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('users', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('username', sa.String(length=50), nullable=False), + sa.Column('email', sa.String(length=100), nullable=False), + sa.Column('password', sa.String(length=100), nullable=False), + sa.Column('avatar_url', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True) + op.create_index(op.f('ix_users_id'), 'users', ['id'], unique=False) + op.create_index(op.f('ix_users_username'), 'users', ['username'], unique=True) + op.create_table('questions', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('topic', sa.String(length=100), nullable=False), + sa.Column('context', sa.Text(), nullable=False), + sa.Column('question_text', sa.Text(), nullable=False), + sa.Column('correct_choice', sa.String(length=255), nullable=False), + sa.Column('tags', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_questions_id'), 'questions', ['id'], unique=False) + op.create_table('choices', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('question_id', sa.Integer(), nullable=False), + sa.Column('choice_text', sa.String(length=255), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.ForeignKeyConstraint(['question_id'], ['questions.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_choices_id'), 'choices', ['id'], unique=False) + op.create_table('comments', + sa.Column('id', sa.Integer(), nullable=False), + 
sa.Column('question_id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('comment_text', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.ForeignKeyConstraint(['question_id'], ['questions.id'], ), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_comments_id'), 'comments', ['id'], unique=False) + op.create_table('ratings', + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('question_id', sa.Integer(), nullable=False), + sa.Column('rating_value', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True), + sa.ForeignKeyConstraint(['question_id'], ['questions.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('user_id', 'question_id') + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('ratings') + op.drop_index(op.f('ix_comments_id'), table_name='comments') + op.drop_table('comments') + op.drop_index(op.f('ix_choices_id'), table_name='choices') + op.drop_table('choices') + op.drop_index(op.f('ix_questions_id'), table_name='questions') + op.drop_table('questions') + op.drop_index(op.f('ix_users_username'), table_name='users') + op.drop_index(op.f('ix_users_id'), table_name='users') + op.drop_index(op.f('ix_users_email'), table_name='users') + op.drop_table('users') + # ### end Alembic commands ### diff --git a/alembic/versions/__pycache__/7531e2b3a772_init_database.cpython-313.pyc b/alembic/versions/__pycache__/7531e2b3a772_init_database.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ebb33724339217702643559bf0b19d4401d6c5d Binary files /dev/null and b/alembic/versions/__pycache__/7531e2b3a772_init_database.cpython-313.pyc differ diff --git a/env.py b/env.py new file mode 100644 index 0000000000000000000000000000000000000000..d76e15a7de98ac0558adfae2e090e91736aec079 --- /dev/null +++ b/env.py @@ -0,0 +1,25 @@ +import os +from dotenv import load_dotenv + +load_dotenv() + +config = { + "app": { + 'port': os.getenv("PORT"), + }, + "db": { + "host": os.getenv("DB_HOST"), + "port": os.getenv("DB_PORT"), + "user": os.getenv("DB_USER"), + "password": os.getenv("DB_PASSWORD"), + "database": os.getenv("DB_DATABASE"), + "pool_size": int(os.getenv("POOL_SIZE")) | 8, + "max_overflow": int(os.getenv("MAX_OVERFLOW")) | 16, + "pool_recycle": int(os.getenv("POOL_RECYCLE")), + }, + "jwt": { + "expired_in": int(os.getenv("JWT_EXPIRATION_DELTA")) | 24, # hour + "algorithm": os.getenv("JWT_ALGORITHM"), + "secret_key": os.getenv("JWT_SECRET"), + } +} \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..3af69551b750346bd90a1fc4a9551f281b722741 --- /dev/null +++ b/main.py @@ -0,0 +1,23 @@ +from fastapi import FastAPI, Request, HTTPException + +from src.routers.public.public import router +from src.utils.response import handler_error + +app = FastAPI() +@app.exception_handler(Exception) +async def 
exception_handler(request: Request, exc: Exception): + return handler_error(exc) + +@app.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + return handler_error(exc) + +@app.get('/check-health') +def check_health(): + return {"status": "ok"} + +app.include_router(router) + +if __name__ == "__main__": + import uvicorn + uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe6dda6c83b8383bf05828cbbdc8ad6ab9879b24 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,135 @@ +aiomysql==0.2.0 +annotated-types==0.7.0 +anyio==4.11.0 +bcrypt==5.0.0 +beautifulsoup4==4.14.2 +blis==1.3.0 +cachetools==6.2.0 +catalogue==2.0.10 +certifi==2025.10.5 +cffi==2.0.0 +charset-normalizer==3.4.3 +click==8.3.0 +cloudpathlib==0.22.0 +colorama==0.4.6 +coloredlogs==15.0.1 +confection==0.1.5 +cryptography==46.0.2 +curated-tokenizers==0.0.9 +curated-transformers==0.1.1 +cymem==2.0.11 +deep-translator==1.11.4 +dnspython==2.8.0 +email-validator==2.3.0 +exceptiongroup==1.3.0 +fastapi==0.118.0 +fastapi-cli==0.0.13 +fastapi-cloud-cli==0.3.0 +fastt5==0.0.5 +filelock==3.19.1 +flatbuffers==25.9.23 +fsspec==2025.9.0 +gdown==5.2.0 +google-api-core==2.25.2 +google-auth==2.41.1 +google-cloud-core==2.4.3 +google-cloud-storage==3.4.0 +google-crc32c==1.7.1 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +greenlet==3.2.4 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.6.4 +httpx==0.28.1 +huggingface-hub==0.35.3 +humanfriendly==10.0 +idna==3.10 +iniconfig==2.1.0 +Jinja2==3.1.6 +joblib==1.5.2 +jwt==1.4.0 +keybert==0.9.0 +keyphrase-vectorizers==0.0.13 +langcodes==3.5.0 +language_data==1.3.0 +marisa-trie==1.3.1 +markdown-it-py==4.0.0 +MarkupSafe==3.0.3 +mdurl==0.1.2 +ml_dtypes==0.5.3 +mpmath==1.3.0 +murmurhash==1.0.13 +networkx==3.4.2 +nltk==3.9.2 +numpy==2.2.6 +onnx==1.19.0 +onnxruntime==1.23.0 +packaging==25.0 +pillow==11.3.0 +pluggy==1.6.0 +preshed==3.0.10 +progress==1.6.1 +proto-plus==1.26.1 +protobuf==6.32.1 +psutil==7.1.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.23 +pydantic==2.11.10 +pydantic_core==2.33.2 +Pygments==2.19.2 +PyMySQL==1.1.2 +PyPDF2==3.0.1 +pyreadline3==3.5.4 +PySocks==1.7.1 +pytesseract==0.3.13 +pytest==8.4.2 +python-dotenv==1.1.1 +python-multipart==0.0.20 +PyYAML==6.0.3 +regex==2025.9.18 +requests==2.32.5 +rich==14.1.0 +rich-toolkit==0.15.1 +rignore==0.7.0 +rsa==4.9.1 +safetensors==0.6.2 +scikit-learn==1.7.2 +scipy==1.15.3 +sense2vec==2.0.2 +sentence-transformers==5.1.1 +sentencepiece==0.2.1 +sentry-sdk==2.39.0 +shellingham==1.5.4 +sklearn==0.0 +smart_open==7.3.1 +sniffio==1.3.1 +soupsieve==2.8 +spacy==3.8.7 +spacy-alignments==0.9.2 +spacy-curated-transformers==0.3.1 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy-transformers==1.3.9 +SQLAlchemy==2.0.43 +srsly==2.5.1 +starlette==0.48.0 +sympy==1.14.0 +thinc==8.3.6 +threadpoolctl==3.6.0 +tokenizers==0.21.4 +tomli==2.2.1 +torch==2.8.0 +tqdm==4.67.1 +transformers==4.49.0 +typer==0.19.2 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +urllib3==2.5.0 +uvicorn==0.37.0 +wasabi==1.1.3 +watchfiles==1.1.0 +weasel==0.4.1 +websockets==15.0.1 +wrapt==1.17.3 diff --git a/src/dtos/__pycache__/user.cpython-313.pyc b/src/dtos/__pycache__/user.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d76b894d397adf795b9cda57de60aae41e9ee72 Binary files /dev/null and b/src/dtos/__pycache__/user.cpython-313.pyc differ 
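Reviewer note: in env.py above, the `|` in `int(os.getenv("POOL_SIZE")) | 8` is a bitwise OR, not a default. With the committed .env it silently turns MAX_OVERFLOW=32 into 48 (32 | 16), and it still raises TypeError when a variable is missing because int(None) is evaluated first. A small helper with an explicit fallback is one way to get real defaults; the helper name below is illustrative:

    import os

    def int_env(name: str, default: int) -> int:
        """Read an integer environment variable, falling back to `default` when unset or empty."""
        raw = os.getenv(name)
        return int(raw) if raw not in (None, "") else default

    config = {
        "db": {
            # ... other keys unchanged ...
            "pool_size": int_env("POOL_SIZE", 8),
            "max_overflow": int_env("MAX_OVERFLOW", 16),
            "pool_recycle": int_env("POOL_RECYCLE", 3600),
        },
        "jwt": {
            "expired_in": int_env("JWT_EXPIRATION_DELTA", 24),  # hours
        },
    }

Two related observations: main.py runs uvicorn on port 8000 while .env sets PORT=8001, so the config value is never used; and requirements.txt pins jwt==1.4.0, although the decode call in src/middlewares/authenticate.py further down looks like PyJWT usage, which is published as pyjwt and expects algorithms=[...] (a list) rather than algorithm=.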
diff --git a/src/dtos/user.py b/src/dtos/user.py new file mode 100644 index 0000000000000000000000000000000000000000..08754d72dcb791f47897525773e6029bc099898c --- /dev/null +++ b/src/dtos/user.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel + + +class UserDto(BaseModel): + id: int + username: str + email: str + + model_config = { + "from_attributes": True, + } \ No newline at end of file diff --git a/src/interfaces/__pycache__/auth.cpython-313.pyc b/src/interfaces/__pycache__/auth.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e2a8fb38fb2e4f6e22c4a62e5e31141b795b05ea Binary files /dev/null and b/src/interfaces/__pycache__/auth.cpython-313.pyc differ diff --git a/src/interfaces/__pycache__/question.cpython-313.pyc b/src/interfaces/__pycache__/question.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f776c100bf6b56e34a6b6df95fcf85c95441360 Binary files /dev/null and b/src/interfaces/__pycache__/question.cpython-313.pyc differ diff --git a/src/interfaces/__pycache__/user.cpython-313.pyc b/src/interfaces/__pycache__/user.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a078dd27225a8017620b7057304ca6a853c21a5c Binary files /dev/null and b/src/interfaces/__pycache__/user.cpython-313.pyc differ diff --git a/src/interfaces/auth.py b/src/interfaces/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..0d5146b6854d613759045a124b8f186c3705e086 --- /dev/null +++ b/src/interfaces/auth.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel, Field + +class ILogin(BaseModel): + username: str + password: str + +class IPagination(BaseModel): + paging: int = Field(1, ge=1) + limit: int = Field(10, ge=1) + sort_by: str = "id", + sort_order: str = 'asc', + + @property + def offset(self): + return (self.page - 1) * self.limit diff --git a/src/interfaces/question.py b/src/interfaces/question.py new file mode 100644 index 0000000000000000000000000000000000000000..25419df4ea7af61be68fe649705e414d34c882e9 --- /dev/null +++ b/src/interfaces/question.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel +from typing import Optional + +class ModelInput(BaseModel): + """General request model structure for flutter incoming req.""" + user_id: Optional[str] = None + context: str + name: str + +class ICreateQuestion(BaseModel): + context: str + name: str \ No newline at end of file diff --git a/src/interfaces/user.py b/src/interfaces/user.py new file mode 100644 index 0000000000000000000000000000000000000000..70ff634e0c5884fa7ef8a91aff0727d9d1dca22b --- /dev/null +++ b/src/interfaces/user.py @@ -0,0 +1,19 @@ +from datetime import datetime + +from pydantic import BaseModel, EmailStr +from typing import Optional + +class ICreateUser(BaseModel): + username: str + email: EmailStr + password: str + +class IUpdateUser(BaseModel): + username: str + email: EmailStr + password: Optional[str] + +class IFilterUser(BaseModel): + username: Optional[str] = None + email: Optional[EmailStr] = None + username_or_email: Optional[str] = None diff --git a/src/loaders/__pycache__/app.cpython-313.pyc b/src/loaders/__pycache__/app.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97f9636c1f6bf2c6f0ad53cc3e5d8e88d38ed211 Binary files /dev/null and b/src/loaders/__pycache__/app.cpython-313.pyc differ diff --git a/src/loaders/__pycache__/database.cpython-313.pyc b/src/loaders/__pycache__/database.cpython-313.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..a1b21f6a9a58333124c214b7ce107aa157d7a26d Binary files /dev/null and b/src/loaders/__pycache__/database.cpython-313.pyc differ diff --git a/src/loaders/database.py b/src/loaders/database.py new file mode 100644 index 0000000000000000000000000000000000000000..ca6c88e92b24e794d71d68a19d0b9636673607b5 --- /dev/null +++ b/src/loaders/database.py @@ -0,0 +1,25 @@ +from env import config + +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker + +database_url = f"mysql+aiomysql://{config['db']['user']}:{config['db']['password']}@{config['db']['host']}:{config['db']['port']}/{config['db']['database']}" +engine = create_async_engine( + database_url, + pool_size=config['db']['pool_size'], + max_overflow=0, + pool_recycle=3600, + echo=False, + future=True, +) +SessionLocal = async_sessionmaker( + engine, + expire_on_commit=False, + class_=AsyncSession +) + +def get_db(): + db = SessionLocal() + try: + yield db + finally: + db.close() \ No newline at end of file diff --git a/src/middlewares/__pycache__/base.cpython-313.pyc b/src/middlewares/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c8c84dc3c0b8a851ab2d1f880ca364ca9241c03 Binary files /dev/null and b/src/middlewares/__pycache__/base.cpython-313.pyc differ diff --git a/src/middlewares/__pycache__/logging.cpython-313.pyc b/src/middlewares/__pycache__/logging.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0d8104770b2f2c5b00eeac025dd3b909407b1443 Binary files /dev/null and b/src/middlewares/__pycache__/logging.cpython-313.pyc differ diff --git a/src/middlewares/authenticate.py b/src/middlewares/authenticate.py new file mode 100644 index 0000000000000000000000000000000000000000..edb60cf8701967f1a2ca3a37a4b593bd0b7ce849 --- /dev/null +++ b/src/middlewares/authenticate.py @@ -0,0 +1,25 @@ +from fastapi import Request, HTTPException, Depends +from sqlalchemy.ext.asyncio import AsyncSession + +import jwt + +from env import config +from src.loaders.database import get_db +from src.services.user import get_user_service + +async def authenticate(request: Request, db: AsyncSession = Depends(get_db), user_service = Depends(get_user_service)): + auth_header = request.headers.get("Authorization") + if not auth_header or not auth_header.startswith("Bearer"): + raise HTTPException(status_code=401, detail="token_invalid") + + token = auth_header.split("Bearer")[1].strip() + payload = jwt.decode(token, config["jwt"]["secret_key"], algorithm=config["jwt"]["algorithm"]) + user_id = payload.get("id") + if user_id is None: + raise HTTPException(status_code=401, detail="token_invalid") + + user = await user_service.find_by_pk(db, user_id) + if user is None: + raise HTTPException(status_code=401, detail="token_invalid") + request.state.user = user + diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0664f2daf981b109d20683f4162918c1784b937b --- /dev/null +++ b/src/models/__init__.py @@ -0,0 +1,5 @@ +from .choice import Choice +from .comment import Comment +from .question import Question +from .rating import Rating +from .user import User diff --git a/src/models/__pycache__/__init__.cpython-313.pyc b/src/models/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92d8f5d76b8f3391dd1149bd66d8b2302cc28e19 Binary files /dev/null and 
b/src/models/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/models/__pycache__/base.cpython-313.pyc b/src/models/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6327d6508203ff1d5b6abbae0e9fe8e440777c00 Binary files /dev/null and b/src/models/__pycache__/base.cpython-313.pyc differ diff --git a/src/models/__pycache__/choice.cpython-313.pyc b/src/models/__pycache__/choice.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c332fd89d67669ccedfce2e92e06840c4aaeb30 Binary files /dev/null and b/src/models/__pycache__/choice.cpython-313.pyc differ diff --git a/src/models/__pycache__/comment.cpython-313.pyc b/src/models/__pycache__/comment.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..146ccafc725b0807e4d7d91062f40a506d1fccaf Binary files /dev/null and b/src/models/__pycache__/comment.cpython-313.pyc differ diff --git a/src/models/__pycache__/question.cpython-313.pyc b/src/models/__pycache__/question.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a35913b5da17bea44977c56d98a9b1a32b0ef5da Binary files /dev/null and b/src/models/__pycache__/question.cpython-313.pyc differ diff --git a/src/models/__pycache__/rating.cpython-313.pyc b/src/models/__pycache__/rating.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de83c257157279405f9f457c8bb39dee4e75b152 Binary files /dev/null and b/src/models/__pycache__/rating.cpython-313.pyc differ diff --git a/src/models/__pycache__/user.cpython-313.pyc b/src/models/__pycache__/user.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cb529a0f561f4d59c0fefcb39ceea2fd5e0f158 Binary files /dev/null and b/src/models/__pycache__/user.cpython-313.pyc differ diff --git a/src/models/base.py b/src/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..f3eb68ee12bd1ba1a7b7740345c827e1acdc8b05 --- /dev/null +++ b/src/models/base.py @@ -0,0 +1,3 @@ +from sqlalchemy.ext.declarative import declarative_base + +BaseModel = declarative_base() diff --git a/src/models/choice.py b/src/models/choice.py new file mode 100644 index 0000000000000000000000000000000000000000..7a1656816caee06c7ae74ca513451a1fc705d250 --- /dev/null +++ b/src/models/choice.py @@ -0,0 +1,15 @@ +# models.py +from sqlalchemy import Column, Integer, String, ForeignKey, DateTime, func +from sqlalchemy.orm import relationship +from src.models.base import BaseModel + +class Choice(BaseModel): + __tablename__ = "choices" + + id = Column(Integer, primary_key=True, index=True) + question_id = Column(Integer, ForeignKey("questions.id"), nullable=False) + choice_text = Column(String(255), nullable=False) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now(), server_default=func.now()) + + question = relationship("Question", back_populates="choices") diff --git a/src/models/comment.py b/src/models/comment.py new file mode 100644 index 0000000000000000000000000000000000000000..f72b56c0706ba8438416564152822c2030dbb29f --- /dev/null +++ b/src/models/comment.py @@ -0,0 +1,16 @@ +# models.py +from sqlalchemy import Column, Integer, ForeignKey, Text, DateTime, func +from sqlalchemy.orm import relationship +from src.models.base import BaseModel + +class Comment(BaseModel): + __tablename__ = "comments" + + id = Column(Integer, primary_key=True, index=True) + question_id = 
Column(Integer, ForeignKey("questions.id"), nullable=False) + user_id = Column(Integer, ForeignKey("users.id"), nullable=False) + comment_text = Column(Text, nullable=False) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now(), server_default=func.now()) + + question = relationship("Question", back_populates="comments") diff --git a/src/models/paragraph.py b/src/models/paragraph.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/models/question.py b/src/models/question.py new file mode 100644 index 0000000000000000000000000000000000000000..dc9c9acfa8eb474efc1ca9f19ba033d3015b2974 --- /dev/null +++ b/src/models/question.py @@ -0,0 +1,24 @@ +# models.py +from sqlalchemy import Column, Integer, String, Boolean, ForeignKey, Text, Date, DateTime, func +from sqlalchemy.orm import relationship +from src.models.base import BaseModel +from datetime import datetime + +class Question(BaseModel): + __tablename__ = "questions" + + id = Column(Integer, primary_key=True, index=True) + user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False) + topic = Column(String(100), nullable=False) + context = Column(Text, nullable=False) + question_text = Column(Text, nullable=False) + correct_choice = Column(String(255), nullable=False) + tags = Column(Text, nullable=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now(), server_default=func.now()) + + + user = relationship("User", back_populates="questions") + choices = relationship("Choice", back_populates="question", cascade="all, delete", passive_deletes=True) + comments = relationship("Comment", back_populates="question", cascade="all, delete", passive_deletes=True) + ratings = relationship("Rating", back_populates="question", cascade="all, delete", passive_deletes=True) diff --git a/src/models/rating.py b/src/models/rating.py new file mode 100644 index 0000000000000000000000000000000000000000..dde202cdc49d06f2fabd08d84186fa1a6a9db101 --- /dev/null +++ b/src/models/rating.py @@ -0,0 +1,15 @@ +# models.py +from sqlalchemy import Column, Integer, ForeignKey, DateTime, func +from sqlalchemy.orm import relationship +from src.models.base import BaseModel + +class Rating(BaseModel): + __tablename__ = "ratings" + + user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), primary_key=True) + question_id = Column(Integer, ForeignKey("questions.id", ondelete="CASCADE"), primary_key=True) + rating_value = Column(Integer, nullable=False) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now(), server_default=func.now()) + + question = relationship("Question", back_populates="ratings") \ No newline at end of file diff --git a/src/models/user.py b/src/models/user.py new file mode 100644 index 0000000000000000000000000000000000000000..e9414f2deb240daa7e101c3a34e71ac1092ac5a5 --- /dev/null +++ b/src/models/user.py @@ -0,0 +1,22 @@ +# models.py +from sqlalchemy import Column, Integer, String, Text, DateTime, func +from sqlalchemy.orm import relationship +from src.models.base import BaseModel + +class User(BaseModel): + __tablename__ = "users" + + id = Column(Integer, primary_key=True, index=True) + username = Column(String(50), unique=True, nullable=False, index=True) + email = Column(String(100), 
unique=True, nullable=False, index=True) + password = Column(String(100), nullable=False) + avatar_url = Column(Text, nullable=True) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), onupdate=func.now(), server_default=func.now()) + + questions = relationship( + "Question", + back_populates="user", + cascade="all, delete", + passive_deletes=True + ) \ No newline at end of file diff --git a/src/repositories/__pycache__/auth.cpython-313.pyc b/src/repositories/__pycache__/auth.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08b5a6d3f8e1b4a24cb9aa5cd8a3d71af2dfcce2 Binary files /dev/null and b/src/repositories/__pycache__/auth.cpython-313.pyc differ diff --git a/src/repositories/__pycache__/base.cpython-313.pyc b/src/repositories/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f5106d41bfb819338849ca907344461db0efd201 Binary files /dev/null and b/src/repositories/__pycache__/base.cpython-313.pyc differ diff --git a/src/repositories/__pycache__/user.cpython-313.pyc b/src/repositories/__pycache__/user.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e86875baa0503f75a47ea73509b3053fb6a2b51 Binary files /dev/null and b/src/repositories/__pycache__/user.cpython-313.pyc differ diff --git a/src/repositories/auth.py b/src/repositories/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..a35c2aa08e5124168cb68dd1d78f92494d295eda --- /dev/null +++ b/src/repositories/auth.py @@ -0,0 +1,12 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + + +from src.interfaces.user import * +from src.models.user import User + +class AuthRepository(): + async def find_by_username(self, db: AsyncSession, username: str) -> Optional[User]: + query = select(User).where(User.username == username) + result = await db.execute(query) + return await result.scalar_one_or_none() \ No newline at end of file diff --git a/src/repositories/base.py b/src/repositories/base.py new file mode 100644 index 0000000000000000000000000000000000000000..44bb45c3b0472e9a25d42dbfb60224545a659aaa --- /dev/null +++ b/src/repositories/base.py @@ -0,0 +1,32 @@ +from abc import ABC, abstractmethod + + +class ICrudRepository(ABC): + @abstractmethod + def store(self, data): + pass + + @abstractmethod + def update(self, entity, data): + pass + + @abstractmethod + def delete(self, entity): + pass + + @abstractmethod + def find_by_pk(self, entity_id): + pass + + @abstractmethod + def get_one(self, filter_data): + pass + + @abstractmethod + def get_many(self, paging, filter_data): + pass + + @abstractmethod + def build_query(filter_data): + pass + diff --git a/src/repositories/user.py b/src/repositories/user.py new file mode 100644 index 0000000000000000000000000000000000000000..17b61ebcdd9ab943fffaed1818c494fb7b33cddb --- /dev/null +++ b/src/repositories/user.py @@ -0,0 +1,73 @@ +from src.interfaces.auth import IPagination +from src.models.user import User +from src.repositories.base import ICrudRepository +from src.interfaces.user import ICreateUser, IFilterUser, IUpdateUser + +from sqlalchemy import select, or_ +from sqlalchemy.sql import Select +from sqlalchemy.ext.asyncio import AsyncSession +from typing import Optional + + + +class UserRepository(ICrudRepository): + async def store(self, db: AsyncSession, data: ICreateUser) -> User: + new_user = User(**data.model_dump()) + db.add(new_user) + await 
db.commit() + await db.refresh(new_user) + + return new_user + + async def update(self, db: AsyncSession, user: User, data: IUpdateUser) -> User : + update_data = data.model_dump(exclude_unset=True) + for key, value in update_data.items(): + if hasattr(user, key): + setattr(user, key, value) + + await db.commit() + await db.refresh(user) + return user + + async def delete(self, db: AsyncSession, user: User) : + await db.delete(user) + await db.commit() + return User(**user.model_dump()) + + async def find_by_pk(self, db: AsyncSession, user_id: int) -> Optional[User]: + return await db.get(User, user_id) + + async def get_one(self, db: AsyncSession, filter_data: IFilterUser): + query = self.build_query(filter_data) + result = await db.execute(query) + return result.scalar_one_or_none() + + async def get_many(self, db: AsyncSession, paging: IPagination, filter_data: IFilterUser): + query = self.build_query(filter_data) + query = query.limit(paging.limit).offset(paging.offset) + sort_by = getattr(User, paging.sort_by, None) + if sort_by is not None: + if paging.sort_order and paging.sort_order.lower() == 'desc': + query = query.order_by(sort_by.desc()) + else: + query = query.order_by(sort_by.asc()) + result = await db.execute(query) + return result.scalar().all() + + @staticmethod + def build_query(filters: IFilterUser) -> Select: + query = select(User) + conditions = [] + if filters.username_or_email: + conditions.append( + or_( + filters.username_or_email == User.email, + filters.username_or_email == User.username) + ) + else: + if filters.username: + conditions.append(filters.username == User.username) + if filters.email: + conditions.append(filters.email == User.email) + + return query.where(*conditions) \ No newline at end of file diff --git a/src/routers/public/__pycache__/auth.cpython-313.pyc b/src/routers/public/__pycache__/auth.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de36c65e768dad112bdb9390adae3884ab14518a Binary files /dev/null and b/src/routers/public/__pycache__/auth.cpython-313.pyc differ diff --git a/src/routers/public/__pycache__/public.cpython-313.pyc b/src/routers/public/__pycache__/public.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3592da91487d0cef6aac8da5602bf94863bb5f49 Binary files /dev/null and b/src/routers/public/__pycache__/public.cpython-313.pyc differ diff --git a/src/routers/public/__pycache__/quesion.cpython-313.pyc b/src/routers/public/__pycache__/quesion.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17af82586703e8ce8458c267291d0daa7aec6d63 Binary files /dev/null and b/src/routers/public/__pycache__/quesion.cpython-313.pyc differ diff --git a/src/routers/public/auth.py b/src/routers/public/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..9eaa09e0a5c8373e811766d0a3d77d23d4199ae5 --- /dev/null +++ b/src/routers/public/auth.py @@ -0,0 +1,33 @@ +from fastapi import APIRouter, Depends +from fastapi.responses import JSONResponse +from sqlalchemy.ext.asyncio import AsyncSession + +from src.interfaces.auth import ILogin +from src.interfaces.user import ICreateUser +from src.services.auth import AuthService, get_auth_service +from src.utils.response import res_ok +from src.services.user import UserService, get_user_service +from src.loaders.database import get_db + +router = APIRouter(prefix="/auth", tags=["auth"]) + + +@router.post("/login") +async def login(user: ILogin, db: AsyncSession = Depends(get_db), 
auth_service: AuthService = Depends(get_auth_service)): + token = await auth_service.authenticate(db, user) + + return JSONResponse( + status_code=200, + content=res_ok({ + "access_token": token, + }), + ) + +@router.post("/register") +async def register(user: ICreateUser, db: AsyncSession = Depends(get_db), user_service: UserService = Depends(get_user_service)): + new_user = await user_service.store(db, user) + return JSONResponse( + status_code=200, + content=res_ok(new_user.model_dump()) + ) + diff --git a/src/routers/public/public.py b/src/routers/public/public.py new file mode 100644 index 0000000000000000000000000000000000000000..524e6c896ccd0f644cf499c95b574f0b727ecff7 --- /dev/null +++ b/src/routers/public/public.py @@ -0,0 +1,10 @@ +from fastapi import APIRouter + +from src.routers.public.quesion import route as question_route +from src.routers.public.auth import router as auth_route + +router = APIRouter(prefix="/public", tags=["public"]) + +print("Including public routes...") +router.include_router(question_route) +router.include_router(auth_route) \ No newline at end of file diff --git a/src/routers/public/quesion.py b/src/routers/public/quesion.py new file mode 100644 index 0000000000000000000000000000000000000000..fe9d648440b5269bc491a23d9a4b2decda352a41 --- /dev/null +++ b/src/routers/public/quesion.py @@ -0,0 +1,103 @@ +from fastapi import APIRouter, Request +from fastapi.responses import JSONResponse + +from src.utils.response import res_ok +from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions +from src.interfaces.question import ModelInput, ICreateQuestion +from src.services.AI.abstractive_summarizer import AbstractiveSummarizer +from src.services.AI.question_generator import QuestionGenerator +from src.services.AI.false_ans_generator import FalseAnswerGenerator +from src.services.AI.keyword_extractor import KeywordExtractor + +route = APIRouter(prefix="/question", tags=["Question"]) +print("Including question routes...") +@route.post('/sentence') +async def generate_questions_from_sentence(body: ICreateQuestion, request: Request): + """Process user request + + Args: + request (ModelInput): request model + bg_task (BackgroundTasks): run process_request() on other thread + and respond to request + + Returns: + dict(str: int): response + """ + # bg_task.add_task(process_request, request) + + + # # Tạo một dictionary để lưu trữ kết quả + # results = [] + + # def background_task(): + # nonlocal results + # results = process_request(request) + + # # Thêm tác vụ nền để xử lý yêu cầu + # bg_task.add_task(background_task) + + + # Thực hiện xử lý yêu cầu và lưu kết quả vào Firestore + # Không dùng background vì để nó chạy trong cùng 1 thread để chờ xử lí xong mới có results + new_questions = [] + error_sentences = [] + model_input = ModelInput(**body.model_dump(), user_id=None) + try: + new_questions = generate_and_store_questions(model_input) + except Exception as e: + # Không để là model_input.context mà là request.context vì model_input.context là tiếng Anh + print(f"Lỗi khi xử lí câu: {body.context}. Lỗi: {e}") + error_sentences.append({'sentence': body.context, 'error': str(e)}) + + result = { + "success": new_questions, + "fail": error_sentences + } + return JSONResponse(status_code=200, content=res_ok(result)) + +async def generate_and_store_questions(self, request): + """Generate questions from user request and store results in Firestore. + + Args: + request (ModelInput): request from flutter. 
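Reviewer note: the async SQLAlchemy plumbing earlier in this diff has a few spots that fail at runtime. get_db() in src/loaders/database.py never actually closes the AsyncSession (close() is a coroutine that is not awaited), AuthRepository.find_by_username awaits scalar_one_or_none() even though the result is already a plain value, and UserRepository.get_many calls result.scalar().all() instead of result.scalars().all(). A minimal corrected sketch; function names other than get_db are illustrative:

    from typing import Optional

    from sqlalchemy import select
    from sqlalchemy.ext.asyncio import AsyncSession

    from src.loaders.database import SessionLocal
    from src.models.user import User


    async def get_db():
        # Async generator dependency: the AsyncSession is closed (and awaited) when the request ends.
        async with SessionLocal() as session:
            yield session


    async def find_by_username(db: AsyncSession, username: str) -> Optional[User]:
        result = await db.execute(select(User).where(User.username == username))
        return result.scalar_one_or_none()   # already a plain value; nothing to await


    async def get_many(db: AsyncSession, limit: int = 10, offset: int = 0):
        result = await db.execute(select(User).limit(limit).offset(offset))
        return result.scalars().all()        # scalars(), not scalar()

The same pass would catch UserRepository.delete returning User(**user.model_dump()) (ORM instances have no model_dump) and IPagination, which declares `paging` while its offset property and the user router read `.page`, and whose trailing commas make the sort_by / sort_order defaults tuples rather than strings.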
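Reviewer note: in the /question/sentence route above, generate_and_store_questions is declared at module level as `async def generate_and_store_questions(self, request)`, refers to self.user_repo, and is called without await, so every request raises TypeError (caught by the route's except) and lands in "fail". If the user_repo / Firestore bookkeeping from the original project is not needed here, a plain synchronous helper wired to generate_questions_and_answers below is enough. A sketch of a module-level replacement, reusing the imports already in src/routers/public/quesion.py; translating the output back to Vietnamese is an assumption:

    def generate_and_store_questions(model_input: ModelInput):
        """Run the generation pipeline for one request (sketch; persistence omitted)."""
        context = vietnamese_to_english(model_input.context)
        questions, crct_ans, all_ans = generate_questions_and_answers(context)

        results = []
        for i, (question, answer) in enumerate(zip(questions, crct_ans)):
            choices = all_ans[i * 4:(i + 1) * 4]   # the false-answer generator yields 4 options per answer
            results.append({
                "question": english_to_vietnamese(question),
                "correct_choice": english_to_vietnamese(answer),
                "choices": [english_to_vietnamese(c) for c in choices],
            })
        return results

With a synchronous helper, the existing call in the route body works unchanged; if it stays async, the route must await it.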
+ + Returns: + dict: results saved to Firestore + """ + request.context = vietnamese_to_english(request.context) + request.name = vietnamese_to_english(request.name) + + await self.user_repo.update_generator_working_status(request, True) + questions, crct_ans, all_ans = await self.generate_questions_and_answers(request.context) + await self.user_repo.update_generator_working_status(request, False) + + results = self.send_results_to_db(request, questions, crct_ans, all_ans, request.context) + return results + +def generate_questions_and_answers(context: str): + """Generate questions and answers from given context. + + Args: + context (str): input corpus used to generate question. + + Returns: + tuple[list[str], list[str], list[list[str]]]: + questions, correct answers, and all answer choices. + """ + summarizer = AbstractiveSummarizer() + question_gen = QuestionGenerator() + false_ans_gen = FalseAnswerGenerator() + keyword_extractor = KeywordExtractor() + summary, splitted_text = get_all_summary( + model=summarizer, context=context + ) + filtered_kws = keyword_extractor.get_keywords( + original_list=splitted_text, summarized_list=summary + ) + + crct_ans, all_answers = false_ans_gen.get_output(filtered_kws=filtered_kws) + questions = get_all_questions( + model=question_gen, context=summary, answer=crct_ans + ) + + return questions, crct_ans, all_answers \ No newline at end of file diff --git a/src/routers/user/user.py b/src/routers/user/user.py new file mode 100644 index 0000000000000000000000000000000000000000..859c95a4e4fcce639bfde6ec52cc4be2fb50f5ce --- /dev/null +++ b/src/routers/user/user.py @@ -0,0 +1,62 @@ +from fastapi import APIRouter, Depends +from fastapi.responses import JSONResponse +from typing import Optional + +from interfaces.user import IUpdateUser, IFilterUser +from interfaces.auth import IPagination +from middlewares.authenticate import authenticate +from services.user import UserService, get_user_service +from utils.response import res_ok + +router = APIRouter( + prefix="/users", + tags=["users"], + dependencies=[Depends(authenticate)] +) + +@router.get("/") +async def index( + paging: IPagination = Depends(IPagination), + keyword: Optional[str] = None, + user_service: UserService = Depends(get_user_service), +): + await user_service.get_many( + paging, + IFilterUser(username_or_email=keyword) + ) + + return JSONResponse( + status_code=200, + content=res_ok( + # result["rows"], + 10, + message="success", + page=paging.page, + limit=paging.limit, + total_item=100 + ) + ) + +@router.get("/{user_id}") +async def detail(user_id: int, user_service: UserService = Depends(get_user_service),): + user = await user_service.find_or_fail(user_id) + return JSONResponse( + status_code=200, + content=res_ok(user.dict()) + ) + +@router.put("/{user_id}") +async def update(user_id: int, user: IUpdateUser, user_service: UserService = Depends(get_user_service),): + updated_user = await user_service.update(user_id, user) + return JSONResponse( + status_code=200, + content=res_ok(updated_user.model_dump()) + ) + +@router.delete("/{user_id}") +async def delete(user_id: int, user_service: UserService = Depends(get_user_service),): + await user_service.delete(user_id) + return JSONResponse( + status_code=200, + content=res_ok({"id": user_id}) + ) diff --git a/src/services/AI/__pycache__/abstractive_summarizer.cpython-313.pyc b/src/services/AI/__pycache__/abstractive_summarizer.cpython-313.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..24ede270c72c64e568fe936de60fe72ef7fbf6c4 Binary files /dev/null and b/src/services/AI/__pycache__/abstractive_summarizer.cpython-313.pyc differ diff --git a/src/services/AI/__pycache__/base.cpython-313.pyc b/src/services/AI/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bdfd3cdc89d47bdae0f2ff248a144c781f933898 Binary files /dev/null and b/src/services/AI/__pycache__/base.cpython-313.pyc differ diff --git a/src/services/AI/__pycache__/false_ans_generator.cpython-313.pyc b/src/services/AI/__pycache__/false_ans_generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6276ac102a23ad3de19f3a87efa90feeb7684922 Binary files /dev/null and b/src/services/AI/__pycache__/false_ans_generator.cpython-313.pyc differ diff --git a/src/services/AI/__pycache__/keyword_extractor.cpython-313.pyc b/src/services/AI/__pycache__/keyword_extractor.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d96a0c6aad8fa5ba5a6ba6086b0513b8784c8323 Binary files /dev/null and b/src/services/AI/__pycache__/keyword_extractor.cpython-313.pyc differ diff --git a/src/services/AI/__pycache__/question_generator.cpython-313.pyc b/src/services/AI/__pycache__/question_generator.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6081faf0e7c8bd033b6997bceceeca1ba2a70d6e Binary files /dev/null and b/src/services/AI/__pycache__/question_generator.cpython-313.pyc differ diff --git a/src/services/AI/abstractive_summarizer.py b/src/services/AI/abstractive_summarizer.py new file mode 100644 index 0000000000000000000000000000000000000000..ce53a8ce228e66c604fb2964cc42aed3ffbfda8a --- /dev/null +++ b/src/services/AI/abstractive_summarizer.py @@ -0,0 +1,53 @@ +"""This module contains all tasks specific for summarizer + +@Author: Karthick T. Sharma +""" + +from .base import Model +from src.utils.text_process import postprocess_summary, split_text + + +class AbstractiveSummarizer(Model): + """Summarize input context.""" + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super(AbstractiveSummarizer, cls).__new__(cls) + cls._instance._init_model() + return cls._instance + + def _init_model(self): + """Initialize corpus summarizer only once.""" + super().__init__(model_name='google-t5/t5-base') + + # def __init__(self): + # """Initialize corpus summarizer.""" + # # NOTE: Default + # super().__init__(model_name='google-t5/t5-base') + # super().__init__(model_name='t5-base', path_id='1-50SZ_WIHX4A6mkpsz-t0EAF_VhtHb-9') + # super().__init__(model_name='t5-small', path_id='1ODslrpbSXB0HWAGymYmyJn5nFO8GELpd') + + def preprocess_input(self, model_input): + """Process model input. + + Args: + model_input (str): bulk text that needs to be processed. + + Returns: + list(str): processed text chunks. + """ + return split_text(model_input) + + def summarize(self, context): + """Generate abstrative summary of given context. + + Args: + context (str): input corpus. + + Returns: + str: summarized text. 
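Reviewer note: AbstractiveSummarizer (like QuestionGenerator and KeywordExtractor below) caches a single instance through __new__, so the T5 checkpoint is loaded once and every later construction returns the same object. A minimal self-contained illustration of the pattern; the class here is a stand-in, not one of the real services:

    class HeavyService:
        """Lazy singleton: the expensive setup in _init_model runs exactly once."""

        _instance = None

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._init_model()
            return cls._instance

        def _init_model(self):
            self.model = object()  # stands in for loading a transformer checkpoint

    a, b = HeavyService(), HeavyService()
    assert a is b and a.model is b.model  # same object, model loaded only once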
+ """ + return postprocess_summary(super().inference( + num_beams=3, no_repeat_ngram_size=2, model_max_length=512, + num_return_sequences=1, summarize=context)) diff --git a/src/services/AI/base.py b/src/services/AI/base.py new file mode 100644 index 0000000000000000000000000000000000000000..9bdcaf4e8dfe0d67aa582ce1a3245ed41a419969 --- /dev/null +++ b/src/services/AI/base.py @@ -0,0 +1,77 @@ +""" +This module contains all tasks related to transformer model +Refactored to use Hugging Face Flan-T5 (no fastT5 dependency) + +@Author: Karthick T. Sharma +@Modified: LinhGPT +""" + +import os +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM + + +class Model: + """Generalized T5/Flan-T5 model for text generation.""" + + def __init__(self, model_name: str = "google/flan-t5-base"): + """ + Load model and tokenizer into memory. + + Args: + model_name (str): Name or path of the Hugging Face model. + """ + os.environ["TOKENIZERS_PARALLELISM"] = "false" + + print(f"🔹 Loading model: {model_name} ...") + self.__tokenizer = AutoTokenizer.from_pretrained(model_name) + self.__model = AutoModelForSeq2SeqLM.from_pretrained(model_name) + print("✅ Model and tokenizer loaded successfully.\n") + + def tokenize_corpus(self, text: str, max_length: int): + """Tokenize model input text.""" + encode = self.__tokenizer.encode_plus( + text, + return_tensors="pt", + max_length=max_length, + truncation=True, + padding="max_length", + ) + return encode["input_ids"], encode["attention_mask"] + + def __extract_dict(self, input_dict): + """Extract key-value pairs into a string format.""" + return " ".join(f"{k}: {v}" for k, v in input_dict.items()) + + def inference( + self, + num_beams: int = 4, + no_repeat_ngram_size: int = 2, + model_max_length: int = 128, + num_return_sequences: int = 1, + token_max_length: int = 256, + **kwargs, + ): + """ + Generate model output text. + """ + text = self.__extract_dict(kwargs) + input_ids, attention_mask = self.tokenize_corpus(text, token_max_length) + + outputs = self.__model.generate( + input_ids=input_ids, + attention_mask=attention_mask, + num_beams=num_beams, + num_return_sequences=num_return_sequences, + no_repeat_ngram_size=no_repeat_ngram_size, + max_length=model_max_length, + early_stopping=True, + ) + + decoded = [ + self.__tokenizer.decode( + output, skip_special_tokens=True, clean_up_tokenization_spaces=True + ) + for output in outputs + ] + + return decoded[0] if num_return_sequences == 1 else decoded diff --git a/src/services/AI/false_ans_generator.py b/src/services/AI/false_ans_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..7a3bbc7fe8a83c2c1c7a46704ee2cc136097ed09 --- /dev/null +++ b/src/services/AI/false_ans_generator.py @@ -0,0 +1,200 @@ +"""This module generates false answers within same context. + +@Author: Karthick T. 
Sharma +""" + +import os +import random +import urllib.request +import tarfile + +import numpy as np + +from sklearn.metrics.pairwise import cosine_similarity +from sentence_transformers import SentenceTransformer +from sense2vec import Sense2Vec + +from src.utils.text_process import change_format +import tempfile + + +class FalseAnswerGenerator: + """Generate false answers within same context.""" + + # def __init__(self): + # """Initialize false answer generation models.""" + # self.__init_sentence_transformer() + # self.__init_sense2vec() + + def __new__(cls): + if cls._instance is None: + cls._instance = super(FalseAnswerGenerator, cls).__new__(cls) + cls._instance._init_models() + return cls._instance + + def _init_model(self): + self.__init_sentence_transformer() + self.__init_sense2vec() + + def __init_sentence_transformer(self): + """Initialize sentence embedding. + + https://www.sbert.net/ + """ + self._sentence_model = SentenceTransformer('all-MiniLM-L12-v2') + + def __init_sense2vec(self): + """Initialize word vectors to get similar words. + + https://github.com/explosion/sense2vec + """ + if not os.path.isdir(os.getcwd() + '/s2v_old'): + s2v_url = "https://github.com/explosion/sense2vec/releases/download/" + s2v_ver_url = s2v_url + "v1.0.0/s2v_reddit_2015_md.tar.gz" + + with urllib.request.urlopen(s2v_ver_url) as req: + # save downloaded to a temp file first + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + temp_file.write(req.read()) + temp_file_path = temp_file.name + + with tarfile.open(temp_file_path, mode='r:gz') as file: + def is_within_directory(directory, target): + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + prefix = os.path.commonprefix([abs_directory, abs_target]) + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + tar.extractall(path, members, numeric_owner=numeric_owner) + + safe_extract(file) + + self._s2v = Sense2Vec().from_disk("s2v_old") + + def __get_embedding(self, answer, distractors): + """Returns sentence model embedding of answer and distractors. + + Args: + answer (str): correct answer. + distractors (list[str]): false answers. + + Returns: + tuple[list[str], list[str]]: sentence model embedding of answer and distractors. + """ + return self._sentence_model.encode([answer]), self._sentence_model.encode(distractors) + + def filter_output(self, orig, dummies): + """Filter out final answers. + + Args: + orig (str): correct answer. + dummies (list[str]): false answers list generated from correct answer. + + Returns: + list[str]: list of final answer which has low similarity. + """ + ans_embedded, dis_embedded = self.__get_embedding(orig, dummies) + # filter using MMMR + dist = self.__mmr(ans_embedded, dis_embedded, dummies) + + filtered_dist = [] + for dis in dist: + # 0 -> word, 1 -> confidence / probability + filtered_dist.append(dis[0].capitalize()) + + return filtered_dist + + def __mmr(self, doc_embedding, word_embedding, words, diversity=0.9): + """Word diversity using MMR - Maximal Marginal Relevance. + + Args: + doc_embedding (list[str]): sentence embedding of correct answer. + word_embedding (list[str]): sentence embedding of false answer. + words (list[str]): false answers. + diversity (float, optional): diversity coefficient. Defaults to 0.9. 
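Reviewer note: FalseAnswerGenerator follows the same singleton pattern, but as committed __new__ reads cls._instance without a class-level `_instance = None` and then calls _init_models(), while the method is named _init_model, so the first construction fails with AttributeError before any model loads. A corrected skeleton, with the heavy model loading left as in the diff:

    class FalseAnswerGenerator:
        """Generate false answers within the same context."""

        _instance = None                        # must exist before __new__ reads it

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._init_model()     # match the method name defined below
            return cls._instance

        def _init_model(self):
            # unchanged from the diff: load the SentenceTransformer and Sense2Vec resources here
            ...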
+
+        Returns:
+            list[str]: list of final answers.
+        """
+        # extract similarity between words and docs
+        word_doc_similarity = cosine_similarity(word_embedding, doc_embedding)
+        word_similarity = cosine_similarity(word_embedding)
+
+        kw_idx = [np.argmax(word_doc_similarity)]  # np.argmax() is still supported in NumPy 2.0.2
+        dist_idx = [i for i in range(len(words)) if i != kw_idx[0]]
+
+        for _ in range(3):
+            dist_similarities = word_doc_similarity[dist_idx, :]
+            target_similarities = np.max(
+                word_similarity[dist_idx][:, kw_idx], axis=1
+            )
+
+            # calculate MMR
+            mmr = (1 - diversity) * dist_similarities - \
+                diversity * target_similarities.reshape(-1, 1)
+            mmr_idx = dist_idx[np.argmax(mmr)]
+
+            # update kw
+            kw_idx.append(mmr_idx)
+            dist_idx.remove(mmr_idx)
+
+        return [(words[idx], round(float(word_doc_similarity.reshape(1, -1)[0][idx]), 4))
+                for idx in kw_idx]
+
+    def __generate_answer(self, query):
+        """Generate false answers from correct answer.
+
+        Args:
+            query (str): correct answer.
+
+        Returns:
+            list(str): list of final answers if input is valid, else None.
+        """
+        # get the best sense for given word (like NOUN, PRONOUN, VERB...)
+        query_al = self._s2v.get_best_sense(query.lower().replace(' ', '_'))
+
+        if query_al is None:
+            return None
+
+        try:
+            assert query_al in self._s2v
+            # get most similar 20 words (if any)
+            temp = self._s2v.most_similar(query_al, n=20)
+            formatted_string = change_format(temp)
+            formatted_string.insert(0, query)
+            # if answers are numbers then we don't need to filter
+            if query_al.split('|')[1] == 'CARDINAL':
+                return formatted_string[:4]
+            # else filter because sometimes similar words will be US, U.S, USA, AMERICA...
+            return self.filter_output(query, formatted_string)
+        except AssertionError:
+            return None
+
+    def get_output(self, filtered_kws):
+        """Generate false answers for whole context.
+
+        Filter out keywords that don't generate 3 false answers.
+
+        Args:
+            filtered_kws (list(list(str))): list of keyword lists.
+
+        Returns:
+            tuple(list(str), list(str)): correct answers and a flattened list of all answer options.
+        """
+        crct_ans = []
+        all_answers = []
+
+        for kws in filtered_kws:
+            for kwx in kws:
+                results = self.__generate_answer(kwx)
+                if results is not None:
+                    crct_ans.append(kwx.capitalize())
+                    random.shuffle(results)
+                    all_answers.append(results)
+
+        return crct_ans, sum(all_answers, [])
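A rough usage sketch for this class; the keyword lists are made up, and the first instantiation downloads the sense2vec archive into ./s2v_old if it is not already present:

    from src.services.AI.false_ans_generator import FalseAnswerGenerator

    generator = FalseAnswerGenerator()  # singleton: models load only on first call
    keywords_per_chunk = [["photosynthesis", "chlorophyll"], ["glucose"]]

    correct, options = generator.get_output(keywords_per_chunk)
    # `correct` holds the keywords that produced distractors,
    # `options` is the flattened pool of answer choices for those keywords.
    print(correct, options)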
+ # """ + # self.__kw_model = KeyBERT() + # self.__vectorizer = KeyphraseCountVectorizer() + + def __extract_keywords(self, text): + """Extract keywords from corpus using KeyBERT. + + Args: + text (str): corpus used to extract keywords. + + Returns: + list[str]: list of keywords extracted from input corpus. + """ + kwx = self.__kw_model.extract_keywords( + text, vectorizer=self.__vectorizer) + + kw_ls = [] + for i in kwx: + # 0 -> keyword, 1-> confidence / probability + kw_ls.append(i[0]) + return kw_ls + + def filter_keywords(self, original, summarized): + """Extract keywords from both summary and original text and only return keywords + which are common. + + Args: + original (str): original corpus. + summarized (str): summarized corpus. + + Returns: + list(str): list of keywords common for both corpus. + """ + orig_ls = set(self.__extract_keywords(original)) + sum_ls = self.__extract_keywords(summarized) + return list(orig_ls.intersection(sum_ls)) + + def get_keywords(self, original_list, summarized_list): + """Return keywords from input corpus + + Args: + original_list (str): list of original corpus. + summarized_list (str): list of summarized corpus. + + Returns: + list(list(str)): list of keywords common for both corpus. + """ + kw_list = [] + + for orig, sum_ in zip(original_list, summarized_list): + kw_list.append(self.filter_keywords(orig, sum_)) + + return kw_list diff --git a/src/services/AI/question_generator.py b/src/services/AI/question_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..09bdc20691bb3c0e2c8d392d441ef5b8721b0ec0 --- /dev/null +++ b/src/services/AI/question_generator.py @@ -0,0 +1,42 @@ +"""This module contains all tasks specific to question generation model + +@Author: Karthick T. Sharma +""" + +from .base import Model +from src.utils.text_process import postprocess_question + + +class QuestionGenerator(Model): + """Generate question from context and answer.""" + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super(QuestionGenerator, cls).__new__(cls) + cls._instance._init_model() + return cls._instance + + def _init_model(self): + """Initialize question generator once.""" + super().__init__(model_name='iarfmoose/t5-base-question-generator') + + # def __init__(self): + # """Initialize question generator.""" + # super().__init__(model_name='iarfmoose/t5-base-question-generator') + # super().__init__(model_name='t5-question', + # path_id='1_0dPLdv8WNtSYQdKEWxFc03IR-szs0kB') + + def generate(self, context, answer): + """Generate abstrative summary of given context. + + Args: + context (str): input corpus. + ans (str): ans for question that needs to be generated. + + Returns: + str: generated question. 
+ """ + return postprocess_question(super().inference( + num_beams=5, no_repeat_ngram_size=2, model_max_length=72, + token_max_length=382, context=context, answer=answer)) diff --git a/src/services/__pycache__/auth.cpython-313.pyc b/src/services/__pycache__/auth.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e48eeec8687a45d658d4b491b0fa7fef0b6def4 Binary files /dev/null and b/src/services/__pycache__/auth.cpython-313.pyc differ diff --git a/src/services/__pycache__/user.cpython-313.pyc b/src/services/__pycache__/user.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f01d5f1d73e415a5211b57cd74bdb396fcac0876 Binary files /dev/null and b/src/services/__pycache__/user.cpython-313.pyc differ diff --git a/src/services/auth.py b/src/services/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..1b117eeb0915f22c5051712da3eefcb96de2073b --- /dev/null +++ b/src/services/auth.py @@ -0,0 +1,45 @@ + +from typing import Dict +import jwt +from datetime import datetime, timezone, timedelta +from sqlalchemy.ext.asyncio import AsyncSession + + +from src.utils.exceptions import BadRequestException +from src.utils.password import PasswordUtils +from src.interfaces.auth import ILogin +from src.repositories.auth import AuthRepository +from src.dtos.user import UserDto +from env import config + + +class AuthService: + def __init__(self, repo: AuthRepository): + self.repo = repo + + async def authenticate(self, db: AsyncSession, data: ILogin) -> UserDto: + user = await self.repo.find_by_username(db, data.username) + + if not user: + raise BadRequestException('username_not_match') + if not PasswordUtils.check_password(data.password, user.password): + raise BadRequestException('password_not_match') + return self.create_access_token({ + "id": "1", + "username": user.username, + "email": user.email, + }) + + @staticmethod + def create_access_token(data: Dict) -> UserDto: + data_encoded = data.copy() + data_encoded.update({"exp": datetime.now(timezone.utc) + timedelta(hours=config["jwt"]["expired_in"])}) + token = jwt.encode(data_encoded, config["jwt"]["secret_key"], algorithm=config["jwt"]["algorithm"]) + return token + + +def get_auth_service() -> AuthService: + auth_repo = AuthRepository() + return AuthService(auth_repo) + + diff --git a/src/services/user.py b/src/services/user.py new file mode 100644 index 0000000000000000000000000000000000000000..f172683001f347d6ed78b3ccb0edb8c2e3f37f69 --- /dev/null +++ b/src/services/user.py @@ -0,0 +1,68 @@ +from src.interfaces.auth import IPagination +from src.interfaces.user import ICreateUser, IFilterUser, IUpdateUser +from src.repositories.user import UserRepository +from src.dtos.user import UserDto +from src.models.user import User +from src.utils.password import PasswordUtils +from src.utils.exceptions import BadRequestException + +from typing import List +from sqlalchemy.ext.asyncio import AsyncSession + +class UserService: + def __init__(self, repo: UserRepository): + self.repo = repo + + async def store(self, db: AsyncSession, data: ICreateUser) -> UserDto: + await self.validate_unique_username(db, data.username) + + hashed_password = PasswordUtils.hash_password(data.password) + user_data = data.model_copy(update={"password": hashed_password}) + + new_user = await self.repo.store(db, data=user_data) + + return UserDto.model_validate(new_user) + + async def update(self, db: AsyncSession, user_id: int, data: IUpdateUser) -> UserDto: + user = await self.find_or_fail(db, 
diff --git a/src/services/user.py b/src/services/user.py
new file mode 100644
index 0000000000000000000000000000000000000000..f172683001f347d6ed78b3ccb0edb8c2e3f37f69
--- /dev/null
+++ b/src/services/user.py
@@ -0,0 +1,68 @@
+from src.interfaces.auth import IPagination
+from src.interfaces.user import ICreateUser, IFilterUser, IUpdateUser
+from src.repositories.user import UserRepository
+from src.dtos.user import UserDto
+from src.models.user import User
+from src.utils.password import PasswordUtils
+from src.utils.exceptions import BadRequestException
+
+from typing import List
+from sqlalchemy.ext.asyncio import AsyncSession
+
+class UserService:
+    def __init__(self, repo: UserRepository):
+        self.repo = repo
+
+    async def store(self, db: AsyncSession, data: ICreateUser) -> UserDto:
+        await self.validate_unique_username(db, data.username)
+
+        hashed_password = PasswordUtils.hash_password(data.password)
+        user_data = data.model_copy(update={"password": hashed_password})
+
+        new_user = await self.repo.store(db, data=user_data)
+
+        return UserDto.model_validate(new_user)
+
+    async def update(self, db: AsyncSession, user_id: int, data: IUpdateUser) -> UserDto:
+        user = await self.find_or_fail(db, user_id)
+        if data.password:
+            hashed_password = PasswordUtils.hash_password(data.password)
+            data = data.model_copy(update={"password": hashed_password})
+
+        updated_user = await self.repo.update(db, user, data)
+        return updated_user
+
+    async def delete(self, db: AsyncSession, user_id: int):
+        user = await self.find_or_fail(db, user_id)
+        await self.repo.delete(db, user)
+
+    async def get_one(self, db: AsyncSession, filter_data: IFilterUser) -> UserDto | None:
+        user = await self.repo.get_one(db, filter_data)
+        return user
+
+    async def get_many(self, db: AsyncSession, paging: IPagination, filter_data: IFilterUser) -> List[UserDto]:
+        list_user = await self.repo.get_many(db, paging, filter_data.user_id)
+        return list(map(UserDto.model_validate, list_user))
+
+    async def validate_unique_username(self, db: AsyncSession, username: str):
+        old_user = await self.repo.get_one(db, IFilterUser(username=username))
+        if old_user:
+            raise BadRequestException('username_already_exists')
+
+    async def find_by_pk(self, db: AsyncSession, user_id: int):
+        return await self.repo.find_by_pk(db, user_id)
+
+    async def find_or_fail(self, db: AsyncSession, user_id: int) -> User:
+        user = await self.find_by_pk(db, user_id)
+        if not user:
+            raise BadRequestException('user_not_found')
+
+        return user
+
+def get_user_service() -> UserService:
+    user_repo = UserRepository()
+    return UserService(user_repo)
+
diff --git a/src/utils/__pycache__/exceptions.cpython-313.pyc b/src/utils/__pycache__/exceptions.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bce3e387a6aa59b63be1fb03331bd9beae858945
Binary files /dev/null and b/src/utils/__pycache__/exceptions.cpython-313.pyc differ
diff --git a/src/utils/__pycache__/password.cpython-313.pyc b/src/utils/__pycache__/password.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..534b8ace5adcf959ce1e3f5c183af5076624b31e
Binary files /dev/null and b/src/utils/__pycache__/password.cpython-313.pyc differ
diff --git a/src/utils/__pycache__/response.cpython-313.pyc b/src/utils/__pycache__/response.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5d83acdca78b69965bd3c8d276c55c12a2fab42e
Binary files /dev/null and b/src/utils/__pycache__/response.cpython-313.pyc differ
diff --git a/src/utils/__pycache__/text_process.cpython-313.pyc b/src/utils/__pycache__/text_process.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6106237fedb6aea09dc6971348d80adbf48fb3e3
Binary files /dev/null and b/src/utils/__pycache__/text_process.cpython-313.pyc differ
diff --git a/src/utils/exceptions.py b/src/utils/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6276f5372a928df0bf9ee7f0f5e51c000fbacc7
--- /dev/null
+++ b/src/utils/exceptions.py
@@ -0,0 +1,21 @@
+class AppException(Exception):
+    def __init__(self, status_code: int, message: str):
+        super().__init__(message)
+        self.detail = message
+        self.status_code = status_code
+
+class BadRequestException(AppException):
+    def __init__(self, message='invalid_param') -> None:
+        super().__init__(400, message)
+
+class UnauthorizedException(AppException):
+    def __init__(self, message='unauthorized'):
+        super().__init__(401, message)
+
+class ForbiddenException(AppException):
+    def __init__(self, message='forbidden'):
+        super().__init__(403, message)
+
+class NotFoundException(AppException):
+    def __init__(self, message='not_found') -> None:
+        super().__init__(404, message)
\ No newline at end of file
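One plausible way these exception classes tie into the application, sketched here under the assumption that the FastAPI instance lives in main.py and that it reuses handler_error from src/utils/response.py (shown below); none of this wiring is asserted by the diff itself:

    from fastapi import FastAPI, Request

    from src.utils.exceptions import AppException
    from src.utils.response import handler_error

    app = FastAPI()

    @app.exception_handler(AppException)
    async def app_exception_handler(request: Request, exc: AppException):
        # handler_error reads .status_code / .detail off the exception
        return handler_error(exc)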
diff --git a/src/utils/password.py b/src/utils/password.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad63d3f68b1c890ac5b03ef5ec1889a3f975c2a9
--- /dev/null
+++ b/src/utils/password.py
@@ -0,0 +1,11 @@
+import bcrypt
+
+class PasswordUtils:
+    @staticmethod
+    def hash_password(password: str) -> str:
+        hashed = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt())
+        return hashed.decode('utf-8')
+
+    @staticmethod
+    def check_password(password: str, hashed_password: str) -> bool:
+        return bcrypt.checkpw(password.encode('utf-8'), hashed_password.encode('utf-8'))
diff --git a/src/utils/response.py b/src/utils/response.py
new file mode 100644
index 0000000000000000000000000000000000000000..91e2432852b3c7e65a13b7f10bad5eb7d503fbca
--- /dev/null
+++ b/src/utils/response.py
@@ -0,0 +1,39 @@
+from typing import Any, Optional, Dict
+from fastapi.responses import JSONResponse
+
+import math
+import logging
+
+logger = logging.getLogger(__name__)
+
+def res_ok(data: Optional[Dict] = None, code: str = "SUCCESS",
+           page: Optional[int] = None, limit: Optional[int] = None,
+           total_items: Optional[int] = None):
+
+    response = {
+        "code": code,
+        "message": code,
+        "data": data or {}
+    }
+
+    if page is not None and limit is not None and total_items is not None:
+        total_pages = math.ceil(total_items / limit)
+        response["meta"] = {
+            "total_pages": total_pages,
+            "total_items": total_items,
+            "limit": limit,
+            "page": page
+        }
+
+    return response
+
+
+def handler_error(error: Exception) -> JSONResponse:
+    status_code = getattr(error, 'status_code', 500)
+    detail = getattr(error, 'detail', str(error))
+
+    logger.error(f"Exception occurred: {detail}", exc_info=True)
+
+    return JSONResponse(
+        status_code=status_code,
+        content={"message": detail}
+    )
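A small illustration of the paginated response shape (the values are made up):

    from src.utils.response import res_ok

    body = res_ok(data={"items": ["a", "b", "c"]}, page=1, limit=10, total_items=25)
    # body["meta"] -> {"total_pages": 3, "total_items": 25, "limit": 10, "page": 1}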
diff --git a/src/utils/text_process.py b/src/utils/text_process.py
new file mode 100644
index 0000000000000000000000000000000000000000..933eda6de2200d991ee2bd7cb1db6e6e0838bf73
--- /dev/null
+++ b/src/utils/text_process.py
@@ -0,0 +1,156 @@
+"""This module handles all textual preprocessing and postprocessing tasks.
+
+@Author: Karthick T. Sharma
+"""
+
+import re
+from deep_translator import GoogleTranslator
+import nltk
+from nltk.tokenize import sent_tokenize
+nltk.download('punkt')
+
+
+def filter_text(context):
+    """Remove all characters other than -, a-z, A-Z, 0-9 and a few symbols,
+    and collapse extra blank spaces.
+
+    Args:
+        context (str): input string for processing.
+
+    Returns:
+        str: processed string.
+    """
+    text = context.strip()
+    text = re.sub('[\u2010-\u2013]', '-', text)
+    text = re.sub(r'[^a-zA-Z0-9.,\-?%&*()]', ' ', text)
+    text = re.sub(' {2,}', ' ', text)
+    return text
+
+
+def split_text(context, char_range=300):
+    """Split the bulk input text into small chunks.
+
+    Args:
+        context (str): processed string to be split.
+
+    Returns:
+        list[str]: list of split text chunks.
+    """
+    bulk_text = filter_text(context=context)
+
+    if len(bulk_text) <= char_range:
+        return [bulk_text]
+
+    splitted_texts = []
+    # split whole input into blocks of roughly char_range characters
+    # (only split after a full stop has been encountered)
+    while len(bulk_text) > char_range:
+        i = char_range
+        while (i < len(bulk_text)) and (bulk_text[i] != '.'):
+            i += 1
+        splitted_texts.append(bulk_text[:(i + 1)])
+        bulk_text = bulk_text[(i + 1):]
+    return splitted_texts
+
+
+def change_format(false_ans):
+    """Change s2v format to a fair readable form. Remove '|' and '_' and capitalize the first letter.
+
+    Args:
+        false_ans (list[tuple(str, int)]): list of most similar words and their
+            similarity.
+
+    Returns:
+        list[str]: false_ans in fair-readable format.
+    """
+    output = []
+    for result in false_ans:
+        res = result[0].split('|')
+        res = res[0].replace('_', ' ')
+        res = res[0].upper() + res[1:]
+        output.append(res)
+    return output
+
+
+def postprocess_summary(text):
+    """Postprocess the output of the summarizer model for fair readable output.
+
+    Capitalize the first word of each sentence. Put spaces in the required places.
+
+    Args:
+        text (str): summarized text to be processed.
+
+    Returns:
+        str: clean, human-readable text.
+    """
+    output = ""
+
+    for token in sent_tokenize(text):
+        token = token.capitalize()
+        output += " " + token
+    return output
+
+
+def postprocess_question(text):
+    """Postprocess the output of the question generation model for fair readability.
+
+    Args:
+        text (str): generated question to be processed.
+
+    Returns:
+        str: clean readable text.
+    """
+    output = text.replace("question: ", "")
+    output = output.strip()
+    return output
+
+
+# Translate Vietnamese -> English
+def vietnamese_to_english(text):
+    translator = GoogleTranslator(source='vi', target='en')
+    translated_text = translator.translate(text)
+    return translated_text
+
+
+def english_to_vietnamese(text):
+    translator = GoogleTranslator(source='en', target='vi')
+    translated_text = translator.translate(text)
+    return translated_text
+
+
+def get_all_summary(model, context):
+    """Generate summary of input corpus.
+
+    Args:
+        model (Model): seq2seq transformer used for summarization.
+        context (str): bunch of unprocessed text.
+
+    Returns:
+        tuple(list(str), list(str)): tuple of the summarized text chunks and the
+        original text chunks.
+    """
+    summary = []
+    splitted_text = model.preprocess_input(context)
+
+    for txt in splitted_text:
+        summary.append(model.summarize(txt))
+
+    return summary, splitted_text
+
+
+def get_all_questions(model, context, answer):
+    """Return list of generated questions.
+
+    Args:
+        model (Model): seq2seq transformer used for question generation.
+        context (list(str)): list of contexts for generating questions.
+        answer (list(str)): list of answers for the questions to be generated.
+
+    Returns:
+        list(str): list of questions within the given contexts.
+    """
+    questions = []
+
+    for cont, ans in zip(context, answer):
+        questions.append(model.generate(cont, ans))
+
+    return questions
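A hedged sketch of get_all_questions in use; the context/answer pairing below is illustrative, and the first QuestionGenerator() call downloads the question-generation model from Hugging Face:

    from src.services.AI.question_generator import QuestionGenerator
    from src.utils.text_process import get_all_questions

    qg = QuestionGenerator()  # singleton; loads iarfmoose/t5-base-question-generator once

    contexts = ["Plants absorb light through chlorophyll during photosynthesis."]
    answers = ["Chlorophyll"]

    # one question per (context, answer) pair, with the "question: " prefix stripped
    print(get_all_questions(qg, contexts, answers))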