diff --git a/apis/V1/configs/__pycache__/database_config.cpython-310.pyc b/apis/V1/configs/__pycache__/database_config.cpython-310.pyc deleted file mode 100644 index 5038e32083164a0400e0912da772a3052c5f6549..0000000000000000000000000000000000000000 Binary files a/apis/V1/configs/__pycache__/database_config.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/configs/__pycache__/firebase_config.cpython-310.pyc b/apis/V1/configs/__pycache__/firebase_config.cpython-310.pyc deleted file mode 100644 index 517e9608d449939fcccffa6ecd788003404b5c96..0000000000000000000000000000000000000000 Binary files a/apis/V1/configs/__pycache__/firebase_config.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/configs/__pycache__/llm_config.cpython-310.pyc b/apis/V1/configs/__pycache__/llm_config.cpython-310.pyc deleted file mode 100644 index 1bbb0764ba4e359448c4d202f2743fa26b9b068f..0000000000000000000000000000000000000000 Binary files a/apis/V1/configs/__pycache__/llm_config.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/configs/database_config.py b/apis/V1/configs/database_config.py deleted file mode 100644 index 0078ed56f40fcb1f984a44f4ee16a01c75d6483d..0000000000000000000000000000000000000000 --- a/apis/V1/configs/database_config.py +++ /dev/null @@ -1,16 +0,0 @@ -import os -from dotenv import load_dotenv - -load_dotenv() -from pymongo.mongo_client import MongoClient -from pymongo.server_api import ServerApi - -# Create a new client and connect to the server -client = MongoClient(os.getenv("MONGODB_URL"), server_api=ServerApi("1")) -db = client.bandict_db -collection_name = db["user"] -try: - client.admin.command("ping") - print("Pinged your deployment. You successfully connected to MongoDB!") -except Exception as e: - print(e) diff --git a/apis/V1/configs/drant_config.py b/apis/V1/configs/drant_config.py deleted file mode 100644 index e7aea0879d7d8b11b3057fc00637a1b3023bfb5d..0000000000000000000000000000000000000000 --- a/apis/V1/configs/drant_config.py +++ /dev/null @@ -1,34 +0,0 @@ -from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings -from langchain_community.document_loaders import TextLoader -from langchain_qdrant import Qdrant -from langchain_text_splitters import CharacterTextSplitter -import qdrant_client -from .vector_embedding_config import embeddings -import os - -url = os.getenv("QDRANT_URL") -qdrant_api_key = os.getenv("QDRANT_API_KEY") -client = qdrant_client.QdrantClient(url, api_key=qdrant_api_key) - -# collection_config = qdrant_client.http.models.VectorParams( -# size=768, # 768 for instructor-xl, 1536 for OpenAI -# distance=qdrant_client.http.models.Distance.COSINE, -# ) -# client.recreate_collection( -# collection_name="BANDict", -# vectors_config=collection_config, -# ) -# vectorstore = Qdrant( -# client=client, -# collection_name="BANDict", -# embeddings=embeddings, -# ) - -# loader = TextLoader("./note.txt") -# documents = loader.load() -# text_splitter = CharacterTextSplitter( -# chunk_size=100, -# chunk_overlap=20, -# ) -# docs = text_splitter.split_documents(documents) -# vectorstore.add_documents(docs) diff --git a/apis/V1/configs/firebase_config.py b/apis/V1/configs/firebase_config.py deleted file mode 100644 index c8b5009162103c214932440f6bcf67813b6a1126..0000000000000000000000000000000000000000 --- a/apis/V1/configs/firebase_config.py +++ /dev/null @@ -1,37 +0,0 @@ -import firebase_admin -from firebase_admin import credentials -from firebase_admin import storage - -import os -from dotenv import load_dotenv - -# Load environment variables -load_dotenv() -firebase_url_storageBucket = os.getenv("FIREBASE_URL_STORAGEBUCKET") - -# Get credentials from environment variables -credential_firebase = { - "type": os.getenv("FIREBASE_TYPE"), - "project_id": os.getenv("FIREBASE_PROJECT_ID"), - "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"), - "private_key": os.getenv("FIREBASE_PRIVATE_KEY").replace('\\n', '\n'), - "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"), - "client_id": os.getenv("FIREBASE_CLIENT_ID"), - "auth_uri": os.getenv("FIREBASE_AUTH_URI"), - "token_uri": os.getenv("FIREBASE_TOKEN_URI"), - "auth_provider_x509_cert_url": os.getenv("FIREBASE_AUTH_PROVIDER_X509_CERT_URL"), - "client_x509_cert_url": os.getenv("FIREBASE_CLIENT_X509_CERT_URL"), - "universe_domain": os.getenv("FIREBASE_UNIVERSE_DOMAIN") -} - -# Check if the app is not initialized yet -if not firebase_admin._apps: - # Initialize the app with the credentials - cred = credentials.Certificate(credential_firebase) - firebase_admin.initialize_app(cred, { - 'storageBucket': firebase_url_storageBucket - }) - -# Initialize Firestore -firebase_bucket = storage.bucket(app=firebase_admin.get_app()) -print("Storage connected") diff --git a/apis/V1/configs/llm_config.py b/apis/V1/configs/llm_config.py deleted file mode 100644 index e7a69d6c98abee491ef56fc14d03418fb850bf4d..0000000000000000000000000000000000000000 --- a/apis/V1/configs/llm_config.py +++ /dev/null @@ -1,19 +0,0 @@ -from langchain_google_genai import GoogleGenerativeAI -import os -from langchain.globals import set_llm_cache -from langchain.cache import InMemoryCache - -GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") -# llm = GoogleGenerativeAI( -# model="models/gemini-pro", -# temperature=0, -# verbose=True, -# google_api_key=GOOGLE_API_KEY, -# ) -llm = GoogleGenerativeAI( - model="gemini-1.5-flash", - temperature=0, - verbose=True, - google_api_key=GOOGLE_API_KEY, -) -set_llm_cache(InMemoryCache()) diff --git a/apis/V1/configs/vector_embedding_config.py b/apis/V1/configs/vector_embedding_config.py deleted file mode 100644 index 77e7689b8ef08a878a4d937a4f7c82a95311c3bb..0000000000000000000000000000000000000000 --- a/apis/V1/configs/vector_embedding_config.py +++ /dev/null @@ -1,8 +0,0 @@ -from langchain_google_genai import GoogleGenerativeAIEmbeddings -import os - -GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") - -embeddings = GoogleGenerativeAIEmbeddings( - model="models/embedding-001", google_api_key=GOOGLE_API_KEY -) diff --git a/apis/V1/controllers/__pycache__/auth_controller.cpython-310.pyc b/apis/V1/controllers/__pycache__/auth_controller.cpython-310.pyc deleted file mode 100644 index 0867f88a525fc4e921e86d966a78c949b9658e7a..0000000000000000000000000000000000000000 Binary files a/apis/V1/controllers/__pycache__/auth_controller.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/controllers/__pycache__/word_controller.cpython-310.pyc b/apis/V1/controllers/__pycache__/word_controller.cpython-310.pyc deleted file mode 100644 index 3e41d2c33d73242a16327a1dca795980a4a5f8dd..0000000000000000000000000000000000000000 Binary files a/apis/V1/controllers/__pycache__/word_controller.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/controllers/auth_controller.py b/apis/V1/controllers/auth_controller.py deleted file mode 100644 index 39f00c6c11669e872312b82a4a4254c69f7589e4..0000000000000000000000000000000000000000 --- a/apis/V1/controllers/auth_controller.py +++ /dev/null @@ -1,35 +0,0 @@ -from fastapi import HTTPException, status -from ..models.users import User, UserSchema -from ..configs.database_config import db -from ..interfaces.auth_interface import Credential -from ..providers import jwt as jwt_provider -from ..middlewares.auth_middleware import get_current_user -import jwt - - -collection_name = db["user"] - - -def login_control(token): - if not token: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Authorization Token is required", - ) - decoded_token = jwt.decode(token, options={"verify_signature": False}) - user = { - "name": decoded_token["name"], - "email": decoded_token["email"], - "picture": decoded_token["picture"], - } - user = User(**user) - - existing_user = UserSchema.find_by_email(user.email) - if not existing_user: - user_id = UserSchema(**user.dict()).create() - else: - user_id = existing_user["_id"] - - token = jwt_provider.encrypt({"id": str(user_id)}) - - return token diff --git a/apis/V1/controllers/word_controller.py b/apis/V1/controllers/word_controller.py deleted file mode 100644 index 116ca57bfd48486cbd1bc5b6a89469bf2b4e9b12..0000000000000000000000000000000000000000 --- a/apis/V1/controllers/word_controller.py +++ /dev/null @@ -1,67 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException -from typing import Annotated -from ..middlewares.auth_middleware import get_current_user -from ..models.users import User -from ..models.words import WordSchema -from ..configs.database_config import db -from ..utils.response_fmt import jsonResponseFmt -from bson import ObjectId -import random - -router = APIRouter(prefix="/word", tags=["Words"]) -collection = db["word"] - -user_dependency = Annotated[User, Depends(get_current_user)] - - -def list_word_controlller(user): - user_id = user.get("id") - try: - print("user_id", user_id) - words = WordSchema.read_all_words_by_user_id(user_id) - return words - - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -def add_word_controller(user, word): - user_id = user.get("id") - try: - existing_word = WordSchema.check_existing_word(word.word, user_id) - if existing_word: - return jsonResponseFmt(None, msg="Existed", code=400) - random.shuffle(word.options) - WordSchema(**word.dict()).create(user_id) - return jsonResponseFmt(None, code=201) - except Exception as e: - return jsonResponseFmt(None, msg=str(e), code=500) - - -def update_word_controller(user, word_id, word_data): - user_id = user.get("id") - try: - print("user", user) - print("word_id", word_id) - print("word_data", word_data) - - word_data.user_id = user_id - print("word_data", word_data.dict()) - WordSchema(**word_data.dict()).update(str(word_id)) - return jsonResponseFmt(None, code=200) - except Exception as e: - return jsonResponseFmt(None, msg=str(e), code=500) - - -def delete_word_controller(user, word_id): - user_id = user.get("id") - try: - existing_word = collection.find_one( - {"_id": ObjectId(word_id), "user_id": user_id} - ) - if not existing_word: - return jsonResponseFmt(None, msg="Word not found", code=404) - collection.delete_one({"_id": ObjectId(word_id)}) - return jsonResponseFmt(None, code=200) - except Exception as e: - return jsonResponseFmt(None, msg=str(e), code=500) diff --git a/apis/V1/interfaces/__pycache__/auth_interface.cpython-310.pyc b/apis/V1/interfaces/__pycache__/auth_interface.cpython-310.pyc deleted file mode 100644 index 8107bba24fc12245c0a90d41cefe66ac98710f90..0000000000000000000000000000000000000000 Binary files a/apis/V1/interfaces/__pycache__/auth_interface.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/interfaces/__pycache__/llm_interface.cpython-310.pyc b/apis/V1/interfaces/__pycache__/llm_interface.cpython-310.pyc deleted file mode 100644 index 1ad86e3976dd71b5e4ef07f19ee6eb0e1a684083..0000000000000000000000000000000000000000 Binary files a/apis/V1/interfaces/__pycache__/llm_interface.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/interfaces/auth_interface.py b/apis/V1/interfaces/auth_interface.py deleted file mode 100644 index 5c556248c28c253562fcdf72e7537cc6b4d48f92..0000000000000000000000000000000000000000 --- a/apis/V1/interfaces/auth_interface.py +++ /dev/null @@ -1,18 +0,0 @@ -from pydantic import BaseModel, Field - - -class Credential(BaseModel): - credential: str = Field(..., example="F9P/3?@q2!vq") - - -class _LoginResponseInterface(BaseModel): - token: str = Field(..., title="JWT Token") - - -class LoginResponseInterface(BaseModel): - msg: str = Field(..., title="Message") - data: _LoginResponseInterface = Field(..., title="User Data") - - -class AuthInterface(BaseModel): - gtoken: str = Field(..., title="Google Access-Token") diff --git a/apis/V1/interfaces/llm_interface.py b/apis/V1/interfaces/llm_interface.py deleted file mode 100644 index 88d244267acab2085954bf4deccb13d3ede6d742..0000000000000000000000000000000000000000 --- a/apis/V1/interfaces/llm_interface.py +++ /dev/null @@ -1,21 +0,0 @@ -from pydantic import BaseModel, Field - - -class TranslateOutput(BaseModel): - answer: str = Field(description="translated word") - word: str = Field(description="word to be translated") - - -class GrammarlyOutput(BaseModel): - corrected_sentence: str = Field(description="corrected sentence") - incorrect: list = Field(description="list of incorrect words or phrases") - correct: list = Field(description="list of correct words or phrases") - - -class ClassifyDocumentOutput(BaseModel): - type: str = Field(description="document type RnD or Business") - - -class ClassifyAndSummarizeOutput(BaseModel): - summary: str = Field(description="summary of the document") - type: str = Field(description="document type RnD or Business") diff --git a/apis/V1/middlewares/__pycache__/auth_middleware.cpython-310.pyc b/apis/V1/middlewares/__pycache__/auth_middleware.cpython-310.pyc deleted file mode 100644 index 9d405698e58f2920678746456d52fbe997ff1327..0000000000000000000000000000000000000000 Binary files a/apis/V1/middlewares/__pycache__/auth_middleware.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/middlewares/auth_middleware.py b/apis/V1/middlewares/auth_middleware.py deleted file mode 100644 index 8b768418aab87d146a4b03fd72ee30a15fea229f..0000000000000000000000000000000000000000 --- a/apis/V1/middlewares/auth_middleware.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Annotated -from fastapi import Depends -from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer -from ..schemas.user_schema import getUser -from ..providers import jwt -from ..configs.database_config import db -from bson import ObjectId -from jose import JWTError -from ..utils.response_fmt import jsonResponseFmt - -security = HTTPBearer() - -collection = db["user"] - - -# Get the auth token from the request header, -# parse token to get user data, and return the user data. -def get_current_user( - credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)] -): - try: - # Get token - token = credentials.credentials - # If Authorization is not provided, return Un-authorized. - if not token: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - - # Decrypted token to get user data. - payload = jwt.decrypt(token) - user_id: str = payload["id"] - if not user_id: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - user = collection.find_one({"_id": ObjectId(user_id)}) - print("request of", user["email"]) - # If user is not found, return Un-authorized. - if not user: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - - return getUser(user) - except JWTError: - return jsonResponseFmt(None, msg="Authentication failed", code=401) diff --git a/apis/V1/models/__pycache__/projects.cpython-310.pyc b/apis/V1/models/__pycache__/projects.cpython-310.pyc deleted file mode 100644 index df5336c9bd027fdce8af026c49901d1c55149af8..0000000000000000000000000000000000000000 Binary files a/apis/V1/models/__pycache__/projects.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/models/__pycache__/users.cpython-310.pyc b/apis/V1/models/__pycache__/users.cpython-310.pyc deleted file mode 100644 index 175f9090b23e20b0adf36f156a037ace0056b433..0000000000000000000000000000000000000000 Binary files a/apis/V1/models/__pycache__/users.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/models/__pycache__/words.cpython-310.pyc b/apis/V1/models/__pycache__/words.cpython-310.pyc deleted file mode 100644 index 0d08b1fe6020dd5bfbddb73aec15dec859af5f4e..0000000000000000000000000000000000000000 Binary files a/apis/V1/models/__pycache__/words.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/models/projects.py b/apis/V1/models/projects.py deleted file mode 100644 index 518d9c2c5b9ba707e2438768e06c212744d77ce0..0000000000000000000000000000000000000000 --- a/apis/V1/models/projects.py +++ /dev/null @@ -1,92 +0,0 @@ -from pydantic import BaseModel, Field -from typing import Optional, AnyStr, List, Dict -from ..utils.utils import get_current_time -from ..configs.database_config import db -from bson import ObjectId -from fastapi import UploadFile, File - -collection = db["project"] - - -class Project(BaseModel): - title: str = Field(..., title="title") - # abstract: str = Field(..., title="Abstract") - file: UploadFile = File(..., title="File") - - class Config: - schema_extra = { - "example": { - "id": "666460100c23ec4225cb2bc3", - "title": "Transformer", - # "abstract": "Kiến trúc transformer làm cho kỷ nguyên AI bùng nổ", - "file": 'bert.pdf', - "user_id": "6661455703d07f73ba", - } - } - - -class ProjectSchema: - def __init__( - self, - id: AnyStr = None, - title: AnyStr = "", - # abstract: AnyStr = "", - file: List = [AnyStr], - user_id: AnyStr = "", - created_at=get_current_time(), - ): - self.id = id - self.title = title - # self.abstract = abstract - self.file = file - self.user_id = user_id - self.created_at = created_at - - def to_dict(self): - data_dict = { - "title": self.title, - # "abstract": self.abstract, - "file": self.file, - "user_id": self.user_id, - "created_at": self.created_at, - } - if self.id is not None: - data_dict["_id"] = str(self.id) - return data_dict - - @staticmethod - def from_dict(data: Dict): - return ProjectSchema( - id=data.get("_id"), - title=data.get("title"), - file=data.get("file"), - user_id=data.get("user_id"), - created_at=data.get("created_at"), - ) - - def create(self, user_id: str): - project_dict = self.to_dict() - project_dict["user_id"] = user_id - print("datao") - collection.insert_one(project_dict) - - @staticmethod - def read_all_project_by_user_id(user_id: str): - data = collection.find({"user_id": user_id}) - return [ProjectSchema.from_dict(d).to_dict() for d in data] - - @staticmethod - def read_project_by_id(project_id: str, user_id: str): - data = collection.find_one( - {"_id": ObjectId(project_id), "user_id": user_id}) - return ProjectSchema.from_dict(data).to_dict() - - def update(self, project_id: str): - collection.update_one( - {"_id": ObjectId(project_id)}, - {"$set": self.to_dict()}, - ) - - @staticmethod - def delete(project_id: str): - collection.delete_one({"_id": ObjectId(project_id)}) diff --git a/apis/V1/models/users.py b/apis/V1/models/users.py deleted file mode 100644 index ea0c6cdaa246e9e4f0327fcff8642db502c96a00..0000000000000000000000000000000000000000 --- a/apis/V1/models/users.py +++ /dev/null @@ -1,84 +0,0 @@ -from pydantic import BaseModel, Field, EmailStr -from typing import Dict, AnyStr -from ..configs.database_config import db -from ..utils.utils import get_current_time -from bson import ObjectId - - -collection = db["user"] - - -class User(BaseModel): - name: str = Field("", title="User Name") - email: EmailStr = Field("", title="User Email") - picture: str = Field("", title="User Picture") - - class Config: - schema_extra = { - "example": { - "name": "John Doe", - "email": "johnUS192@gmail.com", - "picture": "https://example.com/picture.jpg", - } - } - - -class UserSchema: - def __init__( - self, - id: AnyStr = None, - name: AnyStr = "", - email: AnyStr = "", - picture: AnyStr = "", - created_at=get_current_time(), - ): - self.id = id - self.name = name - self.email = email - self.picture = picture - self.created_at = created_at - - def to_dict(self): - data_dict = { - "name": self.name, - "email": self.email, - "picture": self.picture, - "created_at": self.created_at, - } - if self.id is not None: - data_dict["_id"] = self.id - return data_dict - - @staticmethod - def from_dict(data: Dict): - return UserSchema( - id=data.get("_id"), - name=data.get("name"), - email=data.get("email"), - picture=data.get("picture"), - created_at=data.get("created_at"), - ) - - @staticmethod - def find_all(): - data = collection.find() - return [UserSchema.from_dict(user).to_dict for user in data] - - @staticmethod - def find_by_id(id): - data = collection.find_one({"_id": ObjectId(id)}) - if data is None: - return None - return UserSchema.from_dict(data).to_dict() - - @staticmethod - def find_by_email(email): - data = collection.find_one({"email": email}) - if data is None: - return None - return UserSchema.from_dict(data).to_dict() - - def create(self): - user = collection.insert_one(self.to_dict()) - user_id = str(user.inserted_id) - return user_id diff --git a/apis/V1/models/words.py b/apis/V1/models/words.py deleted file mode 100644 index 32f46ab0fbd84a8ce4fb228825297b5fe41711f8..0000000000000000000000000000000000000000 --- a/apis/V1/models/words.py +++ /dev/null @@ -1,96 +0,0 @@ -from pydantic import BaseModel, Field -from typing import Optional, AnyStr, List, Dict -from ..utils.utils import get_current_time -from ..configs.database_config import db -from bson import ObjectId - -collection = db["word"] - - -class Word(BaseModel): - id: Optional[str] = Field(title="ID") - word: str = Field(title="Word") - answer: str = Field(title="Meaning") - options: list = Field(title="Options") - user_id: Optional[str] = Field(title="User ID") - - class Config: - schema_extra = { - "example": { - "id": "666460100c23ec4225cb2bc3", - "word": "Apple", - "answer": "táo", - "options": ["dừa", "thanh long", "ổi", "táo"], - "user_id": "6661455703d07f73ba", - } - } - - -class WordSchema: - def __init__( - self, - id: AnyStr = None, - word: AnyStr = "", - answer: AnyStr = "", - options: List = [AnyStr], - user_id: AnyStr = "", - created_at=get_current_time(), - ): - self.id = id - self.word = word - self.answer = answer - self.options = options - self.user_id = user_id - self.created_at = created_at - - def to_dict(self): - data_dict = { - "word": self.word, - "answer": self.answer, - "options": self.options, - "user_id": self.user_id, - "created_at": self.created_at, - } - if self.id is not None: - data_dict["_id"] = str(self.id) - return data_dict - - @staticmethod - def from_dict(data: Dict): - return WordSchema( - id=data.get("_id"), - word=data.get("word"), - answer=data.get("answer"), - options=data.get("options"), - user_id=data.get("user_id"), - created_at=data.get("created_at"), - ) - - def create(self, user_id: str): - word_dict = self.to_dict() - word_dict["user_id"] = user_id - collection.insert_one(word_dict) - - @staticmethod - def read_all_words_by_user_id(user_id: str): - data = collection.find({"user_id": user_id}) - return [WordSchema.from_dict(d).to_dict() for d in data] - - @staticmethod - def read_word_by_id(word_id: str, user_id: str): - data = collection.find_one({"_id": ObjectId(word_id), "user_id": user_id}) - return WordSchema.from_dict(data).to_dict() - - @staticmethod - def check_existing_word(word: str, user_id: str): - return collection.find_one({"word": word, "user_id": user_id}) - - def update(self, word_id: str): - collection.update_one( - {"_id": ObjectId(word_id)}, - {"$set": self.to_dict()}, - ) - - @staticmethod - def delete(word_id: str): - collection.delete_one({"_id": ObjectId(word_id)}) diff --git a/apis/V1/prompts/__pycache__/document_type_clf.cpython-310.pyc b/apis/V1/prompts/__pycache__/document_type_clf.cpython-310.pyc deleted file mode 100644 index 0885b709d0881d55ef3a16450a5bbabfc622840a..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/document_type_clf.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/__pycache__/grammarly.cpython-310.pyc b/apis/V1/prompts/__pycache__/grammarly.cpython-310.pyc deleted file mode 100644 index a25c1bc83b480b536d3c303bf9df736708186f28..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/grammarly.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/__pycache__/summarize_doc.cpython-310.pyc b/apis/V1/prompts/__pycache__/summarize_doc.cpython-310.pyc deleted file mode 100644 index 9d56e86fc9dd645f118720ef0c6a043ea5baf292..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/summarize_doc.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/__pycache__/translate_abstract_BIZ.cpython-310.pyc b/apis/V1/prompts/__pycache__/translate_abstract_BIZ.cpython-310.pyc deleted file mode 100644 index 54d3d2f96a001254721102da6b584d5baa9f5b84..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/translate_abstract_BIZ.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/__pycache__/translate_abstract_RnD.cpython-310.pyc b/apis/V1/prompts/__pycache__/translate_abstract_RnD.cpython-310.pyc deleted file mode 100644 index 534fd1636ff8711f1a88c4b7cd8d0f61be22e9fd..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/translate_abstract_RnD.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/__pycache__/translate_detail.cpython-310.pyc b/apis/V1/prompts/__pycache__/translate_detail.cpython-310.pyc deleted file mode 100644 index 673ee6fc24a04a8fb1022e161e65602d7e6aa93e..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/translate_detail.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/__pycache__/translate_test.cpython-310.pyc b/apis/V1/prompts/__pycache__/translate_test.cpython-310.pyc deleted file mode 100644 index a58ea1cc8dd4091644bb2e95ce467d4be4db00cd..0000000000000000000000000000000000000000 Binary files a/apis/V1/prompts/__pycache__/translate_test.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/prompts/document_type_clf.py b/apis/V1/prompts/document_type_clf.py deleted file mode 100644 index ebc2736f632a519b65c3bad597e2a06954fa3ae0..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/document_type_clf.py +++ /dev/null @@ -1,78 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import HumanMessage, SystemMessage -from ..configs.llm_config import llm -from ..interfaces.llm_interface import ClassifyDocumentOutput - -# System message for initial context and instructions -system_instructions = SystemMessage( - content="""You are an expert in classifying document types. Identify the type of document based on short content as Business or RnD.""" -) - -# Example system message to clarify format with examples -system_examples = SystemMessage( - content=""" -Examples: - -1. -Document: -"A central goal of machine learning is the development of systems that can solve many problems in as many data domains as possible. Current architectures, however, cannot be applied beyond a small set of stereotyped settings, as they bake in domain & task assumptions or scale poorly to large inputs or outputs. In this work, we propose Perceiver IO, a general-purpose architecture that handles data from arbitrary settings while scaling linearly with the size of inputs and outputs." -Type: RnD - -2. -Document: -"The global economy is projected to grow at a slower pace this year, as rising inflation and supply chain disruptions continue to impact markets worldwide. Central banks are adjusting their monetary policies in response to these challenges." -Type: Business - -3. -Document: -"The company’s latest financial report shows a significant increase in net income, driven by cost reduction strategies and increased sales in emerging markets. Investors are optimistic about the future performance given these strong quarterly results." -Type: Business - -4. -Document: -"Our research introduces a novel approach to quantum computing, focusing on error correction and scalability. The proposed methods significantly improve the stability of qubits, which is crucial for the practical implementation of quantum algorithms." -Type: RnD - -5. -Document: -"An analysis of the stock market reveals that technology stocks have outperformed other sectors over the past quarter. This trend is attributed to the rapid digital transformation across industries and the increasing demand for tech solutions." -Type: Business - -6. -Document: -"The startup has launched an innovative mobile app designed to streamline project management for remote teams. The app includes features such as real-time collaboration, task tracking, and performance analytics, making it a comprehensive tool for businesses." -Type: Business -""" -) - -# Function to create the prompt template dynamically based on input -def create_prompt(document): - user_message = HumanMessage( - content=f""" -Document: {document} -Output format: -"type": type of document, - -If the document is Business, please type "Business" else type "RnD". -{{format_instructions}} -""" - ) - # Combine all messages into a conversation - messages = [system_instructions, system_examples, user_message] - chat_prompt = ChatPromptTemplate(messages=messages) - - return chat_prompt.format() - -# Use the JSON output parser -parser = JsonOutputParser(pydantic_object=ClassifyDocumentOutput) - -# Create the prompt template -prompt = PromptTemplate( - input_variables=["document"], - template=create_prompt("{document}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Chain the prompt with the LLM and parser -chain_type_classify = prompt | llm | parser diff --git a/apis/V1/prompts/grammarly.py b/apis/V1/prompts/grammarly.py deleted file mode 100644 index e382e6a29e2c409ef58c549c02e570acae6537ca..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/grammarly.py +++ /dev/null @@ -1,82 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage -from ..configs.llm_config import llm -from ..interfaces.llm_interface import GrammarlyOutput - -# System message for initial context and instructions -system_instructions = SystemMessage( - content="""You are an expert in checking grammar errors. Identify the errors and provide corrections.""" -) - -# Example system message to clarify format with examples -system_examples = SystemMessage( - content=""" -Examples: - -1. -Sentence: "She don't know nothing about the new project, and there is many details that needs to be explained." -Incorrect: [don't know nothing, is, needs] -Correct: [doesn't know anything, are, need] -Corrected sentence: "She doesn't know anything about the new project, and there are many details that need to be explained." - -2. -Sentence: "He go to the store every day, but he never buy nothing." -Incorrect: [go, buy nothing] -Correct: [goes, buys anything] -Corrected sentence: "He goes to the store every day, but he never buys anything." - -3. -Sentence: "She quickly ran to the store, bought some groceries, and returning home." -Incorrect: [returning] -Correct: [returned] -Corrected sentence: "She quickly ran to the store, bought some groceries, and returned home." - -4. -Sentence: "Each of the students have completed their assignments, but the teacher is not satisfied with their works." -Incorrect: [have, works] -Correct: [has, work] -Corrected sentence: "Each of the students has completed their assignments, but the teacher is not satisfied with their work." - -5. -Sentence: "If he was more careful, he will not make so many mistakes, which is causing problems for the team." -Incorrect: [was, will, is] -Correct: [were, would, are] -Corrected sentence: "If he were more careful, he would not make so many mistakes, which are causing problems for the team." -""" -) - - -# Function to create the prompt template dynamically based on input -def create_prompt(sentence): - user_message = HumanMessage( - content=f""" -My sentence: {sentence} -Output format: -"sentence": my sentence, -"incorrect": [list of incorrect words or phrases], -"correct": [list of correct words or phrases], -"corrected_sentence": corrected sentence - -If no corrections are needed, return the original sentence as the corrected sentence and empty lists for incorrect and correct. -{{format_instructions}} -""" - ) - # Combine all messages into a conversation - messages = [system_instructions, system_examples, user_message] - chat_prompt = ChatPromptTemplate(messages=messages) - return chat_prompt.format() - - -# Use the JSON output parser -parser = JsonOutputParser(pydantic_object=GrammarlyOutput) - -# Create the prompt template -prompt = PromptTemplate( - input_variables=["sentence"], - template=create_prompt("{sentence}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Chain the prompt with the LLM and parser -chain_grammarly = prompt | llm | parser diff --git a/apis/V1/prompts/summarize_doc.py b/apis/V1/prompts/summarize_doc.py deleted file mode 100644 index c96531cfa2b785a00d7eebe45b9c804f86733562..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/summarize_doc.py +++ /dev/null @@ -1,19 +0,0 @@ -from ..configs.llm_config import llm -from langchain_core.output_parsers import JsonOutputParser - -from langchain_core.prompts import PromptTemplate -from ..interfaces.llm_interface import ClassifyAndSummarizeOutput - -# Define prompt -prompt_template = """Write a concise summary of the following: -"{text}" and classify the document type as Business or RnD. -Return the answer as: \n{format_instructions}. -""" -parser = JsonOutputParser(pydantic_object=ClassifyAndSummarizeOutput) -prompt = PromptTemplate.from_template( - template=prompt_template, - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Define LLM chain -chain_summarize = prompt | llm | parser diff --git a/apis/V1/prompts/translate.py b/apis/V1/prompts/translate.py deleted file mode 100644 index 95a573a6773ecbdd759864fe075cfec283b35513..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/translate.py +++ /dev/null @@ -1,60 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage -from ..configs.llm_config import llm -from ..interfaces.llm_interface import TranslateOutput - -# System message for initial context and instructions -system_instructions = SystemMessage( - content="""You are a master in translation. You are given a sentence and a word. - Translate the word to Vietnamese based on the context of the sentence. """ -) - -# Example system message to clarify format with examples -system_examples = SystemMessage( - content=""" -Example 1: - My sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark" - My word: "outperforms" - Answer: "vượt trội" -Example 2: - My sentence: "I love my dog so much, I was interested in him the first time I saw him" - My word: "so much" - Answer: "rất nhiều" -""" -) - - -# Function to create the prompt template dynamically based on input -def create_prompt(sentence, word): - user_message = HumanMessage( - content=f""" -My sentence: {sentence} -My word: {word} - -If this word is not in dictionary, return "Không tìm thấy từ này trong từ điển". -Else, Translate the word to Vietnamese based on the context of the sentence. Return the answer as the translated word. -I want output to have the format: "answer": translated word, "options": list of options. -Options is a list of words consisting of the answer and 3 random words which can be used to create flashcards for learning. -""" - ) - ai_response = AIMessage(content="Answer: [Your AI will provide the answer here]") - # Combine all messages into a conversation - messages = [system_instructions, system_examples, user_message, ai_response] - chat_prompt = ChatPromptTemplate(messages=messages) - - return chat_prompt.format() - - -# Use the JSON output parser -parser = JsonOutputParser(pydantic_object=TranslateOutput) - -# Create the prompt template -prompt = PromptTemplate( - input_variables=["sentence", "word"], - template=create_prompt("{sentence}", "{word}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Chain the prompt with the LLM and parser -chain = prompt | llm | parser diff --git a/apis/V1/prompts/translate_abstract_BIZ.py b/apis/V1/prompts/translate_abstract_BIZ.py deleted file mode 100644 index 8d6cb88ce7d650a47430e73fd359100f23bc2484..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/translate_abstract_BIZ.py +++ /dev/null @@ -1,74 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import HumanMessage, SystemMessage -from ..configs.llm_config import llm -from ..interfaces.llm_interface import TranslateOutput - -# System message for initial context and instructions -system_instructions = SystemMessage( - content="""You are an expert in translating English into Vietnamese for Finance and Economics topics. Translate the given word or phrase based on the context of the abstract and sentence. -1. Read the abstract to understand the context. -2. Translate the given word or phrase into Vietnamese using the context. -3. If the word or phrase is not in the dictionary, return "Không tìm thấy từ này trong từ điển". -4. If the word or phrase is an acronym, expand it and provide the translation. -""" -) - -# Example system message to clarify format with examples -system_examples = SystemMessage( - content=""" -**Examples:** - -Abstract: "The efficient market hypothesis (EMH) suggests that financial markets are informationally efficient, meaning that asset prices fully reflect all available information." -Sentence: "The efficient market hypothesis (EMH) suggests that financial markets are informationally efficient." -Word: "efficient" -Answer: "hiệu quả" - -Abstract: "Quantitative easing (QE) is a monetary policy whereby a central bank purchases government securities or other securities from the market in order to increase the money supply and encourage lending and investment." -Sentence: "Quantitative easing (QE) is a monetary policy whereby a central bank purchases government securities." -Word: "Quantitative easing" -Answer: "Nới lỏng định lượng" - -Abstract: "Inflation is the rate at which the general level of prices for goods and services rises, eroding purchasing power." -Sentence: "Inflation is the rate at which the general level of prices for goods and services rises." -Word: "Inflation" -Answer: "lạm phát" - -Abstract: "GDP, or Gross Domestic Product, measures the total value of all goods and services produced within a country in a given period." -Sentence: "GDP, or Gross Domestic Product, measures the total value of all goods and services produced within a country." -Word: "GDP" -Answer: "Tổng sản phẩm quốc nội" -""" -) - -# Function to create the prompt template dynamically based on input -def create_prompt(abstract, sentence, word): - user_message = HumanMessage( - content=f""" -Abstract: {abstract} -Sentence: {sentence} -Word: {word} - -Translate the word to Vietnamese based on the context of the sentence and abstract. -If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển". -If it is an acronym, expand and translate it. -{{format_instructions}} -""" - ) - messages = [system_instructions, system_examples, user_message] - chat_prompt = ChatPromptTemplate(messages=messages) - - return chat_prompt.format() - -# Use the JSON output parser -parser = JsonOutputParser(pydantic_object=TranslateOutput) - -# Create the prompt template -prompt = PromptTemplate( - input_variables=["abstract", "sentence", "word"], - template=create_prompt("{abstract}", "{sentence}", "{word}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Chain the prompt with the LLM and parser -chain_BIZ = prompt | llm | parser diff --git a/apis/V1/prompts/translate_abstract_RnD.py b/apis/V1/prompts/translate_abstract_RnD.py deleted file mode 100644 index 460a5eee4cf589d441175081f1522ada6079e270..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/translate_abstract_RnD.py +++ /dev/null @@ -1,71 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import HumanMessage, SystemMessage -from ..configs.llm_config import llm -from ..interfaces.llm_interface import TranslateOutput - -system_instructions = SystemMessage( - content="""You are an expert in translating English into Vietnamese for R&D topics. Translate the given word or phrase based on the context of the abstract and sentence. -1. Read the abstract to understand the context. -2. Translate the given word or phrase into Vietnamese using the context. -3. If the word or phrase is not in the dictionary, return "Không tìm thấy từ này trong từ điển". -4. If the word or phrase is an acronym, expand it and provide the translation. -""" -) - -system_examples = SystemMessage( - content=""" -**Examples:** - -Abstract: "In recent years, there has been significant progress in the development of Transformer-based models for natural language processing tasks. These models, such as BERT, have set new benchmarks in various language understanding tasks." -Sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark" -Word: "outperforms" -Answer: "vượt trội" - -Abstract: "This paper explores the use of reinforcement learning in optimizing neural network architectures. We demonstrate that our approach achieves state-of-the-art results on several benchmark datasets." -Sentence: "Our approach achieves state-of-the-art results on several benchmark datasets." -Word: "approach" -Answer: "phương pháp" - -Abstract: "The integration of machine learning techniques in medical diagnosis has shown promising results. Our study focuses on the application of deep learning to detect early signs of diseases." -Sentence: "The integration of machine learning techniques in medical diagnosis has shown promising results." -Word: "integration" -Answer: "tích hợp" - -Abstract: "Transformer-based models like BERT have transformed NLP tasks by achieving new state-of-the-art results." -Sentence: "Transformer-based models like BERT have transformed NLP tasks." -Word: "BERT" -Answer: "Bidirectional Encoder Representations from Transformers" -""" -) - -def create_prompt(abstract, sentence, word): - user_message = HumanMessage( - content=f""" -Abstract: {abstract} -Sentence: {sentence} -Word: {word} - -Translate the word to Vietnamese based on the context of the sentence and abstract. -If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển". -If it is an acronym, expand and translate it. -{{format_instructions}} -""" - ) - messages = [system_instructions, system_examples, user_message] - chat_prompt = ChatPromptTemplate(messages=messages) - - return chat_prompt.format() - -# Use the JSON output parser -parser = JsonOutputParser(pydantic_object=TranslateOutput) - -# Create the prompt template -prompt = PromptTemplate( - input_variables=["abstract", "sentence", "word"], - template=create_prompt("{abstract}", "{sentence}", "{word}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Chain the prompt with the LLM and parser -chain_RnD = prompt | llm | parser diff --git a/apis/V1/prompts/translate_detail.py b/apis/V1/prompts/translate_detail.py deleted file mode 100644 index e300c6c0d9de360c8e70b8c66279634f9a5e873c..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/translate_detail.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -from langchain_core.prompts import PromptTemplate -import google.generativeai as genai -from pdf2image import convert_from_path - -t_short = """ -Requirement: -1. Identify the word: "{word}". -2. Understand the context, topic, or field of the document based on the provided information. -3. Provide an explanation of the word in Vietnamese based on the context, topic, or field of the document. -4. Output only the explanation content without any additional formatting. - -Examples: -For the word "loss function": -Explanation: "Hàm mất mát là một hàm số đo lường sự khác biệt giữa giá trị dự đoán và giá trị thực tế của một mô hình học máy. Hàm mất mát càng nhỏ thì mô hình càng tốt." - -For the word "neural network": -Explanation: "Mạng nơ-ron là một hệ thống các đơn vị tính toán kết nối với nhau được mô phỏng theo cách hoạt động của bộ não con người. Mạng nơ-ron được sử dụng trong học máy để phát hiện các mẫu và đưa ra dự đoán." - -For the word "gradient descent": -Explanation: "Thuật toán gradient descent là một phương pháp tối ưu hóa được sử dụng để tìm giá trị cực tiểu của hàm số. Nó thực hiện điều này bằng cách di chuyển từng bước nhỏ theo hướng ngược lại của gradient của hàm số." - -For the word "overfitting": -Explanation: "Overfitting là hiện tượng một mô hình học máy biểu hiện quá mức các dữ liệu huấn luyện, làm giảm khả năng dự đoán chính xác dữ liệu mới. Điều này thường xảy ra khi mô hình quá phức tạp so với dữ liệu." - -Now, follow the steps and provide the explanation for the word "{word}". -""" - - -def GetIndexContext(numPage: int, currentPage: int): - compulsoryContext = (0, 3) - threadContext = 3 - start = currentPage - threadContext - end = currentPage + threadContext - - start = 0 if start < compulsoryContext[1] else start - end = numPage - 1 if end > numPage - 1 else end - - if start == 0: - indexs = [i for i in range(0, end + 1)] - else: - indexs = [i for i in range(threadContext)] - indexs.extend([j for j in range(start, end + 1)]) - - return indexs - - -class AskImage: - def __init__(self) -> None: - genai.configure(api_key=os.environ["GOOGLE_API_KEY"]) - self.model = genai.GenerativeModel("gemini-1.5-flash") - self.prompt = PromptTemplate.from_template(t_short) - self.all_context = [] - self.context = [] - - def uploaded(self, path: str, user_name: str): - try: - self.all_context = convert_from_path( - pdf_path=path, first_page=0, last_page=200, size=(850, 1000), thread_count=100) - print("Converted", len(self.all_context), - "pages for", user_name, "successfully") - return True - except: - print("Error converting pages for", user_name) - return False - - def explain_word(self, word: str, current_page: int): - prompt = self.prompt.format(word=word) - - self.context = [ - self.all_context[idx] - for idx in GetIndexContext(len(self.all_context), current_page) - ] - print("Got context from page", GetIndexContext( - len(self.all_context), current_page)) - input_data = self.context + [prompt] - result = self.model.generate_content(input_data) - return result.text - - def ask(self, question: str): - return self.model.generate_content([question] + self.all_context) diff --git a/apis/V1/prompts/translate_test.py b/apis/V1/prompts/translate_test.py deleted file mode 100644 index f2a60cc3586d3a7de48115ec7bbcb3b3438d93b1..0000000000000000000000000000000000000000 --- a/apis/V1/prompts/translate_test.py +++ /dev/null @@ -1,72 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import HumanMessage, SystemMessage -from ..configs.llm_config import llm -from pydantic import BaseModel, Field - -class TranslateOutput(BaseModel): - answer: str = Field(description="translated word") - word: str = Field(description="word to be translated") - -system_instructions = SystemMessage( - content="""You are a master at translating English into Vietnamese. You are provided with an abstract of a paper, a sentence containing the word you want to translate, and the word itself. Translate the word into Vietnamese based on the context of the document and the sentence. -1. Read the abstract to understand the context. -2. Use the context to translate the given word into Vietnamese. -3. If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển". -4. If the word is an acronym, expand it and provide the translation. -""" -) - -system_examples = SystemMessage( - content=""" -**Examples:** - -Abstract: "In recent years, there has been significant progress in the development of Transformer-based models for natural language processing tasks. These models, such as BERT, have set new benchmarks in various language understanding tasks." -Sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark" -Word: "outperforms" -Answer: "vượt trội" - -Abstract: "This paper explores the use of reinforcement learning in optimizing neural network architectures. We demonstrate that our approach achieves state-of-the-art results on several benchmark datasets." -Sentence: "Our approach achieves state-of-the-art results on several benchmark datasets." -Word: "approach" -Answer: "phương pháp" - -Abstract: "The integration of machine learning techniques in medical diagnosis has shown promising results. Our study focuses on the application of deep learning to detect early signs of diseases." -Sentence: "The integration of machine learning techniques in medical diagnosis has shown promising results." -Word: "integration" -Answer: "tích hợp" - -Abstract: "Transformer-based models like BERT have transformed NLP tasks by achieving new state-of-the-art results." -Sentence: "Transformer-based models like BERT have transformed NLP tasks." -Word: "BERT" -Answer: "Bidirectional Encoder Representations from Transformers" -""" -) - -def create_prompt(abstract, sentence, word): - user_message = HumanMessage( - content=f""" -Abstract: {abstract} -Sentence: {sentence} -Word: {word} - -Translate the word to Vietnamese based on the context of the sentence and abstract. -If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển". -If it is an acronym, expand and translate it. -Return the answer as: "answer": Vietnamese word, "word": input word (English). -""" - ) - messages = [system_instructions, system_examples, user_message] - chat_prompt = ChatPromptTemplate(messages=messages) - - return chat_prompt.format() - -parser = JsonOutputParser(pydantic_object=TranslateOutput) - -prompt = PromptTemplate( - input_variables=["abstract", "sentence", "word"], - template=create_prompt("{abstract}", "{sentence}", "{word}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -chain_test_RnD = prompt | llm | parser diff --git a/apis/V1/providers/__init__.py b/apis/V1/providers/__init__.py deleted file mode 100644 index c943df1c710de8b9705009d6f9e42c783ec7acb9..0000000000000000000000000000000000000000 --- a/apis/V1/providers/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .jwt_provider import JWTProvider -from .llm_provider import chain - -jwt = JWTProvider() -llm = chain diff --git a/apis/V1/providers/__pycache__/__init__.cpython-310.pyc b/apis/V1/providers/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 451d9ba6e2fe1dd995146461fb1f0cffe8eba265..0000000000000000000000000000000000000000 Binary files a/apis/V1/providers/__pycache__/__init__.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/providers/__pycache__/firebase_provider.cpython-310.pyc b/apis/V1/providers/__pycache__/firebase_provider.cpython-310.pyc deleted file mode 100644 index 1e8be38e3df3767ffff6b4ae0f7f3770de7f5ffa..0000000000000000000000000000000000000000 Binary files a/apis/V1/providers/__pycache__/firebase_provider.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/providers/__pycache__/jwt_provider.cpython-310.pyc b/apis/V1/providers/__pycache__/jwt_provider.cpython-310.pyc deleted file mode 100644 index e5d04203f182f0850ec4a6c883b009fdeaa2454a..0000000000000000000000000000000000000000 Binary files a/apis/V1/providers/__pycache__/jwt_provider.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/providers/__pycache__/llm_provider.cpython-310.pyc b/apis/V1/providers/__pycache__/llm_provider.cpython-310.pyc deleted file mode 100644 index a4780b0bfcef51812ac4133ac4f461913ca65650..0000000000000000000000000000000000000000 Binary files a/apis/V1/providers/__pycache__/llm_provider.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/providers/firebase_provider.py b/apis/V1/providers/firebase_provider.py deleted file mode 100644 index 41f3b390be9ab0fe88cfa7a2dc8fc8c91d7b0be0..0000000000000000000000000000000000000000 --- a/apis/V1/providers/firebase_provider.py +++ /dev/null @@ -1,64 +0,0 @@ -from ..configs.firebase_config import firebase_bucket - - -def upload_file_to_storage(file_path, file_name): - """ - Upload a file to Firebase Storage - param: - file_path: str - The path of the file on local machine to be uploaded - return: - str - The public URL of the uploaded file - """ - file_name = file_name - blob = firebase_bucket.blob(file_name) - blob.upload_from_filename(file_path) - blob.make_public() - - return blob.public_url - - -def delete_file_from_storage(file_name): - """ - Delete a file from Firebase Storage - param: - file_name: str - The name of the file to be deleted - return: - bool - True if the file is deleted successfully, False if the file is not found - """ - try: - blob = firebase_bucket.blob(file_name) - blob.delete() - return True - except Exception as e: - print("Error:", e) - return False - - -def list_all_files_in_storage(): - """ - View all files in Firebase Storage - return: - dict - Dictionary with keys are names and values are url of all files in Firebase Storage - """ - blobs = firebase_bucket.list_blobs() - blob_dict = {blob.name: blob.public_url for blob in blobs} - return blob_dict - - -def download_file_from_storage(file_name, destination_path): - """ - Download a file from Firebase Storage - param: - file_name: str - The name of the file to be downloaded - destination_path: str - The path to save the downloaded file - return: - bool - True if the file is downloaded successfully, False if the file is not found - """ - try: - blob = firebase_bucket.blob(file_name) - blob.download_to_filename(destination_path) - print("da tai xun thanh cong") - return True - except Exception as e: - print("Error:", e) - return False diff --git a/apis/V1/providers/jwt_provider.py b/apis/V1/providers/jwt_provider.py deleted file mode 100644 index 65c1ec37fb81fbdf21dc387183c47177f859d51e..0000000000000000000000000000000000000000 --- a/apis/V1/providers/jwt_provider.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import AnyStr, Dict -import os -from fastapi import HTTPException, status -from jose import jwt, JWTError - - -class JWTProvider: - """ - Perform JWT Encryption and Decryption - """ - - def __init__( - self, secret: AnyStr = os.environ.get("JWT_SECRET"), algorithm: AnyStr = "HS256" - ): - self.secret = secret - self.algorithm = algorithm - - def encrypt(self, data: Dict) -> AnyStr: - """ - Encrypt the data with JWT - """ - return jwt.encode(data, self.secret, algorithm=self.algorithm) - - def decrypt(self, token: AnyStr) -> Dict | None: - """ - Decrypt the token with JWT - """ - try: - return jwt.decode(token, self.secret, algorithms=[self.algorithm]) - except JWTError as e: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=f"Could not validate credentials. {str(e)}", - ) diff --git a/apis/V1/providers/llm_provider.py b/apis/V1/providers/llm_provider.py deleted file mode 100644 index a4ebba51d5c0a47e3c7b4a8557978d9848007a7c..0000000000000000000000000000000000000000 --- a/apis/V1/providers/llm_provider.py +++ /dev/null @@ -1,60 +0,0 @@ -from langchain.prompts import PromptTemplate, ChatPromptTemplate -from langchain_core.output_parsers import JsonOutputParser -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage -from ..configs.llm_config import llm -from ..interfaces.llm_interface import TranslateOutput - -# System message for initial context and instructions -system_instructions = SystemMessage( - content="""You are a master in translation. You are given a sentence and a word. - Translate the word to Vietnamese based on the context of the sentence. """ -) - -# Example system message to clarify format with examples -system_examples = SystemMessage( - content=""" -Example 1: - My sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark" - My word: "outperforms" - Answer: "vượt trội" -Example 2: - My sentence: "I love my dog so much, I was interested in him the first time I saw him" - My word: "so much" - Answer: "rất nhiều" -""" -) - - -# Function to create the prompt template dynamically based on input -def create_prompt(sentence, word): - user_message = HumanMessage( - content=f""" -My sentence: {sentence} -My word: {word} - -If this word is not in dictionary, return "Không tìm thấy từ này trong từ điển". -Else, Translate the word to Vietnamese based on the context of the sentence. Return the answer as the translated word. -I want output to have the format: "word": input word ,"answer": translated word, "options": list of options. -Options is a list of words consisting of the answer and 3 random words which can be used to create flashcards for learning. -""" - ) - ai_response = AIMessage(content="Answer: [Your AI will provide the answer here]") - # Combine all messages into a conversation - messages = [system_instructions, system_examples, user_message, ai_response] - chat_prompt = ChatPromptTemplate(messages=messages) - - return chat_prompt.format() - - -# Use the JSON output parser -parser = JsonOutputParser(pydantic_object=TranslateOutput) - -# Create the prompt template -prompt = PromptTemplate( - input_variables=["sentence", "word"], - template=create_prompt("{sentence}", "{word}"), - partial_variables={"format_instructions": parser.get_format_instructions()}, -) - -# Chain the prompt with the LLM and parser -chain = prompt | llm | parser diff --git a/apis/V1/routes/__pycache__/auth.cpython-310.pyc b/apis/V1/routes/__pycache__/auth.cpython-310.pyc deleted file mode 100644 index a158cf8068139a7b0ac67da4c60f656193cf02c7..0000000000000000000000000000000000000000 Binary files a/apis/V1/routes/__pycache__/auth.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/routes/__pycache__/project.cpython-310.pyc b/apis/V1/routes/__pycache__/project.cpython-310.pyc deleted file mode 100644 index 4a393b6b761588868d316c335280a05181eb783a..0000000000000000000000000000000000000000 Binary files a/apis/V1/routes/__pycache__/project.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/routes/__pycache__/upload.cpython-310.pyc b/apis/V1/routes/__pycache__/upload.cpython-310.pyc deleted file mode 100644 index 8f473a75836e6c0cb24dfc09bef7ab2a5aa87c16..0000000000000000000000000000000000000000 Binary files a/apis/V1/routes/__pycache__/upload.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/routes/__pycache__/word.cpython-310.pyc b/apis/V1/routes/__pycache__/word.cpython-310.pyc deleted file mode 100644 index 741eb059438b655962de9fcd1d59afd10c5936e8..0000000000000000000000000000000000000000 Binary files a/apis/V1/routes/__pycache__/word.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/routes/auth.py b/apis/V1/routes/auth.py deleted file mode 100644 index b70b4c05d1a4e01c3378c45e2a9636c841b09072..0000000000000000000000000000000000000000 --- a/apis/V1/routes/auth.py +++ /dev/null @@ -1,50 +0,0 @@ -from fastapi import APIRouter, status, Response, Depends -from typing import Annotated -from ..models.users import User -from ..controllers.auth_controller import login_control -from ..configs.database_config import db -from ..interfaces.auth_interface import _LoginResponseInterface -from ..interfaces.auth_interface import Credential -from ..utils.response_fmt import jsonResponseFmt -from ..middlewares.auth_middleware import get_current_user -from ..utils.utils import get_current_time -from ..models.users import UserSchema - -print(get_current_time()) -router = APIRouter(prefix="/auth", tags=["Authentications"]) - -collection_name = db["user"] - -user_dependency = Annotated[User, Depends(get_current_user)] - - -@router.post( - "/login", status_code=status.HTTP_200_OK, response_model=_LoginResponseInterface -) -async def login(credential: Credential): - # print("credential", credential) - token = login_control(credential.credential) - print("token", token) - return jsonResponseFmt({"token": token}) - - -@router.get("") -async def get_me(user: user_dependency): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - return jsonResponseFmt(user) - - -@router.get("/logout", status_code=status.HTTP_200_OK) -async def logout(user: user_dependency, response: Response): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - response.delete_cookie("token") - return jsonResponseFmt(None) - - -@router.post("/findall") -async def find_all(user_data: User): - alo = UserSchema(**user_data.dict()).create() - print("alo", alo) - return jsonResponseFmt(alo) diff --git a/apis/V1/routes/project.py b/apis/V1/routes/project.py deleted file mode 100644 index 88cdeb2560d002faa3c92cea9af4cd86e73ea672..0000000000000000000000000000000000000000 --- a/apis/V1/routes/project.py +++ /dev/null @@ -1,75 +0,0 @@ -from fastapi import APIRouter, status, Path, Depends, Query, File, Form, UploadFile, HTTPException, BackgroundTasks -from fastapi.responses import FileResponse -import os -import re -from typing import Annotated -from ..middlewares.auth_middleware import get_current_user -from ..models.words import Word, WordSchema -from ..models.projects import ProjectSchema, Project -from ..models.users import User -from ..configs.database_config import db -from ..utils.response_fmt import jsonResponseFmt -import shutil -from ..providers.firebase_provider import ( - delete_file_from_storage, - download_file_from_storage, - list_all_files_in_storage, - upload_file_to_storage) -from ..utils.utils import get_current_time -router = APIRouter(prefix="/project", tags=["Projects"]) -collection = db["project"] - -user_dependency = Annotated[User, Depends(get_current_user)] - - -@router.get('/list', status_code=status.HTTP_200_OK) -async def list_projects(user: user_dependency): - if user is None: - return jsonResponseFmt(None, "Authentication failed", 401) - user_id = user.get("id") - projects = ProjectSchema.read_all_project_by_user_id(user_id) - return jsonResponseFmt(projects, "List of projects", 200) - - -@router.get('/{project_id}', status_code=status.HTTP_200_OK) -async def get_project(background_tasks: BackgroundTasks, user: user_dependency, project_id: str = Path(min_length=0)): - if user is None: - return jsonResponseFmt(None, "Authentication failed", 401) - user_id = user.get("id") - project_data = ProjectSchema.read_project_by_id(project_id, user_id) - if project_data is None: - jsonResponseFmt(None, "Project not found", 404) - file_name = project_data.get("file") - sanitized_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name) - destination = "storage/" + sanitized_file_name - download_file_from_storage( - file_name=file_name, destination_path=destination) - - def remove_file(path: str): - os.remove(path) - background_tasks.add_task(remove_file, destination) - return FileResponse(path=destination, filename="file.pdf", media_type='application/pdf') - - -@router.post('/create', status_code=status.HTTP_201_CREATED) -async def create_project( - user: user_dependency, - title: str = Form(min_length=1, max_length=100), - file: UploadFile = File(...) -): - if user is None: - return jsonResponseFmt(None, "Authentication failed", 401) - if file.content_type != "application/pdf": - return jsonResponseFmt(None, "Invalid file format. Only PDF files are allowed", 400) - try: - file_path = os.path.join("storage/", file.filename) - user_id = user.get("id") - user_email = user.get("email") - file_name = user_email + "/" + get_current_time() + "_" + file.filename - with open(file_path, "wb") as buffer: - shutil.copyfileobj(file.file, buffer) - upload_file_to_storage(file_path=file_path, file_name=file_name) - ProjectSchema(title=title, file=file_name).create(user_id) - return jsonResponseFmt(None, "Project created successfully", 201) - except Exception as e: - return jsonResponseFmt(None, str(e), 500) diff --git a/apis/V1/routes/upload.py b/apis/V1/routes/upload.py deleted file mode 100644 index 8b8030f0b80e583c1d14d1db4fab00f9b779f447..0000000000000000000000000000000000000000 --- a/apis/V1/routes/upload.py +++ /dev/null @@ -1,96 +0,0 @@ -from fastapi import Depends, UploadFile, status, Query -import requests -from fastapi import APIRouter -from ..prompts.translate_detail import AskImage -from typing import Annotated -from ..models.users import User -from ..middlewares.auth_middleware import get_current_user -from ..utils.response_fmt import jsonResponseFmt -import os -from typing import Dict - -router = APIRouter(prefix="/upload", tags=["Upload"]) - -user_dependency = Annotated[User, Depends(get_current_user)] - -UserService: Dict[str, AskImage] = {} - - -@router.get("/pdf_url", status_code=status.HTTP_200_OK) -async def upload_pdf_URL(user: user_dependency, pdf_url: str = Query(min_length=3)): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - try: - user_id = user["id"] - user_name = user["name"] - response = requests.get(pdf_url) - if response.status_code == 200: - os.makedirs("storage", exist_ok=True) - with open(f"storage//{user_id}.pdf", "wb") as f: - f.write(response.content) - UserService[user_id] = AskImage() - convert_result = UserService[user_id].uploaded( - f"storage//{user_id}.pdf", user_name) - if convert_result == True: - return jsonResponseFmt(None, msg="Success", code=200) - else: - return jsonResponseFmt(None, msg="Uploaded but unable to extract content", code=409) - except: - return jsonResponseFmt(None, msg="Error", code=400) - - -@router.post("/pdf", status_code=status.HTTP_200_OK) -async def upload_pdf(pdf_file: UploadFile, user: user_dependency): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - try: - user_id = user["id"] - user_name = user["name"] - contents = await pdf_file.read() - os.makedirs("storage", exist_ok=True) - with open(f"storage//{user_id}.pdf", "wb") as f: - f.write(contents) - UserService[user_id] = AskImage() - convert_result = UserService[user_id].uploaded( - f"storage//{user_id}.pdf", user_name) - if convert_result == True: - return jsonResponseFmt(None, msg="Success", code=200) - else: - return jsonResponseFmt(None, msg="Uploaded but unable to extract content", code=409) - except Exception as e: - print(e) - return jsonResponseFmt(None, msg="e", code=400) - - -@router.get("/explain_word", status_code=status.HTTP_200_OK) -async def explain_word(user: user_dependency, current_page: int = Query(ge=0), word: str = Query(min_length=2)): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - user_id = user["id"] - try: - AI = UserService[user_id] - res = AI.explain_word(word=word, current_page=current_page) - return jsonResponseFmt(res, msg="Success", code=200) - - except Exception as e: - print(e) - return jsonResponseFmt(None, msg=e, code=400) - - -@router.post("/ask", status_code=status.HTTP_200_OK) -async def ask(question: str, user: user_dependency): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - user_id = user["id"] - try: - AI = UserService[user_id] - res = AI.ask(question, f"storage//{user_id}.pdf") - return jsonResponseFmt(res, msg="Success", code=200) - except Exception as e: - print(e) - return jsonResponseFmt(None, msg=e, code=400) - - -@router.get("/check_users", status_code=status.HTTP_200_OK) -async def check_user(): - return jsonResponseFmt(UserService.keys(), msg="Success", code=200) diff --git a/apis/V1/routes/word.py b/apis/V1/routes/word.py deleted file mode 100644 index 65a25e9d509461bb86cdc18c92ab75be5c446a72..0000000000000000000000000000000000000000 --- a/apis/V1/routes/word.py +++ /dev/null @@ -1,56 +0,0 @@ -from fastapi import APIRouter, status, Path, Depends -from typing import Annotated -from ..middlewares.auth_middleware import get_current_user -from ..models.words import Word, WordSchema -from ..models.users import User -from ..configs.database_config import db -from ..utils.response_fmt import jsonResponseFmt -from ..controllers.word_controller import ( - list_word_controlller, - add_word_controller, - update_word_controller, - delete_word_controller, -) - -router = APIRouter(prefix="/word", tags=["Words"]) -collection = db["word"] - -user_dependency = Annotated[User, Depends(get_current_user)] - - -@router.get("/listWord", status_code=status.HTTP_200_OK) -async def list_words(user: user_dependency): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - word = list_word_controlller(user) - return jsonResponseFmt(word) - - -@router.post("/addWord", status_code=status.HTTP_201_CREATED) -async def add_word(word: Word, user: user_dependency): - print("word", word) - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - return add_word_controller(user, word) - - -@router.put("/updateWord/{word_id}", status_code=status.HTTP_200_OK) -async def update_word( - user: user_dependency, word_data: Word, word_id: str = Path(min_length=1) -): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - return update_word_controller(user, word_id, word_data) - - -@router.delete("/deleteWord/{word_id}", status_code=status.HTTP_200_OK) -async def delete_word(user: user_dependency, word_id: str = Path(min_length=1)): - if user is None: - return jsonResponseFmt(None, msg="Authentication failed", code=401) - return delete_word_controller(user, word_id) - - -@router.post("/test") -async def test(word: Word): - user_id = "6661455a0b293703d07f73ba" - print(WordSchema(**word.dict()).add_word(user_id)) diff --git a/apis/V1/schemas/__pycache__/user_schema.cpython-310.pyc b/apis/V1/schemas/__pycache__/user_schema.cpython-310.pyc deleted file mode 100644 index 0c682c37e803abf6b9fcd90e4dfa3c871dcf145b..0000000000000000000000000000000000000000 Binary files a/apis/V1/schemas/__pycache__/user_schema.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/schemas/user_schema.py b/apis/V1/schemas/user_schema.py deleted file mode 100644 index e370bede8f0ea0cf53f5dc8788f364e0aac7be23..0000000000000000000000000000000000000000 --- a/apis/V1/schemas/user_schema.py +++ /dev/null @@ -1,12 +0,0 @@ -def getUser(user) -> dict: - return { - "id": str(user["_id"]), - "name": user["name"], - "email": user["email"], - "picture": user["picture"], - # "role": user["role"], - } - - -def list_serial(users) -> list: - return [getUser(user) for user in users] diff --git a/apis/V1/schemas/word_schema.py b/apis/V1/schemas/word_schema.py deleted file mode 100644 index 1b800a774957402076e10c5c5b2500c6d6c2e674..0000000000000000000000000000000000000000 --- a/apis/V1/schemas/word_schema.py +++ /dev/null @@ -1,12 +0,0 @@ -def getword(word) -> dict: - return { - "id": str(word["_id"]), - "word": word["word"], - "answer": word["answer"], - "options": word["options"], - "user_id": word["user_id"], - } - - -def list_serial(words) -> list: - return [getword(word) for word in words] diff --git a/apis/V1/utils/__pycache__/response_fmt.cpython-310.pyc b/apis/V1/utils/__pycache__/response_fmt.cpython-310.pyc deleted file mode 100644 index 24a1415fa1bc25632a6f743c146c39eb6c9c75cb..0000000000000000000000000000000000000000 Binary files a/apis/V1/utils/__pycache__/response_fmt.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/utils/__pycache__/utils.cpython-310.pyc b/apis/V1/utils/__pycache__/utils.cpython-310.pyc deleted file mode 100644 index 4b329a19ddce14bbcc731d9338807fd12e8d0465..0000000000000000000000000000000000000000 Binary files a/apis/V1/utils/__pycache__/utils.cpython-310.pyc and /dev/null differ diff --git a/apis/V1/utils/response_fmt.py b/apis/V1/utils/response_fmt.py deleted file mode 100644 index 8b8c158aa0c7e05273894e2a2ac6736ade88a6a9..0000000000000000000000000000000000000000 --- a/apis/V1/utils/response_fmt.py +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Any -from fastapi.responses import JSONResponse - - -def jsonResponseFmt(data: Any, msg: str = "Success", code: int = 200, **kwargs): - return JSONResponse({"msg": msg, "data": data}, code, **kwargs) diff --git a/apis/V1/utils/utils.py b/apis/V1/utils/utils.py deleted file mode 100644 index 74940922488e7170e3fa0349e765fef6c23d3026..0000000000000000000000000000000000000000 --- a/apis/V1/utils/utils.py +++ /dev/null @@ -1,8 +0,0 @@ -import datetime - - -def get_current_time() -> str: - """ - Get the current time in the string format. - """ - return datetime.datetime.now().isoformat()