ABAO77 committed on
Commit
408128e
·
verified ·
1 Parent(s): e4c1bf0

Upload 62 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. apis/V1/configs/__pycache__/database_config.cpython-310.pyc +0 -0
  2. apis/V1/configs/__pycache__/firebase_config.cpython-310.pyc +0 -0
  3. apis/V1/configs/__pycache__/llm_config.cpython-310.pyc +0 -0
  4. apis/V1/configs/database_config.py +16 -0
  5. apis/V1/configs/drant_config.py +34 -0
  6. apis/V1/configs/firebase_config.py +37 -0
  7. apis/V1/configs/llm_config.py +19 -0
  8. apis/V1/configs/vector_embedding_config.py +8 -0
  9. apis/V1/controllers/__pycache__/auth_controller.cpython-310.pyc +0 -0
  10. apis/V1/controllers/__pycache__/word_controller.cpython-310.pyc +0 -0
  11. apis/V1/controllers/auth_controller.py +35 -0
  12. apis/V1/controllers/word_controller.py +67 -0
  13. apis/V1/interfaces/__pycache__/auth_interface.cpython-310.pyc +0 -0
  14. apis/V1/interfaces/__pycache__/llm_interface.cpython-310.pyc +0 -0
  15. apis/V1/interfaces/auth_interface.py +18 -0
  16. apis/V1/interfaces/llm_interface.py +21 -0
  17. apis/V1/middlewares/__pycache__/auth_middleware.cpython-310.pyc +0 -0
  18. apis/V1/middlewares/auth_middleware.py +41 -0
  19. apis/V1/models/__pycache__/projects.cpython-310.pyc +0 -0
  20. apis/V1/models/__pycache__/users.cpython-310.pyc +0 -0
  21. apis/V1/models/__pycache__/words.cpython-310.pyc +0 -0
  22. apis/V1/models/projects.py +92 -0
  23. apis/V1/models/users.py +84 -0
  24. apis/V1/models/words.py +96 -0
  25. apis/V1/prompts/__pycache__/document_type_clf.cpython-310.pyc +0 -0
  26. apis/V1/prompts/__pycache__/grammarly.cpython-310.pyc +0 -0
  27. apis/V1/prompts/__pycache__/summarize_doc.cpython-310.pyc +0 -0
  28. apis/V1/prompts/__pycache__/translate_abstract_BIZ.cpython-310.pyc +0 -0
  29. apis/V1/prompts/__pycache__/translate_abstract_RnD.cpython-310.pyc +0 -0
  30. apis/V1/prompts/__pycache__/translate_detail.cpython-310.pyc +0 -0
  31. apis/V1/prompts/__pycache__/translate_test.cpython-310.pyc +0 -0
  32. apis/V1/prompts/document_type_clf.py +78 -0
  33. apis/V1/prompts/grammarly.py +82 -0
  34. apis/V1/prompts/summarize_doc.py +19 -0
  35. apis/V1/prompts/translate.py +60 -0
  36. apis/V1/prompts/translate_abstract_BIZ.py +74 -0
  37. apis/V1/prompts/translate_abstract_RnD.py +71 -0
  38. apis/V1/prompts/translate_detail.py +88 -0
  39. apis/V1/prompts/translate_test.py +72 -0
  40. apis/V1/providers/__init__.py +5 -0
  41. apis/V1/providers/__pycache__/__init__.cpython-310.pyc +0 -0
  42. apis/V1/providers/__pycache__/firebase_provider.cpython-310.pyc +0 -0
  43. apis/V1/providers/__pycache__/jwt_provider.cpython-310.pyc +0 -0
  44. apis/V1/providers/__pycache__/llm_provider.cpython-310.pyc +0 -0
  45. apis/V1/providers/firebase_provider.py +64 -0
  46. apis/V1/providers/jwt_provider.py +34 -0
  47. apis/V1/providers/llm_provider.py +60 -0
  48. apis/V1/routes/__pycache__/auth.cpython-310.pyc +0 -0
  49. apis/V1/routes/__pycache__/project.cpython-310.pyc +0 -0
  50. apis/V1/routes/__pycache__/upload.cpython-310.pyc +0 -0
apis/V1/configs/__pycache__/database_config.cpython-310.pyc ADDED
Binary file (643 Bytes). View file
 
apis/V1/configs/__pycache__/firebase_config.cpython-310.pyc ADDED
Binary file (1.16 kB). View file
 
apis/V1/configs/__pycache__/llm_config.cpython-310.pyc ADDED
Binary file (487 Bytes). View file
 
apis/V1/configs/database_config.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+ from pymongo.mongo_client import MongoClient
6
+ from pymongo.server_api import ServerApi
7
+
8
+ # Create a new client and connect to the server
9
+ client = MongoClient(os.getenv("MONGODB_URL"), server_api=ServerApi("1"))
10
+ db = client.bandict_db
11
+ collection_name = db["user"]
12
+ try:
13
+ client.admin.command("ping")
14
+ print("Pinged your deployment. You successfully connected to MongoDB!")
15
+ except Exception as e:
16
+ print(e)
apis/V1/configs/drant_config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
2
+ from langchain_community.document_loaders import TextLoader
3
+ from langchain_qdrant import Qdrant
4
+ from langchain_text_splitters import CharacterTextSplitter
5
+ import qdrant_client
6
+ from .vector_embedding_config import embeddings
7
+ import os
8
+
9
+ url = os.getenv("QDRANT_URL")
10
+ qdrant_api_key = os.getenv("QDRANT_API_KEY")
11
+ client = qdrant_client.QdrantClient(url, api_key=qdrant_api_key)
12
+
13
+ # collection_config = qdrant_client.http.models.VectorParams(
14
+ # size=768, # 768 for instructor-xl, 1536 for OpenAI
15
+ # distance=qdrant_client.http.models.Distance.COSINE,
16
+ # )
17
+ # client.recreate_collection(
18
+ # collection_name="BANDict",
19
+ # vectors_config=collection_config,
20
+ # )
21
+ # vectorstore = Qdrant(
22
+ # client=client,
23
+ # collection_name="BANDict",
24
+ # embeddings=embeddings,
25
+ # )
26
+
27
+ # loader = TextLoader("./note.txt")
28
+ # documents = loader.load()
29
+ # text_splitter = CharacterTextSplitter(
30
+ # chunk_size=100,
31
+ # chunk_overlap=20,
32
+ # )
33
+ # docs = text_splitter.split_documents(documents)
34
+ # vectorstore.add_documents(docs)
apis/V1/configs/firebase_config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import firebase_admin
2
+ from firebase_admin import credentials
3
+ from firebase_admin import storage
4
+
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+ firebase_url_storageBucket = os.getenv("FIREBASE_URL_STORAGEBUCKET")
11
+
12
+ # Get credentials from environment variables
13
+ credential_firebase = {
14
+ "type": os.getenv("FIREBASE_TYPE"),
15
+ "project_id": os.getenv("FIREBASE_PROJECT_ID"),
16
+ "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
17
+ "private_key": os.getenv("FIREBASE_PRIVATE_KEY").replace('\\n', '\n'),
18
+ "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
19
+ "client_id": os.getenv("FIREBASE_CLIENT_ID"),
20
+ "auth_uri": os.getenv("FIREBASE_AUTH_URI"),
21
+ "token_uri": os.getenv("FIREBASE_TOKEN_URI"),
22
+ "auth_provider_x509_cert_url": os.getenv("FIREBASE_AUTH_PROVIDER_X509_CERT_URL"),
23
+ "client_x509_cert_url": os.getenv("FIREBASE_CLIENT_X509_CERT_URL"),
24
+ "universe_domain": os.getenv("FIREBASE_UNIVERSE_DOMAIN")
25
+ }
26
+
27
+ # Check if the app is not initialized yet
28
+ if not firebase_admin._apps:
29
+ # Initialize the app with the credentials
30
+ cred = credentials.Certificate(credential_firebase)
31
+ firebase_admin.initialize_app(cred, {
32
+ 'storageBucket': firebase_url_storageBucket
33
+ })
34
+
35
+ # Initialize Firestore
36
+ firebase_bucket = storage.bucket(app=firebase_admin.get_app())
37
+ print("Storage connected")
apis/V1/configs/llm_config.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import GoogleGenerativeAI
2
+ import os
3
+ from langchain.globals import set_llm_cache
4
+ from langchain.cache import InMemoryCache
5
+
6
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
7
+ # llm = GoogleGenerativeAI(
8
+ # model="models/gemini-pro",
9
+ # temperature=0,
10
+ # verbose=True,
11
+ # google_api_key=GOOGLE_API_KEY,
12
+ # )
13
+ llm = GoogleGenerativeAI(
14
+ model="gemini-1.5-flash",
15
+ temperature=0,
16
+ verbose=True,
17
+ google_api_key=GOOGLE_API_KEY,
18
+ )
19
+ set_llm_cache(InMemoryCache())
apis/V1/configs/vector_embedding_config.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
2
+ import os
3
+
4
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
5
+
6
+ embeddings = GoogleGenerativeAIEmbeddings(
7
+ model="models/embedding-001", google_api_key=GOOGLE_API_KEY
8
+ )
apis/V1/controllers/__pycache__/auth_controller.cpython-310.pyc ADDED
Binary file (1.13 kB). View file
 
apis/V1/controllers/__pycache__/word_controller.cpython-310.pyc ADDED
Binary file (2.33 kB). View file
 
apis/V1/controllers/auth_controller.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import HTTPException, status
2
+ from ..models.users import User, UserSchema
3
+ from ..configs.database_config import db
4
+ from ..interfaces.auth_interface import Credential
5
+ from ..providers import jwt as jwt_provider
6
+ from ..middlewares.auth_middleware import get_current_user
7
+ import jwt
8
+
9
+
10
+ collection_name = db["user"]
11
+
12
+
13
+ def login_control(token):
14
+ if not token:
15
+ raise HTTPException(
16
+ status_code=status.HTTP_401_UNAUTHORIZED,
17
+ detail="Authorization Token is required",
18
+ )
19
+ decoded_token = jwt.decode(token, options={"verify_signature": False})
20
+ user = {
21
+ "name": decoded_token["name"],
22
+ "email": decoded_token["email"],
23
+ "picture": decoded_token["picture"],
24
+ }
25
+ user = User(**user)
26
+
27
+ existing_user = UserSchema.find_by_email(user.email)
28
+ if not existing_user:
29
+ user_id = UserSchema(**user.dict()).create()
30
+ else:
31
+ user_id = existing_user["_id"]
32
+
33
+ token = jwt_provider.encrypt({"id": str(user_id)})
34
+
35
+ return token
apis/V1/controllers/word_controller.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from typing import Annotated
3
+ from ..middlewares.auth_middleware import get_current_user
4
+ from ..models.users import User
5
+ from ..models.words import WordSchema
6
+ from ..configs.database_config import db
7
+ from ..utils.response_fmt import jsonResponseFmt
8
+ from bson import ObjectId
9
+ import random
10
+
11
+ router = APIRouter(prefix="/word", tags=["Words"])
12
+ collection = db["word"]
13
+
14
+ user_dependency = Annotated[User, Depends(get_current_user)]
15
+
16
+
17
+ def list_word_controlller(user):
18
+ user_id = user.get("id")
19
+ try:
20
+ print("user_id", user_id)
21
+ words = WordSchema.read_all_words_by_user_id(user_id)
22
+ return words
23
+
24
+ except Exception as e:
25
+ raise HTTPException(status_code=500, detail=str(e))
26
+
27
+
28
+ def add_word_controller(user, word):
29
+ user_id = user.get("id")
30
+ try:
31
+ existing_word = WordSchema.check_existing_word(word.word, user_id)
32
+ if existing_word:
33
+ return jsonResponseFmt(None, msg="Existed", code=400)
34
+ random.shuffle(word.options)
35
+ WordSchema(**word.dict()).create(user_id)
36
+ return jsonResponseFmt(None, code=201)
37
+ except Exception as e:
38
+ return jsonResponseFmt(None, msg=str(e), code=500)
39
+
40
+
41
+ def update_word_controller(user, word_id, word_data):
42
+ user_id = user.get("id")
43
+ try:
44
+ print("user", user)
45
+ print("word_id", word_id)
46
+ print("word_data", word_data)
47
+
48
+ word_data.user_id = user_id
49
+ print("word_data", word_data.dict())
50
+ WordSchema(**word_data.dict()).update(str(word_id))
51
+ return jsonResponseFmt(None, code=200)
52
+ except Exception as e:
53
+ return jsonResponseFmt(None, msg=str(e), code=500)
54
+
55
+
56
+ def delete_word_controller(user, word_id):
57
+ user_id = user.get("id")
58
+ try:
59
+ existing_word = collection.find_one(
60
+ {"_id": ObjectId(word_id), "user_id": user_id}
61
+ )
62
+ if not existing_word:
63
+ return jsonResponseFmt(None, msg="Word not found", code=404)
64
+ collection.delete_one({"_id": ObjectId(word_id)})
65
+ return jsonResponseFmt(None, code=200)
66
+ except Exception as e:
67
+ return jsonResponseFmt(None, msg=str(e), code=500)
apis/V1/interfaces/__pycache__/auth_interface.cpython-310.pyc ADDED
Binary file (1.1 kB). View file
 
apis/V1/interfaces/__pycache__/llm_interface.cpython-310.pyc ADDED
Binary file (1.35 kB). View file
 
apis/V1/interfaces/auth_interface.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ class Credential(BaseModel):
5
+ credential: str = Field(..., example="F9P/3?@q2!vq")
6
+
7
+
8
+ class _LoginResponseInterface(BaseModel):
9
+ token: str = Field(..., title="JWT Token")
10
+
11
+
12
+ class LoginResponseInterface(BaseModel):
13
+ msg: str = Field(..., title="Message")
14
+ data: _LoginResponseInterface = Field(..., title="User Data")
15
+
16
+
17
+ class AuthInterface(BaseModel):
18
+ gtoken: str = Field(..., title="Google Access-Token")
apis/V1/interfaces/llm_interface.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ class TranslateOutput(BaseModel):
5
+ answer: str = Field(description="translated word")
6
+ word: str = Field(description="word to be translated")
7
+
8
+
9
+ class GrammarlyOutput(BaseModel):
10
+ corrected_sentence: str = Field(description="corrected sentence")
11
+ incorrect: list = Field(description="list of incorrect words or phrases")
12
+ correct: list = Field(description="list of correct words or phrases")
13
+
14
+
15
+ class ClassifyDocumentOutput(BaseModel):
16
+ type: str = Field(description="document type RnD or Business")
17
+
18
+
19
+ class ClassifyAndSummarizeOutput(BaseModel):
20
+ summary: str = Field(description="summary of the document")
21
+ type: str = Field(description="document type RnD or Business")
apis/V1/middlewares/__pycache__/auth_middleware.cpython-310.pyc ADDED
Binary file (1.14 kB). View file
 
apis/V1/middlewares/auth_middleware.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Annotated
2
+ from fastapi import Depends
3
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
4
+ from ..schemas.user_schema import getUser
5
+ from ..providers import jwt
6
+ from ..configs.database_config import db
7
+ from bson import ObjectId
8
+ from jose import JWTError
9
+ from ..utils.response_fmt import jsonResponseFmt
10
+
11
+ security = HTTPBearer()
12
+
13
+ collection = db["user"]
14
+
15
+
16
+ # Get the auth token from the request header,
17
+ # parse token to get user data, and return the user data.
18
+ def get_current_user(
19
+ credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)]
20
+ ):
21
+ try:
22
+ # Get token
23
+ token = credentials.credentials
24
+ # If Authorization is not provided, return Un-authorized.
25
+ if not token:
26
+ return jsonResponseFmt(None, msg="Authentication failed", code=401)
27
+
28
+ # Decrypted token to get user data.
29
+ payload = jwt.decrypt(token)
30
+ user_id: str = payload["id"]
31
+ if not user_id:
32
+ return jsonResponseFmt(None, msg="Authentication failed", code=401)
33
+ user = collection.find_one({"_id": ObjectId(user_id)})
34
+ print("request of", user["email"])
35
+ # If user is not found, return Un-authorized.
36
+ if not user:
37
+ return jsonResponseFmt(None, msg="Authentication failed", code=401)
38
+
39
+ return getUser(user)
40
+ except JWTError:
41
+ return jsonResponseFmt(None, msg="Authentication failed", code=401)
apis/V1/models/__pycache__/projects.cpython-310.pyc ADDED
Binary file (3.31 kB). View file
 
apis/V1/models/__pycache__/users.cpython-310.pyc ADDED
Binary file (2.93 kB). View file
 
apis/V1/models/__pycache__/words.cpython-310.pyc ADDED
Binary file (3.59 kB). View file
 
apis/V1/models/projects.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional, AnyStr, List, Dict
3
+ from ..utils.utils import get_current_time
4
+ from ..configs.database_config import db
5
+ from bson import ObjectId
6
+ from fastapi import UploadFile, File
7
+
8
+ collection = db["project"]
9
+
10
+
11
+ class Project(BaseModel):
12
+ title: str = Field(..., title="title")
13
+ # abstract: str = Field(..., title="Abstract")
14
+ file: UploadFile = File(..., title="File")
15
+
16
+ class Config:
17
+ schema_extra = {
18
+ "example": {
19
+ "id": "666460100c23ec4225cb2bc3",
20
+ "title": "Transformer",
21
+ # "abstract": "Kiến trúc transformer làm cho kỷ nguyên AI bùng nổ",
22
+ "file": 'bert.pdf',
23
+ "user_id": "6661455703d07f73ba",
24
+ }
25
+ }
26
+
27
+
28
+ class ProjectSchema:
29
+ def __init__(
30
+ self,
31
+ id: AnyStr = None,
32
+ title: AnyStr = "",
33
+ # abstract: AnyStr = "",
34
+ file: List = [AnyStr],
35
+ user_id: AnyStr = "",
36
+ created_at=get_current_time(),
37
+ ):
38
+ self.id = id
39
+ self.title = title
40
+ # self.abstract = abstract
41
+ self.file = file
42
+ self.user_id = user_id
43
+ self.created_at = created_at
44
+
45
+ def to_dict(self):
46
+ data_dict = {
47
+ "title": self.title,
48
+ # "abstract": self.abstract,
49
+ "file": self.file,
50
+ "user_id": self.user_id,
51
+ "created_at": self.created_at,
52
+ }
53
+ if self.id is not None:
54
+ data_dict["_id"] = str(self.id)
55
+ return data_dict
56
+
57
+ @staticmethod
58
+ def from_dict(data: Dict):
59
+ return ProjectSchema(
60
+ id=data.get("_id"),
61
+ title=data.get("title"),
62
+ file=data.get("file"),
63
+ user_id=data.get("user_id"),
64
+ created_at=data.get("created_at"),
65
+ )
66
+
67
+ def create(self, user_id: str):
68
+ project_dict = self.to_dict()
69
+ project_dict["user_id"] = user_id
70
+ print("datao")
71
+ collection.insert_one(project_dict)
72
+
73
+ @staticmethod
74
+ def read_all_project_by_user_id(user_id: str):
75
+ data = collection.find({"user_id": user_id})
76
+ return [ProjectSchema.from_dict(d).to_dict() for d in data]
77
+
78
+ @staticmethod
79
+ def read_project_by_id(project_id: str, user_id: str):
80
+ data = collection.find_one(
81
+ {"_id": ObjectId(project_id), "user_id": user_id})
82
+ return ProjectSchema.from_dict(data).to_dict()
83
+
84
+ def update(self, project_id: str):
85
+ collection.update_one(
86
+ {"_id": ObjectId(project_id)},
87
+ {"$set": self.to_dict()},
88
+ )
89
+
90
+ @staticmethod
91
+ def delete(project_id: str):
92
+ collection.delete_one({"_id": ObjectId(project_id)})
apis/V1/models/users.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field, EmailStr
2
+ from typing import Dict, AnyStr
3
+ from ..configs.database_config import db
4
+ from ..utils.utils import get_current_time
5
+ from bson import ObjectId
6
+
7
+
8
+ collection = db["user"]
9
+
10
+
11
+ class User(BaseModel):
12
+ name: str = Field("", title="User Name")
13
+ email: EmailStr = Field("", title="User Email")
14
+ picture: str = Field("", title="User Picture")
15
+
16
+ class Config:
17
+ schema_extra = {
18
+ "example": {
19
+ "name": "John Doe",
20
+ "email": "johnUS192@gmail.com",
21
+ "picture": "https://example.com/picture.jpg",
22
+ }
23
+ }
24
+
25
+
26
+ class UserSchema:
27
+ def __init__(
28
+ self,
29
+ id: AnyStr = None,
30
+ name: AnyStr = "",
31
+ email: AnyStr = "",
32
+ picture: AnyStr = "",
33
+ created_at=get_current_time(),
34
+ ):
35
+ self.id = id
36
+ self.name = name
37
+ self.email = email
38
+ self.picture = picture
39
+ self.created_at = created_at
40
+
41
+ def to_dict(self):
42
+ data_dict = {
43
+ "name": self.name,
44
+ "email": self.email,
45
+ "picture": self.picture,
46
+ "created_at": self.created_at,
47
+ }
48
+ if self.id is not None:
49
+ data_dict["_id"] = self.id
50
+ return data_dict
51
+
52
+ @staticmethod
53
+ def from_dict(data: Dict):
54
+ return UserSchema(
55
+ id=data.get("_id"),
56
+ name=data.get("name"),
57
+ email=data.get("email"),
58
+ picture=data.get("picture"),
59
+ created_at=data.get("created_at"),
60
+ )
61
+
62
+ @staticmethod
63
+ def find_all():
64
+ data = collection.find()
65
+ return [UserSchema.from_dict(user).to_dict for user in data]
66
+
67
+ @staticmethod
68
+ def find_by_id(id):
69
+ data = collection.find_one({"_id": ObjectId(id)})
70
+ if data is None:
71
+ return None
72
+ return UserSchema.from_dict(data).to_dict()
73
+
74
+ @staticmethod
75
+ def find_by_email(email):
76
+ data = collection.find_one({"email": email})
77
+ if data is None:
78
+ return None
79
+ return UserSchema.from_dict(data).to_dict()
80
+
81
+ def create(self):
82
+ user = collection.insert_one(self.to_dict())
83
+ user_id = str(user.inserted_id)
84
+ return user_id
apis/V1/models/words.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional, AnyStr, List, Dict
3
+ from ..utils.utils import get_current_time
4
+ from ..configs.database_config import db
5
+ from bson import ObjectId
6
+
7
+ collection = db["word"]
8
+
9
+
10
+ class Word(BaseModel):
11
+ id: Optional[str] = Field(title="ID")
12
+ word: str = Field(title="Word")
13
+ answer: str = Field(title="Meaning")
14
+ options: list = Field(title="Options")
15
+ user_id: Optional[str] = Field(title="User ID")
16
+
17
+ class Config:
18
+ schema_extra = {
19
+ "example": {
20
+ "id": "666460100c23ec4225cb2bc3",
21
+ "word": "Apple",
22
+ "answer": "táo",
23
+ "options": ["dừa", "thanh long", "ổi", "táo"],
24
+ "user_id": "6661455703d07f73ba",
25
+ }
26
+ }
27
+
28
+
29
+ class WordSchema:
30
+ def __init__(
31
+ self,
32
+ id: AnyStr = None,
33
+ word: AnyStr = "",
34
+ answer: AnyStr = "",
35
+ options: List = [AnyStr],
36
+ user_id: AnyStr = "",
37
+ created_at=get_current_time(),
38
+ ):
39
+ self.id = id
40
+ self.word = word
41
+ self.answer = answer
42
+ self.options = options
43
+ self.user_id = user_id
44
+ self.created_at = created_at
45
+
46
+ def to_dict(self):
47
+ data_dict = {
48
+ "word": self.word,
49
+ "answer": self.answer,
50
+ "options": self.options,
51
+ "user_id": self.user_id,
52
+ "created_at": self.created_at,
53
+ }
54
+ if self.id is not None:
55
+ data_dict["_id"] = str(self.id)
56
+ return data_dict
57
+
58
+ @staticmethod
59
+ def from_dict(data: Dict):
60
+ return WordSchema(
61
+ id=data.get("_id"),
62
+ word=data.get("word"),
63
+ answer=data.get("answer"),
64
+ options=data.get("options"),
65
+ user_id=data.get("user_id"),
66
+ created_at=data.get("created_at"),
67
+ )
68
+
69
+ def create(self, user_id: str):
70
+ word_dict = self.to_dict()
71
+ word_dict["user_id"] = user_id
72
+ collection.insert_one(word_dict)
73
+
74
+ @staticmethod
75
+ def read_all_words_by_user_id(user_id: str):
76
+ data = collection.find({"user_id": user_id})
77
+ return [WordSchema.from_dict(d).to_dict() for d in data]
78
+
79
+ @staticmethod
80
+ def read_word_by_id(word_id: str, user_id: str):
81
+ data = collection.find_one({"_id": ObjectId(word_id), "user_id": user_id})
82
+ return WordSchema.from_dict(data).to_dict()
83
+
84
+ @staticmethod
85
+ def check_existing_word(word: str, user_id: str):
86
+ return collection.find_one({"word": word, "user_id": user_id})
87
+
88
+ def update(self, word_id: str):
89
+ collection.update_one(
90
+ {"_id": ObjectId(word_id)},
91
+ {"$set": self.to_dict()},
92
+ )
93
+
94
+ @staticmethod
95
+ def delete(word_id: str):
96
+ collection.delete_one({"_id": ObjectId(word_id)})
apis/V1/prompts/__pycache__/document_type_clf.cpython-310.pyc ADDED
Binary file (3.27 kB). View file
 
apis/V1/prompts/__pycache__/grammarly.cpython-310.pyc ADDED
Binary file (2.89 kB). View file
 
apis/V1/prompts/__pycache__/summarize_doc.cpython-310.pyc ADDED
Binary file (781 Bytes). View file
 
apis/V1/prompts/__pycache__/translate_abstract_BIZ.cpython-310.pyc ADDED
Binary file (3.27 kB). View file
 
apis/V1/prompts/__pycache__/translate_abstract_RnD.cpython-310.pyc ADDED
Binary file (3.26 kB). View file
 
apis/V1/prompts/__pycache__/translate_detail.cpython-310.pyc ADDED
Binary file (4.57 kB). View file
 
apis/V1/prompts/__pycache__/translate_test.cpython-310.pyc ADDED
Binary file (3.7 kB). View file
 
apis/V1/prompts/document_type_clf.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate
2
+ from langchain_core.output_parsers import JsonOutputParser
3
+ from langchain_core.messages import HumanMessage, SystemMessage
4
+ from ..configs.llm_config import llm
5
+ from ..interfaces.llm_interface import ClassifyDocumentOutput
6
+
7
+ # System message for initial context and instructions
8
+ system_instructions = SystemMessage(
9
+ content="""You are an expert in classifying document types. Identify the type of document based on short content as Business or RnD."""
10
+ )
11
+
12
+ # Example system message to clarify format with examples
13
+ system_examples = SystemMessage(
14
+ content="""
15
+ Examples:
16
+
17
+ 1.
18
+ Document:
19
+ "A central goal of machine learning is the development of systems that can solve many problems in as many data domains as possible. Current architectures, however, cannot be applied beyond a small set of stereotyped settings, as they bake in domain & task assumptions or scale poorly to large inputs or outputs. In this work, we propose Perceiver IO, a general-purpose architecture that handles data from arbitrary settings while scaling linearly with the size of inputs and outputs."
20
+ Type: RnD
21
+
22
+ 2.
23
+ Document:
24
+ "The global economy is projected to grow at a slower pace this year, as rising inflation and supply chain disruptions continue to impact markets worldwide. Central banks are adjusting their monetary policies in response to these challenges."
25
+ Type: Business
26
+
27
+ 3.
28
+ Document:
29
+ "The company’s latest financial report shows a significant increase in net income, driven by cost reduction strategies and increased sales in emerging markets. Investors are optimistic about the future performance given these strong quarterly results."
30
+ Type: Business
31
+
32
+ 4.
33
+ Document:
34
+ "Our research introduces a novel approach to quantum computing, focusing on error correction and scalability. The proposed methods significantly improve the stability of qubits, which is crucial for the practical implementation of quantum algorithms."
35
+ Type: RnD
36
+
37
+ 5.
38
+ Document:
39
+ "An analysis of the stock market reveals that technology stocks have outperformed other sectors over the past quarter. This trend is attributed to the rapid digital transformation across industries and the increasing demand for tech solutions."
40
+ Type: Business
41
+
42
+ 6.
43
+ Document:
44
+ "The startup has launched an innovative mobile app designed to streamline project management for remote teams. The app includes features such as real-time collaboration, task tracking, and performance analytics, making it a comprehensive tool for businesses."
45
+ Type: Business
46
+ """
47
+ )
48
+
49
+ # Function to create the prompt template dynamically based on input
50
+ def create_prompt(document):
51
+ user_message = HumanMessage(
52
+ content=f"""
53
+ Document: {document}
54
+ Output format:
55
+ "type": type of document,
56
+
57
+ If the document is Business, please type "Business" else type "RnD".
58
+ {{format_instructions}}
59
+ """
60
+ )
61
+ # Combine all messages into a conversation
62
+ messages = [system_instructions, system_examples, user_message]
63
+ chat_prompt = ChatPromptTemplate(messages=messages)
64
+
65
+ return chat_prompt.format()
66
+
67
+ # Use the JSON output parser
68
+ parser = JsonOutputParser(pydantic_object=ClassifyDocumentOutput)
69
+
70
+ # Create the prompt template
71
+ prompt = PromptTemplate(
72
+ input_variables=["document"],
73
+ template=create_prompt("{document}"),
74
+ partial_variables={"format_instructions": parser.get_format_instructions()},
75
+ )
76
+
77
+ # Chain the prompt with the LLM and parser
78
+ chain_type_classify = prompt | llm | parser
apis/V1/prompts/grammarly.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate
2
+ from langchain_core.output_parsers import JsonOutputParser
3
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
4
+ from ..configs.llm_config import llm
5
+ from ..interfaces.llm_interface import GrammarlyOutput
6
+
7
+ # System message for initial context and instructions
8
+ system_instructions = SystemMessage(
9
+ content="""You are an expert in checking grammar errors. Identify the errors and provide corrections."""
10
+ )
11
+
12
+ # Example system message to clarify format with examples
13
+ system_examples = SystemMessage(
14
+ content="""
15
+ Examples:
16
+
17
+ 1.
18
+ Sentence: "She don't know nothing about the new project, and there is many details that needs to be explained."
19
+ Incorrect: [don't know nothing, is, needs]
20
+ Correct: [doesn't know anything, are, need]
21
+ Corrected sentence: "She doesn't know anything about the new project, and there are many details that need to be explained."
22
+
23
+ 2.
24
+ Sentence: "He go to the store every day, but he never buy nothing."
25
+ Incorrect: [go, buy nothing]
26
+ Correct: [goes, buys anything]
27
+ Corrected sentence: "He goes to the store every day, but he never buys anything."
28
+
29
+ 3.
30
+ Sentence: "She quickly ran to the store, bought some groceries, and returning home."
31
+ Incorrect: [returning]
32
+ Correct: [returned]
33
+ Corrected sentence: "She quickly ran to the store, bought some groceries, and returned home."
34
+
35
+ 4.
36
+ Sentence: "Each of the students have completed their assignments, but the teacher is not satisfied with their works."
37
+ Incorrect: [have, works]
38
+ Correct: [has, work]
39
+ Corrected sentence: "Each of the students has completed their assignments, but the teacher is not satisfied with their work."
40
+
41
+ 5.
42
+ Sentence: "If he was more careful, he will not make so many mistakes, which is causing problems for the team."
43
+ Incorrect: [was, will, is]
44
+ Correct: [were, would, are]
45
+ Corrected sentence: "If he were more careful, he would not make so many mistakes, which are causing problems for the team."
46
+ """
47
+ )
48
+
49
+
50
+ # Function to create the prompt template dynamically based on input
51
+ def create_prompt(sentence):
52
+ user_message = HumanMessage(
53
+ content=f"""
54
+ My sentence: {sentence}
55
+ Output format:
56
+ "sentence": my sentence,
57
+ "incorrect": [list of incorrect words or phrases],
58
+ "correct": [list of correct words or phrases],
59
+ "corrected_sentence": corrected sentence
60
+
61
+ If no corrections are needed, return the original sentence as the corrected sentence and empty lists for incorrect and correct.
62
+ {{format_instructions}}
63
+ """
64
+ )
65
+ # Combine all messages into a conversation
66
+ messages = [system_instructions, system_examples, user_message]
67
+ chat_prompt = ChatPromptTemplate(messages=messages)
68
+ return chat_prompt.format()
69
+
70
+
71
+ # Use the JSON output parser
72
+ parser = JsonOutputParser(pydantic_object=GrammarlyOutput)
73
+
74
+ # Create the prompt template
75
+ prompt = PromptTemplate(
76
+ input_variables=["sentence"],
77
+ template=create_prompt("{sentence}"),
78
+ partial_variables={"format_instructions": parser.get_format_instructions()},
79
+ )
80
+
81
+ # Chain the prompt with the LLM and parser
82
+ chain_grammarly = prompt | llm | parser
apis/V1/prompts/summarize_doc.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from ..configs.llm_config import llm
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

from ..interfaces.llm_interface import ClassifyAndSummarizeOutput

# One call does two jobs: summarize the text and label it Business vs RnD.
prompt_template = """Write a concise summary of the following:
"{text}" and classify the document type as Business or RnD.
Return the answer as: \n{format_instructions}.
"""

# JSON parser enforcing the ClassifyAndSummarizeOutput schema.
parser = JsonOutputParser(pydantic_object=ClassifyAndSummarizeOutput)

prompt = PromptTemplate.from_template(
    template=prompt_template,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> LLM -> JSON parser
chain_summarize = prompt | llm | parser
apis/V1/prompts/translate.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

from ..configs.llm_config import llm
from ..interfaces.llm_interface import TranslateOutput

# Role definition: context-aware English -> Vietnamese word translation.
system_instructions = SystemMessage(
    content="""You are a master in translation. You are given a sentence and a word.
    Translate the word to Vietnamese based on the context of the sentence. """
)

# Few-shot examples pinning the expected answer format.
system_examples = SystemMessage(
    content="""
    Example 1:
    My sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark"
    My word: "outperforms"
    Answer: "vượt trội"
    Example 2:
    My sentence: "I love my dog so much, I was interested in him the first time I saw him"
    My word: "so much"
    Answer: "rất nhiều"
    """
)


def create_prompt(sentence, word):
    """Flatten instructions, examples and the user request into one template string."""
    query = HumanMessage(
        content=f"""
    My sentence: {sentence}
    My word: {word}

    If this word is not in dictionary, return "Không tìm thấy từ này trong từ điển".
    Else, Translate the word to Vietnamese based on the context of the sentence. Return the answer as the translated word.
    I want output to have the format: "answer": translated word, "options": list of options.
    Options is a list of words consisting of the answer and 3 random words which can be used to create flashcards for learning.
    """
    )
    # Placeholder assistant turn marking where the answer should appear.
    placeholder = AIMessage(content="Answer: [Your AI will provide the answer here]")
    conversation = ChatPromptTemplate(
        messages=[system_instructions, system_examples, query, placeholder]
    )
    return conversation.format()


# JSON parser enforcing the TranslateOutput schema.
parser = JsonOutputParser(pydantic_object=TranslateOutput)

prompt = PromptTemplate(
    input_variables=["sentence", "word"],
    template=create_prompt("{sentence}", "{word}"),
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> LLM -> JSON parser
chain = prompt | llm | parser
apis/V1/prompts/translate_abstract_BIZ.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.messages import HumanMessage, SystemMessage

from ..configs.llm_config import llm
from ..interfaces.llm_interface import TranslateOutput

# Role definition: EN -> VI translation specialised for Finance/Economics.
system_instructions = SystemMessage(
    content="""You are an expert in translating English into Vietnamese for Finance and Economics topics. Translate the given word or phrase based on the context of the abstract and sentence.
    1. Read the abstract to understand the context.
    2. Translate the given word or phrase into Vietnamese using the context.
    3. If the word or phrase is not in the dictionary, return "Không tìm thấy từ này trong từ điển".
    4. If the word or phrase is an acronym, expand it and provide the translation.
    """
)

# Few-shot examples covering plain words, phrases and acronyms.
system_examples = SystemMessage(
    content="""
    **Examples:**

    Abstract: "The efficient market hypothesis (EMH) suggests that financial markets are informationally efficient, meaning that asset prices fully reflect all available information."
    Sentence: "The efficient market hypothesis (EMH) suggests that financial markets are informationally efficient."
    Word: "efficient"
    Answer: "hiệu quả"

    Abstract: "Quantitative easing (QE) is a monetary policy whereby a central bank purchases government securities or other securities from the market in order to increase the money supply and encourage lending and investment."
    Sentence: "Quantitative easing (QE) is a monetary policy whereby a central bank purchases government securities."
    Word: "Quantitative easing"
    Answer: "Nới lỏng định lượng"

    Abstract: "Inflation is the rate at which the general level of prices for goods and services rises, eroding purchasing power."
    Sentence: "Inflation is the rate at which the general level of prices for goods and services rises."
    Word: "Inflation"
    Answer: "lạm phát"

    Abstract: "GDP, or Gross Domestic Product, measures the total value of all goods and services produced within a country in a given period."
    Sentence: "GDP, or Gross Domestic Product, measures the total value of all goods and services produced within a country."
    Word: "GDP"
    Answer: "Tổng sản phẩm quốc nội"
    """
)


def create_prompt(abstract, sentence, word):
    """Flatten instructions, examples and the user request into one template string."""
    query = HumanMessage(
        content=f"""
    Abstract: {abstract}
    Sentence: {sentence}
    Word: {word}

    Translate the word to Vietnamese based on the context of the sentence and abstract.
    If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển".
    If it is an acronym, expand and translate it.
    {{format_instructions}}
    """
    )
    conversation = ChatPromptTemplate(
        messages=[system_instructions, system_examples, query]
    )
    return conversation.format()


# JSON parser enforcing the TranslateOutput schema.
parser = JsonOutputParser(pydantic_object=TranslateOutput)

prompt = PromptTemplate(
    input_variables=["abstract", "sentence", "word"],
    template=create_prompt("{abstract}", "{sentence}", "{word}"),
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> LLM -> JSON parser
chain_BIZ = prompt | llm | parser
apis/V1/prompts/translate_abstract_RnD.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.messages import HumanMessage, SystemMessage

from ..configs.llm_config import llm
from ..interfaces.llm_interface import TranslateOutput

# Role definition: EN -> VI translation specialised for R&D/academic topics.
system_instructions = SystemMessage(
    content="""You are an expert in translating English into Vietnamese for R&D topics. Translate the given word or phrase based on the context of the abstract and sentence.
    1. Read the abstract to understand the context.
    2. Translate the given word or phrase into Vietnamese using the context.
    3. If the word or phrase is not in the dictionary, return "Không tìm thấy từ này trong từ điển".
    4. If the word or phrase is an acronym, expand it and provide the translation.
    """
)

# Few-shot examples covering plain words and acronym expansion.
system_examples = SystemMessage(
    content="""
    **Examples:**

    Abstract: "In recent years, there has been significant progress in the development of Transformer-based models for natural language processing tasks. These models, such as BERT, have set new benchmarks in various language understanding tasks."
    Sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark"
    Word: "outperforms"
    Answer: "vượt trội"

    Abstract: "This paper explores the use of reinforcement learning in optimizing neural network architectures. We demonstrate that our approach achieves state-of-the-art results on several benchmark datasets."
    Sentence: "Our approach achieves state-of-the-art results on several benchmark datasets."
    Word: "approach"
    Answer: "phương pháp"

    Abstract: "The integration of machine learning techniques in medical diagnosis has shown promising results. Our study focuses on the application of deep learning to detect early signs of diseases."
    Sentence: "The integration of machine learning techniques in medical diagnosis has shown promising results."
    Word: "integration"
    Answer: "tích hợp"

    Abstract: "Transformer-based models like BERT have transformed NLP tasks by achieving new state-of-the-art results."
    Sentence: "Transformer-based models like BERT have transformed NLP tasks."
    Word: "BERT"
    Answer: "Bidirectional Encoder Representations from Transformers"
    """
)


def create_prompt(abstract, sentence, word):
    """Flatten instructions, examples and the user request into one template string."""
    query = HumanMessage(
        content=f"""
    Abstract: {abstract}
    Sentence: {sentence}
    Word: {word}

    Translate the word to Vietnamese based on the context of the sentence and abstract.
    If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển".
    If it is an acronym, expand and translate it.
    {{format_instructions}}
    """
    )
    conversation = ChatPromptTemplate(
        messages=[system_instructions, system_examples, query]
    )
    return conversation.format()


# JSON parser enforcing the TranslateOutput schema.
parser = JsonOutputParser(pydantic_object=TranslateOutput)

prompt = PromptTemplate(
    input_variables=["abstract", "sentence", "word"],
    template=create_prompt("{abstract}", "{sentence}", "{word}"),
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> LLM -> JSON parser
chain_RnD = prompt | llm | parser
apis/V1/prompts/translate_detail.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from langchain_core.prompts import PromptTemplate
import google.generativeai as genai
from pdf2image import convert_from_path

# Prompt asking Gemini for a short Vietnamese explanation of one word,
# grounded in the page images sent alongside it.  The examples pin both the
# output language (Vietnamese) and the bare-text output format.
t_short = """
Requirement:
1. Identify the word: "{word}".
2. Understand the context, topic, or field of the document based on the provided information.
3. Provide an explanation of the word in Vietnamese based on the context, topic, or field of the document.
4. Output only the explanation content without any additional formatting.

Examples:
For the word "loss function":
Explanation: "Hàm mất mát là một hàm số đo lường sự khác biệt giữa giá trị dự đoán và giá trị thực tế của một mô hình học máy. Hàm mất mát càng nhỏ thì mô hình càng tốt."

For the word "neural network":
Explanation: "Mạng nơ-ron là một hệ thống các đơn vị tính toán kết nối với nhau được mô phỏng theo cách hoạt động của bộ não con người. Mạng nơ-ron được sử dụng trong học máy để phát hiện các mẫu và đưa ra dự đoán."

For the word "gradient descent":
Explanation: "Thuật toán gradient descent là một phương pháp tối ưu hóa được sử dụng để tìm giá trị cực tiểu của hàm số. Nó thực hiện điều này bằng cách di chuyển từng bước nhỏ theo hướng ngược lại của gradient của hàm số."

For the word "overfitting":
Explanation: "Overfitting là hiện tượng một mô hình học máy biểu hiện quá mức các dữ liệu huấn luyện, làm giảm khả năng dự đoán chính xác dữ liệu mới. Điều này thường xảy ra khi mô hình quá phức tạp so với dữ liệu."

Now, follow the steps and provide the explanation for the word "{word}".
"""
28
+
29
+
30
def GetIndexContext(numPage: int, currentPage: int, contextWindow: int = 3):
    """Select page indices that give reading context around the current page.

    Always keeps the first ``contextWindow`` pages (document front matter
    such as the abstract) and adds a symmetric window of ``contextWindow``
    pages around ``currentPage``, clamped to the document bounds.

    param:
        numPage: int - total number of pages in the document
        currentPage: int - 0-based index of the page being read
        contextWindow: int - half-width of the context window; default 3
            matches the previously hard-coded value, so existing callers
            are unaffected
    return:
        list[int] - sorted, duplicate-free page indices
    """
    start = currentPage - contextWindow
    end = min(currentPage + contextWindow, numPage - 1)

    # Snap to page 0 when the window would touch the compulsory
    # front-matter pages, keeping the result contiguous.
    if start < contextWindow:
        start = 0

    if start == 0:
        return list(range(end + 1))

    # start >= contextWindow here, so the two runs never overlap.
    return list(range(contextWindow)) + list(range(start, end + 1))
46
+
47
+
48
class AskImage:
    """Explain words / answer questions about a PDF via Gemini vision.

    The PDF is rendered to page images once (``uploaded``); each query then
    sends a small window of pages around the reader's current page as
    visual context to the model.
    """

    def __init__(self) -> None:
        genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
        self.model = genai.GenerativeModel("gemini-1.5-flash")
        self.prompt = PromptTemplate.from_template(t_short)
        self.all_context = []  # every rendered page image
        self.context = []  # pages selected for the current query

    def uploaded(self, path: str, user_name: str):
        """Render the PDF at ``path`` to page images; True on success."""
        try:
            # NOTE(review): pdf2image pages are 1-based; first_page=0 is
            # presumably treated like 1 — confirm intended.
            self.all_context = convert_from_path(
                pdf_path=path, first_page=0, last_page=200, size=(850, 1000), thread_count=100)
            print("Converted", len(self.all_context),
                  "pages for", user_name, "successfully")
            return True
        except Exception:  # was a bare except; keep the best-effort contract
            print("Error converting pages for", user_name)
            return False

    def explain_word(self, word: str, current_page: int):
        """Return a Vietnamese explanation of ``word`` in document context."""
        prompt = self.prompt.format(word=word)

        # Compute the index window once (was computed twice: once for the
        # slice, once for the log line).
        indexes = GetIndexContext(len(self.all_context), current_page)
        self.context = [self.all_context[idx] for idx in indexes]
        print("Got context from page", indexes)
        result = self.model.generate_content(self.context + [prompt])
        return result.text

    def ask(self, question: str, current_page: int):
        """Answer ``question`` using only pages around ``current_page``."""
        indexes = GetIndexContext(len(self.all_context), current_page)
        self.context = [self.all_context[idx] for idx in indexes]
        print("Got context from page", indexes)
        # Bug fix: previously sent self.all_context (every page) to the
        # model even though the context window above had just been computed.
        result = self.model.generate_content([question] + self.context)
        return result.text
apis/V1/prompts/translate_test.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.messages import HumanMessage, SystemMessage
from pydantic import BaseModel, Field

from ..configs.llm_config import llm


# Local output schema (no docstring on purpose: it would leak into the
# generated JSON schema used by the parser's format instructions).
class TranslateOutput(BaseModel):
    answer: str = Field(description="translated word")
    word: str = Field(description="word to be translated")


# Role definition: abstract-aware EN -> VI translation.
system_instructions = SystemMessage(
    content="""You are a master at translating English into Vietnamese. You are provided with an abstract of a paper, a sentence containing the word you want to translate, and the word itself. Translate the word into Vietnamese based on the context of the document and the sentence.
    1. Read the abstract to understand the context.
    2. Use the context to translate the given word into Vietnamese.
    3. If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển".
    4. If the word is an acronym, expand it and provide the translation.
    """
)

# Few-shot examples covering plain words and acronym expansion.
system_examples = SystemMessage(
    content="""
    **Examples:**

    Abstract: "In recent years, there has been significant progress in the development of Transformer-based models for natural language processing tasks. These models, such as BERT, have set new benchmarks in various language understanding tasks."
    Sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark"
    Word: "outperforms"
    Answer: "vượt trội"

    Abstract: "This paper explores the use of reinforcement learning in optimizing neural network architectures. We demonstrate that our approach achieves state-of-the-art results on several benchmark datasets."
    Sentence: "Our approach achieves state-of-the-art results on several benchmark datasets."
    Word: "approach"
    Answer: "phương pháp"

    Abstract: "The integration of machine learning techniques in medical diagnosis has shown promising results. Our study focuses on the application of deep learning to detect early signs of diseases."
    Sentence: "The integration of machine learning techniques in medical diagnosis has shown promising results."
    Word: "integration"
    Answer: "tích hợp"

    Abstract: "Transformer-based models like BERT have transformed NLP tasks by achieving new state-of-the-art results."
    Sentence: "Transformer-based models like BERT have transformed NLP tasks."
    Word: "BERT"
    Answer: "Bidirectional Encoder Representations from Transformers"
    """
)


def create_prompt(abstract, sentence, word):
    """Flatten instructions, examples and the user request into one template string."""
    query = HumanMessage(
        content=f"""
    Abstract: {abstract}
    Sentence: {sentence}
    Word: {word}

    Translate the word to Vietnamese based on the context of the sentence and abstract.
    If the word is not in the dictionary, return "Không tìm thấy từ này trong từ điển".
    If it is an acronym, expand and translate it.
    Return the answer as: "answer": Vietnamese word, "word": input word (English).
    """
    )
    conversation = ChatPromptTemplate(
        messages=[system_instructions, system_examples, query]
    )
    return conversation.format()


# JSON parser enforcing the local TranslateOutput schema.
parser = JsonOutputParser(pydantic_object=TranslateOutput)

prompt = PromptTemplate(
    input_variables=["abstract", "sentence", "word"],
    template=create_prompt("{abstract}", "{sentence}", "{word}"),
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> LLM -> JSON parser
chain_test_RnD = prompt | llm | parser
apis/V1/providers/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from .jwt_provider import JWTProvider
from .llm_provider import chain

# Shared provider singletons, created once at package import time.
jwt = JWTProvider()  # JWT encode/decode helper
llm = chain  # prompt -> LLM -> parser translation chain
apis/V1/providers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (262 Bytes). View file
 
apis/V1/providers/__pycache__/firebase_provider.cpython-310.pyc ADDED
Binary file (2.24 kB). View file
 
apis/V1/providers/__pycache__/jwt_provider.cpython-310.pyc ADDED
Binary file (1.46 kB). View file
 
apis/V1/providers/__pycache__/llm_provider.cpython-310.pyc ADDED
Binary file (2.14 kB). View file
 
apis/V1/providers/firebase_provider.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ..configs.firebase_config import firebase_bucket
2
+
3
+
4
def upload_file_to_storage(file_path, file_name):
    """
    Upload a file to Firebase Storage
    param:
        file_path: str - The path of the file on local machine to be uploaded
        file_name: str - The destination object name in the bucket
    return:
        str - The public URL of the uploaded file
    """
    # (removed a no-op `file_name = file_name` self-assignment)
    blob = firebase_bucket.blob(file_name)
    blob.upload_from_filename(file_path)
    # Anyone holding the URL can read the object once it is public.
    blob.make_public()

    return blob.public_url
18
+
19
+
20
def delete_file_from_storage(file_name):
    """
    Delete a file from Firebase Storage
    param:
        file_name: str - The name of the file to be deleted
    return:
        bool - True if the file is deleted successfully, False if the file is not found
    """
    try:
        firebase_bucket.blob(file_name).delete()
    except Exception as e:
        print("Error:", e)
        return False
    return True
35
+
36
+
37
def list_all_files_in_storage():
    """
    View all files in Firebase Storage
    return:
        dict - Dictionary with keys are names and values are url of all files in Firebase Storage
    """
    return {item.name: item.public_url for item in firebase_bucket.list_blobs()}
46
+
47
+
48
def download_file_from_storage(file_name, destination_path):
    """
    Download a file from Firebase Storage
    param:
        file_name: str - The name of the file to be downloaded
        destination_path: str - The path to save the downloaded file
    return:
        bool - True if the file is downloaded successfully, False if the file is not found
    """
    try:
        target = firebase_bucket.blob(file_name)
        target.download_to_filename(destination_path)
        print("da tai xun thanh cong")
        return True
    except Exception as e:
        print("Error:", e)
        return False
apis/V1/providers/jwt_provider.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import AnyStr, Dict
2
+ import os
3
+ from fastapi import HTTPException, status
4
+ from jose import jwt, JWTError
5
+
6
+
7
class JWTProvider:
    """
    Perform JWT Encryption and Decryption.

    Annotation fixes: ``typing.AnyStr`` is a constrained TypeVar meant for
    generic functions, not a plain annotation, so ``str`` is used instead;
    ``decrypt`` never returns None (it raises on failure), so its return
    type is ``Dict``.
    """

    def __init__(
        self, secret: str = os.environ.get("JWT_SECRET"), algorithm: str = "HS256"
    ):
        # The default is read from the environment once, at import time.
        # NOTE(review): if JWT_SECRET is unset, secret is None and
        # encode/decode will fail later — presumably deployments always
        # set it; verify.
        self.secret = secret
        self.algorithm = algorithm

    def encrypt(self, data: Dict) -> str:
        """
        Encrypt the data with JWT
        """
        return jwt.encode(data, self.secret, algorithm=self.algorithm)

    def decrypt(self, token: str) -> Dict:
        """
        Decrypt the token with JWT

        Raises an HTTP 401 instead of returning on an invalid token.
        """
        try:
            return jwt.decode(token, self.secret, algorithms=[self.algorithm])
        except JWTError as e:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail=f"Could not validate credentials. {str(e)}",
            )
apis/V1/providers/llm_provider.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

from ..configs.llm_config import llm
from ..interfaces.llm_interface import TranslateOutput

# Role definition: context-aware English -> Vietnamese word translation.
system_instructions = SystemMessage(
    content="""You are a master in translation. You are given a sentence and a word.
    Translate the word to Vietnamese based on the context of the sentence. """
)

# Few-shot examples pinning the expected answer format.
system_examples = SystemMessage(
    content="""
    Example 1:
    My sentence: "outperforms a Transformer-based BERT baseline on the GLUE language benchmark"
    My word: "outperforms"
    Answer: "vượt trội"
    Example 2:
    My sentence: "I love my dog so much, I was interested in him the first time I saw him"
    My word: "so much"
    Answer: "rất nhiều"
    """
)


def create_prompt(sentence, word):
    """Flatten instructions, examples and the user request into one template string."""
    query = HumanMessage(
        content=f"""
    My sentence: {sentence}
    My word: {word}

    If this word is not in dictionary, return "Không tìm thấy từ này trong từ điển".
    Else, Translate the word to Vietnamese based on the context of the sentence. Return the answer as the translated word.
    I want output to have the format: "word": input word ,"answer": translated word, "options": list of options.
    Options is a list of words consisting of the answer and 3 random words which can be used to create flashcards for learning.
    """
    )
    # Placeholder assistant turn marking where the answer should appear.
    placeholder = AIMessage(content="Answer: [Your AI will provide the answer here]")
    conversation = ChatPromptTemplate(
        messages=[system_instructions, system_examples, query, placeholder]
    )
    return conversation.format()


# JSON parser enforcing the TranslateOutput schema.
parser = JsonOutputParser(pydantic_object=TranslateOutput)

prompt = PromptTemplate(
    input_variables=["sentence", "word"],
    template=create_prompt("{sentence}", "{word}"),
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# prompt -> LLM -> JSON parser
chain = prompt | llm | parser
apis/V1/routes/__pycache__/auth.cpython-310.pyc ADDED
Binary file (1.85 kB). View file
 
apis/V1/routes/__pycache__/project.cpython-310.pyc ADDED
Binary file (3.27 kB). View file
 
apis/V1/routes/__pycache__/upload.cpython-310.pyc ADDED
Binary file (3.12 kB). View file