puzan789 commited on
Commit
0870bc8
·
0 Parent(s):

add:updated

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +35 -0
  2. .github/workflows/dockerhub.yaml +39 -0
  3. .gitignore +149 -0
  4. Dockerfile +32 -0
  5. README.md +10 -0
  6. app.py +55 -0
  7. config.ini +39 -0
  8. examples/easy_ocr_example.py +12 -0
  9. examples/url_text_extraction_example.py +10 -0
  10. logging_config.yaml +27 -0
  11. requirements.txt +100 -0
  12. src/__init__.py +19 -0
  13. src/api/__init__.py +4 -0
  14. src/api/analytics_api.py +235 -0
  15. src/api/conversai_api.py +650 -0
  16. src/api/jewel_mirror.py +27 -0
  17. src/api/jwt_bearer.py +29 -0
  18. src/api/speech_api.py +85 -0
  19. src/api/user_management_api.py +157 -0
  20. src/jewel_mirror/__init__.py +0 -0
  21. src/jewel_mirror/jewel_langgraph.py +69 -0
  22. src/jewel_mirror/jewel_mirror.py +387 -0
  23. src/llms/__init__.py +0 -0
  24. src/models/__init__.py +4 -0
  25. src/models/apis_models.py +208 -0
  26. src/models/response_handling_models.py +27 -0
  27. src/models/utls.py +17 -0
  28. src/pipeline/__init__.py +4 -0
  29. src/pipeline/conversai_analytic_pipeline.py +34 -0
  30. src/pipeline/conversai_pipeline.py +76 -0
  31. src/pipeline/speech_transcription_pipeline.py +20 -0
  32. src/pipeline/user_management_pipeline.py +80 -0
  33. src/prompts/__init__.py +0 -0
  34. src/prompts/custom_prompts.py +70 -0
  35. src/services/__init__.py +4 -0
  36. src/services/answer_query/__init__.py +4 -0
  37. src/services/answer_query/answerquery.py +99 -0
  38. src/services/document/__init__.py +4 -0
  39. src/services/document/add_document.py +30 -0
  40. src/services/embeddings/BGE-M3_vector_embedding.py +13 -0
  41. src/services/embeddings/Qdrant_BM25_embedding.py +11 -0
  42. src/services/embeddings/__init__.py +4 -0
  43. src/services/embeddings/jina_embeddings.py +8 -0
  44. src/services/embeddings/sentence_transformers_all_MiniLM_L6_v2_vector_embedding.py +20 -0
  45. src/services/file_analyzer/__init__.py +4 -0
  46. src/services/file_analyzer/data_analyzer.py +28 -0
  47. src/services/get_links/__init__.py +4 -0
  48. src/services/get_links/web_scraper.py +49 -0
  49. src/services/ocr/__init__.py +4 -0
  50. src/services/ocr/easy_ocr/__init__.py +4 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.github/workflows/dockerhub.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish ConversAI Docker image
2
+
3
+ on:
4
+ push:
5
+ branches: [ jan20 ]
6
+
7
+ jobs:
8
+ push_to_registry:
9
+ name: Push Docker image to Docker Hub
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ packages: write
13
+ contents: read
14
+ attestations: write
15
+ steps:
16
+ - name: Check out the repo
17
+ uses: actions/checkout@v4
18
+
19
+ - name: Log in to Docker Hub
20
+ uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
21
+ with:
22
+ username: ${{ secrets.DOCKER_USERNAME }}
23
+ password: ${{ secrets.DOCKER_PASSWORD }}
24
+
25
+ - name: Extract metadata (tags, labels) for Docker
26
+ id: meta
27
+ uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
28
+ with:
29
+ images: techconsp/tcp_frwjeiqhpyl9ty53cyfg2jtpbhwwzl_co
30
+
31
+ - name: Build and push Docker image
32
+ id: push
33
+ uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
34
+ with:
35
+ context: .
36
+ file: ./Dockerfile
37
+ push: true
38
+ tags: ${{ steps.meta.outputs.tags }}
39
+ labels: ${{ steps.meta.outputs.labels }}
.gitignore ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # Machine Learning and Speech Libraries
132
+ # TensorFlow
133
+ *.ckpt*
134
+ *.pbtxt
135
+ *.tfevents*
136
+ # PyTorch
137
+ *.pt
138
+ # Keras
139
+ *.h5
140
+ # Scikit-learn
141
+ *.pkl
142
+ # Speech Recognition
143
+ *.wav
144
+ *.mp3
145
+ .idea/
146
+ logs
147
+ images
148
+ resources
149
+ experiments
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY . /app
6
+
7
+ RUN chmod -R 777 /app
8
+
9
+ RUN apt-get update && \
10
+ apt-get upgrade -y && \
11
+ apt-get install -y \
12
+ build-essential \
13
+ git \
14
+ cmake \
15
+ poppler-utils \
16
+ ffmpeg \
17
+ libsm6 \
18
+ libxext6 && \
19
+ apt-get clean && \
20
+ rm -rf /var/lib/apt/lists/*
21
+
22
+ RUN pip install --no-cache-dir nltk && \
23
+ mkdir -p /app/nltk_data && \
24
+ chmod -R 777 /app/nltk_data && \
25
+ python -m nltk.downloader -d /app/nltk_data all
26
+
27
+ RUN pip install --no-cache-dir --upgrade pip && \
28
+ pip install --no-cache-dir -r requirements.txt
29
+
30
+ EXPOSE 8000
31
+
32
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: J7RSYILECL
3
+ emoji: 🚀
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
5
+ import uvicorn
6
+ from fastapi import FastAPI, Depends
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from src import logging as logger
9
+ from src.api.conversai_api import conversai_api_router
10
+ from src.api.speech_api import speech_translator_router
11
+ from src.api.user_management_api import user_management_api_router
12
+ from src.utils.utils import load_ini_config
13
+ from src.api.analytics_api import analytic_endpoints_router
14
+ from src.api.jwt_bearer import access_check_bearer
15
+ from src.api.jewel_mirror import jewel_api_router
16
+
17
+
18
+ logger.info("---------------------------> Starting FastAPI Server <---------------------------")
19
+
20
+ config = load_ini_config("config.ini")
21
+ app = FastAPI(docs_url=config.get('fastapi_config', 'docs_url'), redoc_url=config.get('fastapi_config', 'redoc_url'),
22
+ openapi_url=config.get('fastapi_config', 'openapi_url')
23
+ )
24
+
25
+ PROTECTED = [Depends(access_check_bearer)]
26
+
27
+ logger.info("---------------------------> FastAPI Server Started <---------------------------")
28
+ app.add_middleware(
29
+ CORSMiddleware,
30
+ allow_origins=["*"], # Allow access from all sources
31
+ allow_credentials=True,
32
+ allow_methods=["*"], # Allow all HTTP methods
33
+ allow_headers=["*"], # Allow all request headers
34
+ )
35
+ app.include_router(user_management_api_router, prefix="/conversai")
36
+ logger.info("---------------------------> User Management API Started <---------------------------")
37
+
38
+ app.include_router(conversai_api_router, prefix="/conversai")
39
+ logger.info("---------------------------> ConversAI API Started <---------------------------")
40
+
41
+ app.include_router(speech_translator_router, prefix="/conversai")
42
+ logger.info("---------------------------> Speech Translator API Started <---------------------------")
43
+
44
+ app.include_router(analytic_endpoints_router, prefix="/conversai",dependencies=PROTECTED)
45
+ logger.info("---------------------------> Analytics API Started <---------------------------")
46
+
47
+ #for goldpricing
48
+ app.include_router(jewel_api_router,prefix="/conversai")
49
+ logger.info("---------------------------> Goldpricing API Started <---------------------------")
50
+
51
+
52
+
53
+ if __name__ == '__main__':
54
+ uvicorn.run(app, port=int(config.get('fastapi_config', 'port')), host=config.get('fastapi_config', 'host'),
55
+ timeout_keep_alive=300, timeout_graceful_shutdown=600)
config.ini ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [default]
2
+ chatbot_name = ConversAI
3
+ chatbot_prefix = convai
4
+
5
+ [data_analyzer]
6
+ groq_llm_name = llama-3.1-8b-instant
7
+ additional_query = .In case, you are to plot a chart, make sure the x-axis labels are 90 degree rotated.
8
+ verbose = False
9
+
10
+ ; [easy_ocr]
11
+ ; model_path = resources/easyocr_model
12
+ ; language = en
13
+ ; gpu = True
14
+
15
+ [speech_to_text]
16
+ model_id = openai/whisper-large-v3
17
+ max_new_tokens = 128
18
+ chunks_length_s = 30
19
+ batch_size = 16
20
+
21
+ [supabase_chatbot_management]
22
+ user_config_table = ConversAI_UserConfig
23
+ chat_bot_table = ConversAI_ChatbotInfo
24
+
25
+ ; [all_mini_l6_v2_vector_embedding]
26
+ ; device = cuda
27
+ ; normalize_embeddings = True
28
+
29
+
30
+ [fastapi_config]
31
+ host = 0.0.0.0
32
+ port = 8000
33
+ docs_url = /docs
34
+ redoc_url = /redoc
35
+ openapi_url = /openapi.json
36
+
37
+
38
+ [oauth]
39
+ redirect_to : https://convers-ai-test.vercel.app/home/
examples/easy_ocr_example.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ import cv2
6
+
7
+ from src.services.ocr.easy_ocr.easy_ocr_ import EasyOCR_
8
+
9
+ if __name__ == '__main__':
10
+ image = cv2.imread("images/img.png")
11
+ ocr = EasyOCR_()
12
+ print(ocr.read_text(image))
examples/url_text_extraction_example.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ from src.services.website_url.text_extraction_urls import extract_text_from_url
6
+
7
+ if __name__ == '__main__':
8
+ website = "https://huggingface.co/BAAI/bge-m3"
9
+ extracted_text = extract_text_from_url(website)
10
+ print(extracted_text)
logging_config.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 1
2
+ disable_existing_loggers: False
3
+ formatters:
4
+ standard:
5
+ format: '[%(asctime)s: %(levelname)s: %(module)s: %(message)s]'
6
+ handlers:
7
+ console:
8
+ class: logging.StreamHandler
9
+ level: DEBUG
10
+ formatter: standard
11
+ stream: ext://sys.stdout
12
+ file:
13
+ class: logging.handlers.TimedRotatingFileHandler
14
+ level: INFO
15
+ formatter: standard
16
+ filename: logs/application.log
17
+ when: midnight
18
+ interval: 1
19
+ backupCount: 30
20
+ loggers:
21
+ __main__:
22
+ level: DEBUG
23
+ handlers: [console, file]
24
+ propagate: no
25
+ root:
26
+ level: DEBUG
27
+ handlers: [console, file]
requirements.txt ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.33.0
2
+ annotated-types==0.7.0
3
+ anyio==4.4.0
4
+ certifi==2024.7.4
5
+ charset-normalizer==3.3.2
6
+ click==8.1.7
7
+ dnspython==2.6.1
8
+ email_validator==2.2.0
9
+ exceptiongroup==1.2.2
10
+ fastapi==0.111.1
11
+ fastapi-cli==0.0.4
12
+ filelock==3.15.4
13
+ fsspec==2024.6.1
14
+ gTTS==2.5.2
15
+ h11==0.14.0
16
+ httpcore==1.0.5
17
+ httptools==0.6.1
18
+ httpx==0.27.0
19
+ huggingface-hub==0.24.5
20
+ idna==3.7
21
+ Jinja2==3.1.4
22
+ markdown-it-py==3.0.0
23
+ MarkupSafe==2.1.5
24
+ mdurl==0.1.2
25
+ mpmath==1.3.0
26
+ networkx==3.3
27
+ numpy==1.26.4
28
+ nvidia-cublas-cu12==12.1.3.1
29
+ nvidia-cuda-cupti-cu12==12.1.105
30
+ nvidia-cuda-nvrtc-cu12==12.1.105
31
+ nvidia-cuda-runtime-cu12==12.1.105
32
+ nvidia-cudnn-cu12==9.1.0.70
33
+ nvidia-cufft-cu12==11.0.2.54
34
+ nvidia-curand-cu12==10.3.2.106
35
+ nvidia-cusolver-cu12==11.4.5.107
36
+ nvidia-cusparse-cu12==12.1.0.106
37
+ nvidia-nccl-cu12==2.20.5
38
+ nvidia-nvjitlink-cu12==12.6.20
39
+ nvidia-nvtx-cu12==12.1.105
40
+ packaging==24.1
41
+ psutil==6.0.0
42
+ pydantic==2.8.2
43
+ pydantic_core==2.20.1
44
+ Pygments==2.18.0
45
+ python-dotenv==1.0.1
46
+ python-multipart==0.0.9
47
+ PyYAML==6.0.1
48
+ regex==2024.7.24
49
+ requests==2.32.3
50
+ rich==13.7.1
51
+ safetensors==0.4.3
52
+ scikit-build==0.18.0
53
+ shellingham==1.5.4
54
+ sniffio==1.3.1
55
+ starlette==0.37.2
56
+ sympy==1.13.1
57
+ tokenizers==0.19.1
58
+ tomli==2.0.1
59
+ torch==2.4.0
60
+ tqdm==4.66.4
61
+ transformers @ git+https://github.com/huggingface/transformers.git@85a1269e19af022e04bc2aad82572cd5a9e8cdd9
62
+ triton==3.0.0
63
+ typer==0.12.3
64
+ typing_extensions==4.12.2
65
+ urllib3==2.2.2
66
+ uvicorn==0.30.4
67
+ uvloop==0.19.0
68
+ watchfiles==0.22.0
69
+ websockets==12.0
70
+ bs4
71
+ huggingface-hub
72
+ fastembed
73
+ nest_asyncio
74
+ beautifulsoup4
75
+ flashrank
76
+ flashrank[listwise]
77
+ PyMuPDF
78
+ langchain
79
+ langchain-community
80
+ langchain-cohere
81
+ langchain-huggingface
82
+ langchain-qdrant
83
+ langchain-groq
84
+ lxml
85
+ python-dotenv
86
+ pillow
87
+ pandas
88
+ sentence-transformers
89
+ supabase
90
+ unstructured
91
+ urllib3
92
+ langsmith
93
+ pandasai
94
+ easyocr
95
+ youtube-transcript-api
96
+ pdf2image
97
+ PyPDF2
98
+ PyJWT
99
+ replicate
100
+ langgraph
src/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
5
+
6
+ import logging.config
7
+ import yaml
8
+ import os
9
+
10
+ if os.path.exists("logs"):
11
+ pass
12
+ else:
13
+ os.makedirs("logs")
14
+
15
+ log_config_path = os.path.join(os.getcwd(), "logging_config.yaml")
16
+ with open(log_config_path, 'r') as file:
17
+ config = yaml.safe_load(file.read())
18
+
19
+ logging.config.dictConfig(config)
src/api/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
src/api/analytics_api.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-28
4
+ """
5
+ from collections import Counter, defaultdict
6
+ from datetime import datetime, timedelta
7
+ from dateutil.parser import isoparse
8
+ from fastapi.routing import APIRouter
9
+ from src.pipeline.conversai_analytic_pipeline import ConversAIAnalyticPipeline
10
+ from fastapi import Request
11
+ from src.utils.error_handling import create_success_response, raise_http_exception, \
12
+ success_response_user_management
13
+ from src.models.apis_models import FeedbackRequest, DailyActiveEndUserRequest, AverageSessionInteractionRequest, \
14
+ TokenUsageRequest, UserSatisfactionRateRequest
15
+ from src import logging as logger
16
+
17
+ analytic_endpoints_router = APIRouter(tags=["Analytics Endpoints"])
18
+
19
+ conversai_analytic_pipeline = ConversAIAnalyticPipeline()
20
+
21
+
22
@analytic_endpoints_router.post("/daily_chat_count")
async def daily_chat_count(
        request: DailyActiveEndUserRequest):
    """Return the number of chat messages recorded per day for a vectorstore.

    When either date bound is missing, defaults to the trailing 7 days.
    Responds with {"output": [{"date": ..., "count": ...}, ...]}.
    """
    start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
    logger.info(f">>> daily_chat_count API Triggered by {vectorstore} <<<")
    try:
        if start_date and end_date:
            start_date = isoparse(start_date).date()
            end_date = isoparse(end_date).date()
        else:
            # No explicit window: default to the last week.
            end_date = datetime.now().astimezone().date()
            start_date = end_date - timedelta(days=7)

        rows = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)

        # Count messages per calendar day inside the requested window.
        per_day = Counter(
            isoparse(row["timestamp"]).date()
            for row in rows
            if start_date <= isoparse(row["timestamp"]).date() <= end_date
        )

        data = [{"date": day.isoformat(), "count": n} for day, n in per_day.items()]

        response = create_success_response(code=200, data=dict(output=data))
        logger.info(f">>> daily_chat_count API Response Success for {vectorstore} <<<")

        return response

    except Exception as e:
        logger.error(f">>> daily_chat_count API Response Failed for {vectorstore} {e}<<<")

        raise_http_exception(500, "Internal Server Error")
56
+
57
+
58
@analytic_endpoints_router.post("/daily_active_end_user")
async def daily_active_end_user(
        request: DailyActiveEndUserRequest
):
    """Report, per day, how many distinct client IPs interacted with the bot.

    Days with one or zero unique IPs are omitted from the output (len > 1
    filter, preserved from the original). Defaults to the trailing 7 days.
    """
    start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
    logger.info(f">>> daily_active_end_user API Triggered by {vectorstore} <<<")
    try:
        if start_date and end_date:
            start_date = isoparse(start_date).date()
            end_date = isoparse(end_date).date()
        else:
            end_date = datetime.now().astimezone().date()
            start_date = end_date - timedelta(days=7)

        rows = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)

        # Collect the set of client IPs observed on each in-range day.
        ips_per_day = defaultdict(set)
        for row in rows:
            ts = isoparse(row["timestamp"])
            ip = row["IpAddress"]
            if start_date <= ts.date() <= end_date:
                ips_per_day[ts.date()].add(ip)

        # NOTE(review): days with a single unique IP are dropped (len(ips) > 1);
        # this looks deliberate but is worth confirming against the dashboard.
        data = [{"date": day.isoformat(), "terminal": len(ips)}
                for day, ips in ips_per_day.items() if len(ips) > 1]

        response = create_success_response(code=200, data=dict(output=data))
        logger.info(f">>> daily_active_end_user API Response Success for {vectorstore} <<<")

        return response
    except Exception as e:
        logger.error(f">>> daily_active_end_user API Response Failed for {vectorstore} {e}<<<")

        raise_http_exception(500, "Internal Server Error")
93
+
94
+
95
@analytic_endpoints_router.post("/average_session_interaction")
async def average_session_interaction(
        request: AverageSessionInteractionRequest
):
    """Average number of messages per unique client IP, per day.

    Defaults to the trailing 7 days when either bound is missing.
    """
    start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
    logger.info(f">>> average_session_interaction API Triggered by {vectorstore} <<<")
    try:
        if start_date and end_date:
            start_date = isoparse(start_date).date()
            end_date = isoparse(end_date).date()
        else:
            end_date = datetime.now().astimezone().date()
            start_date = end_date - timedelta(days=7)

        rows = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)

        # Tally total message volume and distinct IPs for each in-range day.
        msg_count = defaultdict(int)
        ip_sets = defaultdict(set)
        for row in rows:
            ts = isoparse(row["timestamp"])
            ip = row["IpAddress"]
            if start_date <= ts.date() <= end_date:
                day = ts.date()
                msg_count[day] += 1
                ip_sets[day].add(ip)

        data = []
        for day in sorted(msg_count.keys()):
            uniques = len(ip_sets[day])
            avg = msg_count[day] / uniques if uniques > 0 else 0
            data.append({"date": day.isoformat(), "interactions": avg})

        # NOTE(review): this endpoint wraps its payload as {"data": ...} while
        # the sibling endpoints use {"output": ...} — confirm the frontend
        # really expects this key before unifying.
        response = create_success_response(code=200, data=dict(data=data))
        logger.info(f">>> average_session_interaction API Response Success for {vectorstore} <<<")

        return response
    except Exception as e:
        logger.error(f">>> average_session_interaction API Response Failed for {vectorstore} {e}<<<")
        raise_http_exception(500, "Internal Server Error")
136
+
137
+
138
@analytic_endpoints_router.post("/token_usages")
async def token_usages(request: TokenUsageRequest):
    """Sum response token counts per day for a vectorstore within a range.

    Defaults to the trailing 7 days when either bound is missing.
    """
    start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
    logger.info(f">>> token_usages API Triggered by {vectorstore} <<<")
    try:
        if start_date and end_date:
            start_date = isoparse(start_date).date()
            end_date = isoparse(end_date).date()
        else:
            end_date = datetime.now().astimezone().date()
            start_date = end_date - timedelta(days=7)

        rows = conversai_analytic_pipeline.chat_history_table_(vectorstore=vectorstore)

        usage = defaultdict(int)
        for row in rows:
            ts = isoparse(row["timestamp"])
            if start_date <= ts.date() <= end_date:
                # Rows may lack a token count; skip those instead of failing.
                tokens = row.get("ResponseTokenCount")
                if tokens is not None:
                    usage[ts.date()] += tokens

        data = [{"date": day.isoformat(), "total_tokens": total}
                for day, total in usage.items()]

        response = create_success_response(code=200, data=dict(output=data))
        logger.info(f">>> token_usages API Response Success for {vectorstore} <<<")

        return response
    except Exception as e:
        logger.error(f">>> token_usages API Response Failed for {vectorstore} {e}<<<")
        raise_http_exception(500, "Internal Server Error")
172
+
173
+
174
@analytic_endpoints_router.post("/add_feedback")
async def add_feedback(req: Request, request: FeedbackRequest):
    """Persist end-user feedback together with the client's IP and city.

    Args:
        req: Raw FastAPI request, used only to read the client IP address.
        request: Feedback payload (feedback value, user_id, vectorstore).
    """
    feedback, user_id, vectorstore = request.feedback, request.user_id, request.vectorstore
    try:
        logger.info(f">>> add_feedback API Triggered by {request.vectorstore} <<<")

        client_ip = req.client.host
        # Resolve a coarse location for analytics from the caller's IP.
        city = conversai_analytic_pipeline.get_ip_info(client_ip)

        conversai_analytic_pipeline.add_feedback_(feedback, user_id, city, client_ip, vectorstore)

        # BUG FIX: corrected the misspelled success message ("Sucess" -> "Success").
        response = success_response_user_management(code=200, message="Add Feedback Success")
        logger.info(f">>> add_feedback API Response Success for {vectorstore} <<<")

        return response

    except Exception as e:
        logger.error(f">>> add_feedback API Response Failed for {vectorstore} {e}<<<")
        raise_http_exception(500, "Internal Server Error")
193
+
194
+
195
@analytic_endpoints_router.post("/user_satisfaction_rate")
async def user_satisfaction_rate(
        request: UserSatisfactionRateRequest
):
    """Compute the daily satisfaction rate (% of 'like' feedback) for a
    vectorstore within a date range.

    Defaults to the trailing 7 days when either bound is missing. Feedback
    values other than "like"/"dislike" are ignored.
    """
    start_date, end_date, vectorstore = request.start_date, request.end_date, request.vectorstore
    logger.info(f">>> user_satisfaction_rate API Triggered by {vectorstore} <<<")
    try:
        if not start_date or not end_date:
            end_date = datetime.now().astimezone().date()
            start_date = end_date - timedelta(days=7)
        else:
            start_date = isoparse(start_date).date()
            end_date = isoparse(end_date).date()

        feedback_counts = defaultdict(lambda: {"like": 0, "dislike": 0})
        response = conversai_analytic_pipeline.feedback_table_(vectorstore)
        for i in response:
            timestamp = isoparse(i["timestamp"])
            if start_date <= timestamp.date() <= end_date:
                date = timestamp.date()
                feedback = i.get("feedback")
                if feedback == "like":
                    feedback_counts[date]["like"] += 1
                elif feedback == "dislike":
                    feedback_counts[date]["dislike"] += 1

        data = []
        for date in sorted(feedback_counts.keys()):
            like_count = feedback_counts[date]["like"]
            dislike_count = feedback_counts[date]["dislike"]
            total_feedback = like_count + dislike_count
            satisfaction_rate = (like_count / total_feedback * 100) if total_feedback > 0 else 0
            data.append({"date": date.isoformat(), "rate": satisfaction_rate})

        response = create_success_response(code=200, data=dict(output=data))
        logger.info(f">>> user_satisfaction_rate API Response Success for {vectorstore} <<<")

        return response
    except Exception as e:
        # BUG FIX: failures were logged at INFO level; use ERROR like the
        # sibling endpoints so they surface in monitoring.
        logger.error(f">>> user_satisfaction_rate API Response Failed for {vectorstore} {e}<<<")
        raise_http_exception(500, "Internal Server Error")
src/api/conversai_api.py ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-02
4
+ """
5
+ import io
6
+ import json
7
+ import os
8
+ import string
9
+ import tempfile
10
+ import requests
11
+ import pandas as pd
12
+ from src import logging as logger
13
+ from supabase import create_client
14
+ from urllib.parse import urlparse
15
+
16
+ from src.api.jwt_bearer import access_check_bearer
17
+ from src.models.apis_models import *
18
+ from fastapi.requests import Request
19
+ from fastapi.routing import APIRouter
20
+ from fastapi import UploadFile, File, HTTPException, Form, Depends
21
+ from src.pipeline.conversai_pipeline import ConversAIPipeline
22
+ from src.api.user_management_api import user_management
23
+ from src.services.supabase.analytics.analytic_tables import track_usage
24
+ from src.services.supabase.user_management.token_limit import token_limit_check
25
+ from src.utils.error_handling import create_error_response, create_success_response, raise_http_exception
26
+ from src.api.user_management_api import user_management as user_management_pipeline
27
+ from src.utils.utils import get_ip_info, encode_to_base64, clean_text, decode_base64
28
+
29
+ conversai_api_router = APIRouter(tags=["ConversAI"])
30
+
31
+ supabase_client = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_KEY"))
32
+ supabase_client_ = supabase_client
33
+ conversai_pipeline = ConversAIPipeline()
34
+
35
+
36
@conversai_api_router.post("/add_text")
async def add_text(request: AddTextRequest):
    """Store a raw text snippet as a chatbot data source, subject to the
    user's token limit.

    On success the text is uploaded to Supabase storage as JSON and a row is
    inserted into ConversAI_ChatbotDataSources; a 400 error response is
    returned when the token limit would be exceeded.
    """
    logger.info(f">>>AddText API Triggered By {request.vectorstore}<<<")
    try:
        vectorstore, text = request.vectorstore, request.text
        track_usage(vectorstore=vectorstore, endpoint="/add_text", supabase_client=supabase_client)
        # vectorstore is formatted like "<prefix>$<username>$<chatbot_name>...".
        parts = vectorstore.split("$")
        username, chat_bot_name = parts[1], parts[2]

        # Character count of the whitespace-normalised text drives the limit check.
        num_token = len(" ".join(text.split()))
        lim = token_limit_check(supabase_client=supabase_client, username=username, chatbot_name=chat_bot_name,
                                len_text=num_token)
        text = clean_text(text)
        if not lim:
            return create_error_response(400,
                                         "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")

        dct = {
            "output": {"text": text},
            "source": "Text",
        }
        # NOTE(review): the count is recomputed after clean_text(), so the stored
        # numTokens can differ from the value used for the limit check above —
        # preserved as-is; confirm which count the limit is meant to reflect.
        num_token = len(" ".join(text.split()))
        logger.info(f"Number of token {num_token}")
        payload = json.dumps(dct, indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name="text", username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")

        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username, "chatbotName": chat_bot_name, "dataSourceName": file_name,
             "numTokens": num_token, "sourceEndpoint": "/add_text",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        response = create_success_response(200, {"message": "Successfully added the text."})
        logger.info(f">>>Text added successfully for {request.vectorstore}.<<<")

        return response

    except Exception as e:
        logger.error(f">>>Error in add_text: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
79
+
80
+
81
@conversai_api_router.post("/answer_query")
async def answer_query(request: AnswerQueryRequest, req: Request):
    """Answer a query against a chatbot's vectorstore and log the exchange.

    Records the question/answer pair (with client IP and resolved city) in
    ConversAI_ChatHistory before returning the answer, follow-up questions
    and source.
    """
    logger.info(f">>>answer_query API Triggered By {request.vectorstore}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=request.vectorstore, endpoint="/answer_query")
        username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
        ip_address = req.client.host
        city = get_ip_info(ip_address)
        output, followup_questions, source = conversai_pipeline.answer_query_(query=request.query,
                                                                              vectorstore=request.vectorstore,
                                                                              llm_model=request.llm_model)
        supabase_client.table("ConversAI_ChatHistory").insert(
            {"username": username, "chatbotName": chatbot_name, "llmModel": request.llm_model,
             "question": request.query, "response": output, "IpAddress": ip_address,
             "ResponseTokenCount": len(output),
             "vectorstore": request.vectorstore, "City": city}).execute()

        response = create_success_response(200, data={"output": output,
                                                      "follow_up_questions": followup_questions,
                                                      "source": source})
        logger.info(f">>>Query answered successfully for {request.vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in answer_query: {e} for {request.vectorstore}.<<<")
        # BUG FIX: the previous `raise e` (with the HTTPException commented out)
        # leaked internal exception details to API clients; return a generic 500
        # consistent with the other endpoints.
        raise_http_exception(500, "Internal Server Error")
106
+
107
@conversai_api_router.post("/data_analyzer")
async def data_analyzer(query: str = Form(...), file: UploadFile = File(...)):
    """Run a natural-language query against an uploaded Excel/CSV file."""
    logger.info(f">>>data_analyzer API Triggered By {query}<<<")
    try:
        suffix = file.filename.split(".")[-1]
        if suffix in ("xls", "xlsx", "xlsm", "xlsb"):
            frame = pd.read_excel(io.BytesIO(await file.read()))
        elif suffix == "csv":
            frame = pd.read_csv(io.BytesIO(await file.read()))
        else:
            # Unsupported extension: raw payload, not the wrapped success envelope.
            return {"output": "INVALID FILE TYPE"}

        answer = conversai_pipeline.data_analyzer(query=query, dataframe=frame)
        result = create_success_response(200, {"output": answer})
        logger.info(f">>>Data analyzed successfully for {query}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in data_analyzer: {e} for {query}.<<<")
        raise_http_exception(500, "Internal Server Error")
127
+
128
+
129
@conversai_api_router.post("/get_links")
async def get_links(request: GetLinksRequest):
    """Crawl the given URL and return the links discovered on it."""
    logger.info(f">>>get_links API Triggered By {request.url}<<<")
    try:
        discovered = conversai_pipeline.get_links_(url=request.url, timeout=30)
        result = create_success_response(
            200, {"urls": discovered, "source": urlparse(request.url).netloc})
        logger.info(f">>>Links fetched successfully for {request.url}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in get_links: {e} for {request.url}.<<<")
        raise_http_exception(500, "Internal Server Error")
141
+
142
+
143
@conversai_api_router.post("/image_pdf_text_extraction")
async def image_pdf_text_extraction(vectorstore: str = Form(...), pdf: UploadFile = File(...)):
    """OCR an image-based PDF and store the extracted text as a data source.

    Runs the extraction pipeline on the uploaded bytes, enforces the token
    quota, then uploads the result JSON to Supabase storage and registers it
    in ConversAI_ChatbotDataSources. Returns 402 when the quota is exceeded.
    """
    logger.info(f">>>image_pdf_text_extraction API Triggered By {pdf.filename}<<<")
    try:
        track_usage(vectorstore=vectorstore, endpoint="/image_pdf_text_extraction", supabase_client=supabase_client)
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        source = pdf.filename
        pdf_bytes = await pdf.read()
        extracted = conversai_pipeline.image_pdf_text_extraction_(image_pdf=pdf_bytes)
        # Character count across all extracted sections, computed once
        # (the original recomputed the identical value inside the `if`).
        num_tokens = len(" ".join(extracted[key] for key in extracted))
        lim = token_limit_check(supabase_client=supabase_client, username=username,
                                chatbot_name=chatbot_name, len_text=num_tokens)
        logger.info(f"this is the {lim}")
        if not lim:
            return create_error_response(402,
                                         "Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")

        payload = json.dumps({"output": extracted, "source": source},
                             indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)

        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username,
             "chatbotName": chatbot_name,
             "dataSourceName": file_name,
             "numTokens": num_tokens,
             "sourceEndpoint": "/image_pdf_text_extraction",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        response = create_success_response(200,
                                           {"source": source, "message": "Successfully extracted the text."})
        logger.info(f">>>Text extracted successfully for {source}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in image_pdf_text_extraction: {e} for {pdf.filename}.<<<")
        raise_http_exception(500, "Internal Server Error")
188
+
189
+
190
@conversai_api_router.post("/text_pdf_extraction")
async def text_pdf_extraction(vectorstore: str = Form(...), pdf: UploadFile = File(...)):
    """Extract text from a text-based PDF and store it as a data source.

    Writes the upload to a temporary file for the extraction pipeline,
    enforces the token quota, then persists the result JSON to Supabase
    storage and registers it. Returns 402 when the quota is exceeded.
    """
    logger.info(f">>>text_pdf_extraction API Triggered By {pdf.filename}<<<")
    try:
        track_usage(vectorstore=vectorstore, endpoint="/text_pdf_extraction", supabase_client=supabase_client)
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        source = pdf.filename
        # Keep the UploadFile intact; the original rebound `pdf` to raw bytes.
        pdf_bytes = await pdf.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
            temp_file.write(pdf_bytes)
            temp_file_path = temp_file.name

        try:
            extracted = conversai_pipeline.text_pdf_extraction_(pdf=temp_file_path)
        finally:
            # BUG FIX: previously the temp file leaked whenever extraction raised,
            # because os.remove only ran on the success path.
            os.remove(temp_file_path)

        # Character count across all extracted sections, computed once
        # (the original recomputed the identical value inside the `if`).
        num_tokens = len(" ".join(extracted[key] for key in extracted))
        lim = token_limit_check(supabase_client=supabase_client, username=username,
                                chatbot_name=chatbot_name, len_text=num_tokens)
        if not lim:
            return create_error_response(402,
                                         "Exceeding limits, please try with a smaller chunks of PDF or subscribe to our premium plan.")

        logger.info(f"Num of tokens {num_tokens} text_pdf_extraction")
        payload = json.dumps({"output": extracted, "source": source},
                             indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=source, username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username,
             "chatbotName": chatbot_name,
             "dataSourceName": file_name,
             "numTokens": num_tokens,
             "sourceEndpoint": "/text_pdf_extraction",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        response = create_success_response(200, {"source": source, "message": "Successfully extracted the text."})
        logger.info(f">>>Text extracted successfully for {source}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in text_pdf_extraction: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
242
+
243
+
244
@conversai_api_router.post("/youtube_transcript")
async def youtube_transcript(request: YoutubeTranscriptRequest):
    """Fetch YouTube transcripts and store them as a chatbot data source."""
    vectorstore, urls = request.vectorstore, request.urls
    logger.info(f">>>youtube_transcript API Triggered By {urls}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/youtube_transcript")
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]

        transcripts = conversai_pipeline.youtube_transcript_(url=urls)
        # Character count across all transcripts, used for the quota check
        # and persisted as numTokens.
        num_tokens = len(" ".join(transcripts[key] for key in transcripts))
        within_limit = token_limit_check(supabase_client=supabase_client, username=username,
                                         chatbot_name=chatbot_name, len_text=num_tokens)
        if not within_limit:
            return create_error_response(402,
                                         "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")

        payload = json.dumps({"output": transcripts, "source": "www.youtube.com"},
                             indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name="youtube", username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username,
             "chatbotName": chatbot_name,
             "dataSourceName": file_name,
             "numTokens": num_tokens,
             "sourceEndpoint": "/youtube_transcript",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        result = create_success_response(200, {"message": "Successfully fetched the youtube transcript."})
        logger.info(f">>>Youtube transcript fetched successfully for {urls}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in youtube_transcript: {e} for {urls}.<<<")
        raise_http_exception(500, "Internal Server Error")
289
+
290
+
291
@conversai_api_router.post("/website_url_text_extraction")
async def add_website(request: AddWebsiteRequest):
    """Extract text from a list of website URLs and store it as a data source.

    Enforces the token quota (402 on overflow), uploads the extracted text
    JSON to Supabase storage and registers it under the "/fetch_text/urls"
    source endpoint.
    """
    vectorstore, website_urls, source = request.vectorstore, request.website_urls, request.source

    logger.info(f">>>website_url_text_extraction API Triggered By {request.website_urls}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/fetch_text/urls")
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]

        text = conversai_pipeline.website_url_text_extraction_list_(urls=website_urls)
        num_token = len(" ".join(text[key] for key in text))

        logger.info(f">>>website_url_text_extraction len{num_token}<<<")
        logger.info(f">>>website_url_text_extraction {text}<<<")

        lim = token_limit_check(supabase_client=supabase_client, username=username,
                                chatbot_name=chatbot_name, len_text=num_token)
        if not lim:
            return create_error_response(402,
                                         "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")

        payload = json.dumps({"output": text, "source": source},
                             indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=urlparse(source).netloc,
                                                                     username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username,
             "chatbotName": chatbot_name,
             "dataSourceName": file_name,
             "numTokens": num_token,
             "sourceEndpoint": "/fetch_text/urls",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        response = create_success_response(200, {"message": "Successfully fetched the website text."})
        logger.info(f">>>Website text extracted successfully for {request.website_urls}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in website_url_text_extraction: {e} for {request.website_urls}.<<<")
        # BUG FIX: the previous `raise e` (HTTPException commented out) leaked
        # internal exception details to API clients; return a generic 500.
        raise_http_exception(500, "Internal Server Error")
341
+
342
+
343
+
344
+
345
+
346
@conversai_api_router.get("/get_current_count")
async def get_count(vectorstore: str):
    """Return the user's current usage count."""
    logger.info(f">>>get_current_count API Triggered By {vectorstore}<<<")
    try:
        # vectorstore is formatted "<prefix>$<username>$<chatbot_name>".
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        count = user_management_pipeline.get_current_count_(username)

        result = create_success_response(200, {"current_count": count})
        logger.info(f">>>Current count fetched successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in get_current_count: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
360
+
361
+
362
@conversai_api_router.post("/list_chatbots")
async def list_chatbots(request: ListChatbotsRequest):
    """List the chatbots belonging to a user."""
    logger.info(f">>>list_chatbots API Triggered By {request.username}<<<")
    try:
        tables = user_management.list_tables(username=request.username)
        result = create_success_response(200, {"chatbots": tables})
        logger.info(f">>>Chatbots listed successfully for {request.username}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in list_chatbots: {e} for {request.username}.<<<")
        raise_http_exception(500, "Internal Server Error")
374
+
375
+
376
@conversai_api_router.post("/get_chat_history")
async def chat_history(request: GetChatHistoryRequest):
    """Return the stored chat history for a chatbot."""
    logger.info(f">>>get_chat_history API Triggered By {request.vectorstore}<<<")
    try:
        _, username, chatbotName = request.vectorstore.split("$", 2)

        rows = (
            supabase_client.table("ConversAI_ChatHistory")
            .select("timestamp", "question", "response")
            .eq("username", username)
            .eq("chatbotName", chatbotName)
            .execute()
            .data
        )

        result = create_success_response(200, {"history": rows})
        logger.info(f">>>Chat history fetched successfully for {request.vectorstore}.<<<")
        return result

    except IndexError:
        logger.warning(f"Chat history not found for {request.vectorstore}")
        return create_error_response(404, "Chat history not found for the given chatbot.")

    except Exception as e:
        logger.error(f">>>Error in get_chat_history: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
398
+
399
+
400
@conversai_api_router.post("/delete_chatbot")
async def delete_chatbot(request: DeleteChatbotRequest):
    """Delete a chatbot: info row, data-source rows, stored files, and vectorstore.

    Activity log and chat history rows are soft-deleted (isActive=False)
    rather than removed.
    """
    logger.info(f">>>delete_chatbot API Triggered By {request.vectorstore}<<<")
    try:
        username, chatbot_name = request.vectorstore.split("$")[1], request.vectorstore.split("$")[2]
        supabase_client.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq(
            'chatbotname', chatbot_name).execute()
        all_sources = supabase_client.table("ConversAI_ChatbotDataSources").select("*").eq(
            "username", username).eq("chatbotName", chatbot_name).execute().data
        # The stored object name is the last path component of each source URL.
        all_sources = [row["sourceContentURL"].split("/")[-1] for row in all_sources]
        supabase_client.table("ConversAI_ChatbotDataSources").delete().eq("username", username).eq(
            "chatbotName", chatbot_name).execute()
        supabase_client.table("ConversAI_ActivityLog").update({"isActive": False}).eq(
            "username", username).eq("chatbotName", chatbot_name).execute()
        supabase_client.table("ConversAI_ChatHistory").update({"isActive": False}).eq(
            "username", username).eq("chatbotName", chatbot_name).execute()
        for source in all_sources:
            # BUG FIX: removed the stray `supabase_client.table("ConversAI_Chatbot")`
            # statement that built a query object and discarded it on every iteration.
            supabase_client.storage.from_("ConversAI").remove(source)
        user_management.delete_table(table_name=chatbot_name)
        user_management.delete_qdrant_cluster(vectorstorename=request.vectorstore)
        response = create_success_response(200, {"message": "Chatbot deleted successfully"})
        logger.info(f">>>Chatbot deleted successfully for {request.vectorstore}.<<<")
        return response
    except Exception as e:
        logger.error(f">>>Error in delete_chatbot: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
429
+
430
+
431
@conversai_api_router.post("/add_qa_pair")
async def add_qa_pair(request: AddQAPairRequest):
    """Store a question/answer pair as a chatbot data source.

    Enforces the token quota (400 on overflow), uploads the formatted pair
    as a JSON file to Supabase storage and registers it in
    ConversAI_ChatbotDataSources.
    """
    logger.info(f">>>add_qa_pair API Triggered By {request.vectorstore}<<<")
    try:
        vectorstore, question, answer = request.vectorstore, request.question, request.answer
        track_usage(vectorstore=vectorstore, endpoint="/add_qa_pair", supabase_client=supabase_client)
        username, chat_bot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        normal_text = f"\nQUESTION: {question}\nANSWER: {answer}\n"
        # Word count with punctuation stripped; persisted as numTokens below.
        # NOTE(review): other endpoints count characters instead — confirm the
        # intended unit for quota accounting.
        num_token = len(normal_text.translate(str.maketrans('', '', string.punctuation)).split(" "))
        # BUG FIX: every other call site passes `len_text=<length>`; this one
        # passed `text=normal_text`, which does not match token_limit_check's
        # signature and would fail before any limit was checked.
        lim = token_limit_check(supabase_client=supabase_client, username=username,
                                chatbot_name=chat_bot_name, len_text=num_token)
        if not lim:
            return create_error_response(400,
                                         "Exceeding limits, please try with a smaller chunks of information or subscribe to our premium plan.")

        payload = json.dumps({"output": {"text": normal_text}, "source": "QA Pair"},
                             indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name="qa_pair", username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username,
             "chatbotName": chat_bot_name,
             "dataSourceName": file_name,
             "numTokens": num_token,
             "sourceEndpoint": "/add_qa_pair",
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        response = create_success_response(200, {"message": "Successfully added the qa pair."})
        logger.info(f">>>QA Pair added successfully for {request.vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in add_qa_pair: {e} for {request.vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
474
+
475
+
476
@conversai_api_router.post("/load_edited_json")
async def load_edited_json(request: LoadEditedJson):
    """Persist a user-edited (base64-encoded) data-source JSON back to storage."""
    vectorstore = request.vectorstore
    data_source_name, source_endpoint = request.data_source_name, request.source_endpoint
    username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]

    logger.info(f">>>loadEditedJson API Triggered By {request.vectorstore}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=request.vectorstore,
                    endpoint="/load_edited_json")
        decoded = decode_base64(request.json_data)
        payload = json.dumps(decoded, indent=1).encode("utf-8", errors="replace")
        file_name = user_management_pipeline.create_data_source_name(source_name=data_source_name,
                                                                     username=username)
        supabase_client.storage.from_("ConversAI").upload(file=payload, path=f"{file_name}_data.json")
        supabase_client.table("ConversAI_ChatbotDataSources").insert(
            {"username": username,
             "chatbotName": chatbot_name,
             "dataSourceName": file_name,
             "sourceEndpoint": source_endpoint,
             "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"],
                                              f"{file_name}_data.json")}).execute()

        result = create_success_response(200, {"output": "Successfully loaded the edited json."})
        logger.info(f">>>Edited json loaded successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in loadEditedJson: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
508
+
509
+
510
@conversai_api_router.get("/list_chatbot_sources")
async def list_chatbot_sources(vectorstore: str):
    """List every data source registered for the given chatbot."""
    try:
        logger.info(f">>>list_chatbot_sources API Triggered By {vectorstore}<<<")

        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/list_chatbot_sources")
        username, chatbot_name = vectorstore.split("$")[1], vectorstore.split("$")[2]
        rows = (
            supabase_client.table("ConversAI_ChatbotDataSources")
            .select("*")
            .eq("username", username)
            .eq("chatbotName", chatbot_name)
            .execute()
            .data
        )

        result = create_success_response(200, {"output": rows})
        logger.info(f">>>Chatbot listed successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in list_chatbot_sources: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
528
+
529
+
530
@conversai_api_router.get("/get_data_source")
async def get_data_source(vectorstore: str, source_url: str):
    """Download a stored data-source JSON file and return it base64-encoded."""
    try:
        logger.info(f">>>get_data_source API Triggered By {vectorstore}<<<")

        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/get_data_source")
        r = requests.get(source_url)
        # SECURITY/BUG FIX: the stored files are produced with json.dumps, so parse
        # them with json.loads. The previous eval() executed arbitrary downloaded
        # content and also fails on JSON literals such as true/false/null.
        res = encode_to_base64(json.loads(r.content.decode("utf-8", errors="replace")))

        response = create_success_response(200, {"output": res})

        return response

    except Exception as e:
        logger.error(f">>>Error in get_data_source: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
546
+
547
+
548
@conversai_api_router.post("/delete_chatbot_source")
async def delete_chatbot_source(request: DeleteChatbotSourceRequest):
    """Delete a single data source: its DB row and its stored JSON file."""
    vectorstore, data_source_name = request.vectorstore, request.data_source_name
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/delete_chatbot_source")
        supabase_client.table("ConversAI_ChatbotDataSources").delete().eq(
            "dataSourceName", data_source_name).execute()
        supabase_client.storage.from_('ConversAI').remove(f"{data_source_name}_data.json")

        result = create_success_response(200, {"output": f"Successfully deleted the {data_source_name} data source."})

        logger.info(f">>>Data source deleted successfully for {vectorstore}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in delete_chatbot_source: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
567
+
568
+
569
def _fetch_source_json(url: str) -> dict:
    """Download a stored data-source JSON file and parse it.

    The files are written with ``json.dumps`` by the add/extraction endpoints,
    so ``json.loads`` is the correct inverse. SECURITY FIX: the previous
    ``eval`` calls executed arbitrary remote content.
    """
    r = requests.get(url)
    return json.loads(r.content.decode("utf-8", errors="replace"))


@conversai_api_router.post("/train_chatbot")
async def train_chatbot(request: TrainChatbotRequest):
    """Build the chatbot's vectorstore from the selected stored data sources.

    Looks up each source URL's endpoint type, downloads and flattens its text
    accordingly, then feeds (text, source) pairs to the pipeline. Sources with
    an unrecognised endpoint are skipped.
    """
    vectorstore, url_sources = request.vectorstore, request.urls
    logger.info(f">>>train_chatbot API Triggered By {vectorstore}<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/train_chatbot")
        texts = []
        sources = []
        file_types = [
            supabase_client.table("ConversAI_ChatbotDataSources")
            .select("sourceEndpoint")
            .eq("sourceContentURL", url)
            .execute().data[0]["sourceEndpoint"]
            for url in url_sources
        ]
        for source_url, file_type in zip(url_sources, file_types):
            if file_type in ("/text_pdf_extraction", "/image_pdf_text_extraction"):
                logger.info(f"Source is {source_url}")
                file = _fetch_source_json(source_url)
                content = file["output"]
                logger.info(f"content is {content}")
                # Sectioned output: join all sections into one flat string.
                texts.append(".".join(content[key] for key in content.keys()).replace("\n", " "))
                sources.append(file["source"])
            elif file_type in ("/add_text", "/add_qa_pair"):
                file = _fetch_source_json(source_url)
                texts.append(file["output"]["text"].replace("\n", " "))
                sources.append(file["source"])
            elif file_type in ("/fetch_text/urls", "/youtube_transcript"):
                file = _fetch_source_json(source_url)
                content = file["output"]
                texts.append(".".join(content[key] for key in content.keys()).replace("\n", " "))
                sources.append(file["source"])

        conversai_pipeline.add_document_(list(zip(texts, sources)), vectorstore)
        response = create_success_response(200, {"message": "Chatbot trained successfully."})
        logger.info(f">>>Chatbot trained successfully for {vectorstore}.<<<")

        return response

    except Exception as e:
        logger.error(f">>>Error in train_chatbot: {e} for {vectorstore}.<<<")
        # BUG FIX: the previous `raise e` (with the HTTPException commented out)
        # leaked internal exception details to API clients.
        raise_http_exception(500, "Internal Server Error")
624
+
625
+
626
@conversai_api_router.get("/activity_log")
async def activity_log(username: str):
    """Return the user's activity-log rows (raw list, not the success envelope)."""
    logger.info(f">>>activityLog API Triggered By {username}<<<")
    try:
        rows = (
            supabase_client.table("ConversAI_ActivityLog")
            .select("*")
            .eq("username", username)
            .execute()
            .data
        )

        logger.info(f">>>Activity log fetched successfully for {username}.<<<")

        return rows
    except Exception as e:
        logger.error(f">>>Error in activityLog: {e} for {username}.<<<")
        raise_http_exception(500, "Internal Server Error")
638
+
639
+
640
@conversai_api_router.post("/new_chatbot")
async def new_chatbot(request: NewChatbotRequest):
    """Create a new chatbot for a user via the user-management pipeline."""
    logger.info(f">>> new_chatbot API Triggered <<<")
    try:
        result = user_management.new_chatbot_(chatbot_name=request.chatbot_name,
                                              username=request.username)
        logger.info(f">>> Chatbot created successfully for {request.username}.<<<")
        return result

    except Exception as e:
        logger.error(f">>>Error in new_chatbot: {e} for {request.username}.<<<")
        raise_http_exception(500, "Internal Server Error")
src/api/jewel_mirror.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.routing import APIRouter
2
+ from fastapi.requests import Request
3
+ from src.jewel_mirror.jewel_langgraph import JewelGraphApp
4
+ from src.utils.error_handling import create_success_response, raise_http_exception
5
+ from src.models.apis_models import JewelQueryRequest
6
+ from src import logging as logger
7
+ jewel_api_router=APIRouter(tags=["jewelchatbot"])
8
+
9
+
10
@jewel_api_router.post("/answer_jewel_query")
async def answer_query(request: JewelQueryRequest):
    """Answer a jewellery-domain query via the Jewel LangGraph app."""
    logger.info(f">>>answer_jewel_query API Triggered <<<")
    try:
        jewel_app = JewelGraphApp(request.vectorstore)
        output, followup_questions = jewel_app.get_response(query=request.query)

        response = create_success_response(200, data={"output": output,
                                                      "follow_up_questions": followup_questions,
                                                      "source": None})
        # BUG FIX: the success log previously read "for ." with no identifier.
        logger.info(f">>>Query answered successfully for {request.vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in answer_query: {e} for {request.vectorstore}.<<<")
        # BUG FIX: the previous `raise e` (HTTPException commented out) leaked
        # internal exception details to API clients; return a generic 500.
        raise_http_exception(500, "Internal Server Error")
src/api/jwt_bearer.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-09-03
4
+ """
5
+ import os
6
+ from fastapi import Depends
7
+ from supabase import create_client
8
+ from src import logging as logger
9
+ from src.utils.error_handling import create_error_response, raise_http_exception
10
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
11
+
12
+ security = HTTPBearer()
13
+
14
+ supabase_client = create_client(
15
+ os.getenv("SUPABASE_URL"),
16
+ os.getenv("SUPABASE_KEY")
17
+ )
18
+
19
+
20
async def access_check_bearer(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: validate the Bearer access token against Supabase auth.

    Raises a 401 via raise_http_exception when Supabase rejects the token.
    """
    token = credentials.credentials
    try:
        supabase_client.auth.get_user(token)

    except Exception as exc:
        logger.info(f">>> Invalid access token {exc}<<<")
        raise_http_exception(
            code=401,
            message="Invalid Access Token",
            details=[{"info": "Invalid access token or access token expired please login again"}],
        )
src/api/speech_api.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
5
+ import os
6
+ import tempfile
7
+ from fastapi import Form
8
+ from fastapi import UploadFile, HTTPException, status
9
+ from src.models.apis_models import TextToSpeechRequest
10
+ from fastapi.routing import APIRouter
11
+ from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
12
+ from src import logging as logger
13
+ from src.utils.error_handling import create_success_response, raise_http_exception
14
+
15
+ speech_translator_router = APIRouter(tags=["SpeechTranscription"])
16
+ pipeline = SpeechTranscriptionPipeline()
17
+
18
+
19
@speech_translator_router.post(
    "/text_to_speech",
)
async def text_to_speech(request: TextToSpeechRequest):
    """Synthesize speech for the given text and return the audio bytes."""
    logger.info(f">>>text_to_speech API Triggered <<<")
    try:
        audio = pipeline.text_to_speech(request.text, request.lang, request.tld)
        if not audio:
            # Empty output is treated as a client-visible 400 below.
            raise ValueError("Audio generation failed.")
        result = create_success_response(code=200, data={"audio": audio})
        logger.info(f">>>text_to_speech API success <<<")
        return result
    except ValueError as ve:
        logger.info(f">>>text_to_speech API failed {ve}<<<")
        raise_http_exception(code=400, message="Text to speech failed")

    except Exception as e:
        logger.error(f">>> Error processing text-to-speech {e}<<<")
        raise_http_exception(code=500, message="Internal server error")
38
+
39
+
40
@speech_translator_router.post(
    "/speech_to_text",

)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file to text.

    Three stages, each with its own error mapping: read/validate the upload
    (400), persist it to a temporary .wav file (500), then transcribe and
    clean the temp file up (404/500).
    """
    logger.info(f">>>speech_to_text API Triggered <<<")
    # Stage 1: read and validate the upload.
    try:
        audio_bytes = await audio.read()
        if not audio_bytes:
            logger.error(f">>> Empty audio file <<<")
            raise ValueError("Empty audio file")
    except Exception as e:
        logger.error(f">>> Invalid audio file {e}<<<")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file"
        )

    # Stage 2: persist to a temporary .wav file for the pipeline.
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(audio_bytes)
            temp_audio_file_path = tmp.name
    except Exception as e:
        logger.error(f">>> Error creating temporary file{e} <<<")
        raise_http_exception(code=500, message="Internal server error")

    # Stage 3: transcribe, always removing the temp file afterwards.
    try:
        transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
        result = create_success_response(code=200, data={"transcript": transcript})
        logger.info(f">>>speech_to_text API success <<<")

        return result

    except FileNotFoundError:
        logger.error(f">>> Temporary file not found <<<")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Temporary file not found"
        )
    except Exception as e:
        logger.error(f">>> Error processing speech-to-text {e}<<<")
        raise_http_exception(code=500, message="Error processing speech-to-text")

    finally:
        if os.path.exists(temp_audio_file_path):
            os.remove(temp_audio_file_path)
src/api/user_management_api.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ from fastapi import Depends
6
+ from src import logging as logger
7
+ from src.models.apis_models import *
8
+ from fastapi.routing import APIRouter
9
+ from src.api.jwt_bearer import access_check_bearer, supabase_client
10
+ from src.pipeline.user_management_pipeline import SupabaseUserManagementPipeline
11
+ from src.services.supabase.analytics.analytic_tables import track_usage
12
+ from src.utils.error_handling import raise_http_exception, create_success_response, create_error_response
13
+
14
+ user_management_api_router = APIRouter(tags=["User Management"])
15
+
16
+ user_management = SupabaseUserManagementPipeline()
17
+
18
+
19
@user_management_api_router.post("/user_signup")
async def user_signup(request: UserSignupRequest):
    """Register a new user via the Supabase user-management pipeline.

    Returns whatever payload ``user_signup_`` produces; errors are handled
    inside the pipeline (no exception handling at this layer).
    """
    logger.info(f">>>user_signup API Triggered <<<")
    response = user_management.user_signup_(username=request.username, email=request.email, password=request.password)
    logger.info(f">>>user_signup API Success<<<")

    return response
26
+
27
+
28
@user_management_api_router.post("/user_signin")
async def user_signin(request: UserSigninRequest):
    """Authenticate a user with email/password via the Supabase pipeline.

    Returns:
        The pipeline's session payload on success, or a 400 error payload
        when the credentials are rejected (pipeline returned ``None``).
    """
    logger.info(f">>>user_signin API Triggered <<<")

    response = user_management.user_signin_(email=request.email, password=request.password)
    # The pipeline signals bad credentials by returning None; compare with
    # `is not None` (PEP 8) instead of `!=` so equality overloads on the
    # payload can never misroute the check.
    if response is not None:
        logger.info(f">>>user_signin API Success.<<<")
        return response
    logger.info(f">>> Email or password is incorrect please try again.<<<")
    return create_error_response(400, "Email or password is incorrect please try again.")
40
+
41
+
42
@user_management_api_router.post("/get_user_data")
async def get_user_data(request: GetUserDataRequest):
    """Fetch the profile/session data associated with an access token."""
    logger.info(f">>>get_user_data API Triggered <<<")
    response = user_management.get_user_data_(access_token=request.access_token)
    return response
47
+
48
+
49
@user_management_api_router.post("/login_with_access_token")
async def login_with_access_token(request: LoginWithAccessTokenRequest):
    """Restore a login session from an existing access/refresh token pair."""
    logger.info(f">>>login_with_access_token API Triggered <<<")

    response = user_management.login_with_access_token_(access_token=request.access_token,
                                                        refresh_token=request.refresh_token)
    logger.info(f">>>login_with_access_token API Success<<<")
    return response
57
+
58
+
59
@user_management_api_router.post("/set_session_data")
async def set_session_data(request: SetSessionDataRequest):
    """Persist session tokens for a user via the user-management pipeline."""
    logger.info(f">>> set_session_data API Triggered <<<")

    response = user_management.set_session_data_(access_token=request.access_token, refresh_token=request.refresh_token,
                                                 user_id=request.user_id)
    return response
66
+
67
+
68
@user_management_api_router.post("/sign_out")
async def sign_out():
    """Terminate the current Supabase session.

    NOTE(review): takes no user identifier — presumably signs out whatever
    session the shared pipeline client currently holds; confirm intent.
    """
    logger.info(f">>> sign_out API Triggered <<<")

    response = user_management.sign_out_()
    logger.info(f">>>sign_out API Success<<<")
    return response
75
+
76
+
77
@user_management_api_router.post("/oauth_signin")
async def oauth_signin():
    """Start an OAuth sign-in flow via the user-management pipeline."""
    logger.info(f">>> oauth_signin API Triggered <<<")
    response = user_management.oauth_signin_()
    logger.info(f">>>oauth_signin API Success<<<")
    return response
83
+
84
+
85
@user_management_api_router.post("/check_session")
async def check_session():
    """Report whether the pipeline currently holds a valid session."""
    logger.info(f">>>check_session API Triggered <<<")

    response = user_management.check_session_()
    return response
91
+
92
+
93
@user_management_api_router.get("/get_public_chatbot")
async def get_public_chatbots():
    """List all chatbots flagged public (``isPrivate == False``) in Supabase."""
    logger.info(f">>>get_public_chatbot API Triggered<<<")
    try:
        response = supabase_client.table("ConversAI_ChatbotInfo").select("*").eq("isPrivate", False).execute().data
        logger.info(f">>>Public chatbots fetched successfully.<<<")
        return response
    except Exception as e:
        logger.error(f">>>Error in get_public_chatbot: {e}<<<")
        raise_http_exception(500, "Internal Server Error")
103
+
104
+
105
@user_management_api_router.post("/public_private_check")
async def public_or_private(request: PublicPrivateCheckRequest):
    """Read or update a chatbot's privacy flag.

    With an empty/absent ``mode`` the current ``isPrivate`` value is
    returned; otherwise ``isPrivate`` is updated to ``mode``.
    """
    vectorstore, mode = request.vectorstore, request.mode
    logger.info(f">>>public_private_check API Triggered for {vectorstore}.<<<")
    try:
        track_usage(supabase_client=supabase_client, vectorstore=vectorstore, endpoint="/public_private_check")
        # vectorstore is "$"-delimited: <prefix>$<user_id>$<chatbot_name> — split once.
        parts = vectorstore.split("$")
        username, chatbot_name = parts[1], parts[2]
        # `mode` defaults to None in PublicPrivateCheckRequest, so the old
        # `len(mode) == 0` raised TypeError (→ spurious 500). Treat None and
        # "" identically as "read-only" mode.
        if not mode:
            value = (
                supabase_client.table("ConversAI_ChatbotInfo")
                .select("isPrivate")
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            value = value.data[0]["isPrivate"]
            response = create_success_response(200, {"output": value})
        else:
            response = (
                supabase_client.table("ConversAI_ChatbotInfo")
                .update({"isPrivate": mode})
                .eq("user_id", username)
                .eq("chatbotname", chatbot_name)
                .execute()
            )
            response = create_success_response(200, {"output": response})
        logger.info(f">>>Public/Private check successful for {vectorstore}.<<<")
        return response

    except Exception as e:
        logger.error(f">>>Error in public_private_check: {e} for {vectorstore}.<<<")
        raise_http_exception(500, "Internal Server Error")
138
+
139
+
140
@user_management_api_router.post("/refresh_session", dependencies=[Depends(access_check_bearer)])
async def refresh_session(request: RefreshSessionRequest):
    """Exchange a refresh token for a new session (JWT-protected route)."""
    logger.info(f">>>refresh_session API Triggered <<<")
    response = user_management.refresh_session__(refresh_token=request.refresh_token)
    logger.info(f">>>refresh token fetched successfully.<<<")

    return response
147
+
148
+
149
@user_management_api_router.post("/username_creation_oauth", dependencies=[Depends(access_check_bearer)])
async def username_creation_oauth(request: UsernameCreationOauthRequest):
    """Attach a username to an OAuth-created account (JWT-protected route)."""
    logger.info(f">>> username_creation_oauth API Triggered <<<")

    response = user_management.username_creation_oauth_(username=request.username, user_id=request.user_id,
                                                        email=request.email)

    logger.info(f">>>username creation successful.<<<")
    return response
src/jewel_mirror/__init__.py ADDED
File without changes
src/jewel_mirror/jewel_langgraph.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph,END
2
+ from src.jewel_mirror.jewel_mirror import *
3
+
4
+
5
+
6
class JewelGraphApp:
    """LangGraph wrapper that routes jewelry queries through jewel_answer.

    Graph shape: firstagent classifies the query (About Jewellery vs None);
    in-scope queries go to secondagent which dispatches to one of three
    terminal nodes (RAG over the vectorstore, live gold price, or a plain
    LLM answer).
    """

    def __init__(self, vectorstore):
        # `vectorstore`: name of the Qdrant collection backing the RAG node.
        self.jewelgraph = StateGraph(AgentState)
        self.vectorstore = vectorstore
        self.jewel_answer = jewel_answer(self.vectorstore)
        self._setup_graph()

    def get_response(self, query):
        """Run `query` through the compiled graph.

        Returns:
            tuple: (answer text, follow-up questions generated from the
            vectorstore context).
        """
        response = self.jewelapp.invoke({"messages": [query]})["messages"][-1]
        followups = self.jewel_answer.generate_vectorstore_followups(question=query, answer=response)
        return response, followups

    def _setup_graph(self):
        # Add nodes — one per jewel_answer handler.
        self.jewelgraph.add_node("firstagent", self.jewel_answer.function_1)
        self.jewelgraph.add_node("nocontext", self.jewel_answer.context_end)
        self.jewelgraph.add_node("secondagent", self.jewel_answer.function_2)
        self.jewelgraph.add_node("JewelRag", self.jewel_answer.calling_jewel_rag)
        self.jewelgraph.add_node("Goldprice", self.jewel_answer.calling_gold_price)
        self.jewelgraph.add_node("JewelLLM", self.jewel_answer.calling_llm)

        # Set entry point
        self.jewelgraph.set_entry_point("firstagent")

        # Add conditional edges: router reads the classifier's output message.
        self.jewelgraph.add_conditional_edges(
            "firstagent",
            self.jewel_answer.router, {
                "secondagent": "secondagent",
                "nocontext": "nocontext",
            }
        )
        self.jewelgraph.add_edge("nocontext", END)

        self.jewelgraph.add_conditional_edges(
            "secondagent",
            self.jewel_answer.router2, {
                "JewelRag": "JewelRag",
                "Goldprice": "Goldprice",
                "JewelLLM": "JewelLLM",
            }
        )

        # Add end edges — all three dispatch targets are terminal.
        self.jewelgraph.add_edge("JewelRag", END)
        self.jewelgraph.add_edge("Goldprice", END)
        self.jewelgraph.add_edge("JewelLLM", END)

        # Compile graph
        self.jewelapp = self.jewelgraph.compile()
60
+
61
+
62
+
63
+
64
if __name__ == "__main__":
    # Smoke-test entry point. JewelGraphApp requires the target vectorstore
    # name; the original call omitted it and raised TypeError immediately.
    # NOTE(review): replace the placeholder with a real collection name.
    demo_vectorstore = "$demo_user$demo_chatbot"
    jewel_app = JewelGraphApp(demo_vectorstore)
    query = "What is the price of a Gold in india?"
    response = jewel_app.get_response(query)
    print(response)
69
+
src/jewel_mirror/jewel_mirror.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #Todo: To create a class based for this
3
+ from typing import TypedDict, Annotated, Sequence
4
+ import operator
5
+ from langchain_core.messages import BaseMessage
6
+ from langchain.prompts import PromptTemplate
7
+ from langchain.output_parsers import PydanticOutputParser
8
+ from src.models.utls import TopicSelectionParser
9
+ from langchain_groq import ChatGroq
10
+ from langchain_community.document_compressors import JinaRerank
11
+ from langchain.retrievers import ContextualCompressionRetriever
12
+ from src.utils.utils import json_parser
13
+ import os
14
+ from src.services.embeddings.jina_embeddings import jina_embedding
15
+ from src.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
16
+ from src.services.vector_db.qdrent.upload_document import answer_query_from_existing_collection
17
+ from langchain.prompts import ChatPromptTemplate
18
+ from langchain_core.output_parsers import StrOutputParser
19
+ from langchain_core.runnables import RunnablePassthrough
20
+ from langchain_community.document_loaders import WebBaseLoader
21
+ from langchain.schema import Document
22
# Propagate the Jina key into the env var the langchain Jina integrations read.
# os.getenv returns None when JINA_API is unset, and assigning None into
# os.environ raises TypeError at import time — only set it when present.
_jina_api_key = os.getenv("JINA_API")
if _jina_api_key is not None:
    os.environ["JINA_API_KEY"] = _jina_api_key
# Module-level LLM shared by the graph node functions below.
llm = ChatGroq(model_name="llama-3.1-70b-versatile")
24
+ from src import logging as logger
25
+
26
+
27
+
28
class AgentState(TypedDict):
    """Shared LangGraph state passed between the jewel_answer nodes."""
    # The 'messages' field should be a sequence of strings, and we annotate it with 'operator.add'
    # This implies we might want to "add" new messages to the sequence later
    # (LangGraph uses `operator.add` as a reducer: each node's returned
    # messages are concatenated onto the running list, not replaced).
    messages: Annotated[Sequence[BaseMessage], operator.add]
32
+
33
# Module-level output parser shared by the prompt templates below; parses the
# LLM reply into a TopicSelectionParser(Topic=..., Reasoning=...) model.
parser = PydanticOutputParser(pydantic_object=TopicSelectionParser)
34
+
35
+
36
class jewel_answer:
    """Node implementations for the JewelMirror LangGraph pipeline.

    Each public method is wired in as a graph node or router by
    JewelGraphApp: two classifiers (function_1/function_2), two routers,
    three terminal answerers (RAG / gold price / plain LLM), and a
    follow-up-question generator.
    """

    def __init__(self, vector_store):
        # Name of the Qdrant collection this chatbot answers from.
        self.vector_store = vector_store
        # Cross-encoder reranker applied on top of the MMR retriever results.
        self.compressor = JinaRerank(model="jina-reranker-v2-base-multilingual")
        self.vector_embed = jina_embedding()
        self.sparse_embed = qdrant_bm25_embedding()
        self.llm = ChatGroq(model_name="llama-3.1-70b-versatile")
        self.parser = PydanticOutputParser(pydantic_object=TopicSelectionParser)
        self.json_parser = json_parser()

    # Function to generate a two output [About jewellery and None]
    def function_1(self, state):
        """Classify the latest message as 'About Jewellery' or 'None'."""
        message = state["messages"]
        question = message[-1]
        template = """
        Your task is to classify the given user query into one of the following categories [About Jewellery,None]:
        - **About Jewellery**: Includes queries about:
        - Jewelry-related topics, such as specific types, prices, or customization.
        - JewelMirror's purpose, key features, and its functionalities (e.g., virtual try-on, batch selection, or sharing looks).
        - FAQs related to JewelMirror (e.g., signing up, logging in, troubleshooting, or compatible devices).
        - JewelMirror chatbot functionalities, such as assisting users with jewelry-related queries.
        - **None**: Includes queries unrelated to jewelry, JewelMirror, or its functionalities (e.g., programming questions, general greetings without context, or unrelated topics like baking).

        Only respond with the category name and nothing else.

        Examples:

        User query: "Can you explain how to use JewelMirror’s virtual try-on feature?"
        Output: About Jewellery

        User query: "Hi, how can I sign up for JewelMirror?"
        Output: About Jewellery

        User query: "What devices are compatible with the JewelMirror app?"
        Output: About Jewellery

        User query: "Hi, can you help me with Python programming?"
        Output: None

        User query: "How does the JewelMirror chatbot assist with jewelry questions?"
        Output: About Jewellery


        User query: "What is the best way to bake a cake?"
        Output: None

        User query: "Hello, what is the purpose of JewelMirror?"
        Output: About Jewellery

        User query: "Can JewelMirror help me share looks on social media?"
        Output: About Jewellery


        User query: "Can I customize my saree look with JewelMirror?"
        Output: About Jewellery

        User query: {question}
        {format_instructions}
        Output:
        """

        # Fix: input_variables must list the variable *name* "question";
        # the original passed the question text itself as the name.
        prompt = PromptTemplate(template=template,
                                input_variables=["question"],
                                partial_variables={
                                    "format_instructions": parser.get_format_instructions()}
                                )
        chain = prompt | llm | parser

        response = chain.invoke({"question": question, "format_instructions": parser.get_format_instructions()})

        return {"messages": [response.Topic]}

    # Function to handle NONE condition
    def context_end(self, state):
        """Terminal node for out-of-scope queries."""
        result = "Please ask about the jewellery related queries.Thank you !"
        return {"messages": [result]}

    # router to route the aboutjewellery and none
    def router(self, state):
        """Route function_1's label to 'secondagent' or 'nocontext'."""
        messages = state["messages"]
        last_message = messages[-1]
        print(last_message)
        if 'About Jewellery' in last_message:
            return 'secondagent'
        else:
            return 'nocontext'

    # function to generate three output Rag,llm,price
    def function_2(self, state):
        """Classify an in-scope query as RAG, LLM, or Gold Price."""
        messages = state['messages']
        question = messages[0]
        template = """Your task is to classify the given user query into one of the following categories [RAG,LLM,Price]:

        `- **Gold Price**: If the query strictly asks about the price of ornaments -related jewelry. For example:
        - "What is the price of a gold today?"
        - "What is the price of a silver today?"
        - "What is the price of gold in chennai for last 10days?"


        - "How much does 24K gold cost?"

        - **RAG**: If the query relates to JewelMirror’s features, operations, or FAQs, including:
        - JewelMirror’s **chatbot** features, such as virtual try-on, batch selection, or sharing looks jewel mirro name can be anything jewelmirror jewelchatbot anything.
        - JewelMirror’s **purpose**, **key features**, or how it enhances the shopping experience.
        - User guides, such as signing up, logging in, managing the cart, or using the virtual try-on feature.
        - Customizing jewelry or sarees, using batch selection, or downloading/sharing customized looks.
        - FAQs about JewelMirror, including how it works, security of personal data, compatible devices, or troubleshooting.
        - Using the JewelMirror chatbot for assistance with jewelry-related tasks.
        - Anything operational, feature-specific, or directly related to JewelMirror’s functionality.

        - **LLM**: If the query is a **generic, broader jewelry-related question** that does not specifically relate to JewelMirror features or price. For example:
        - General differences between gold and diamonds.
        - Why certain materials, like diamonds or gold, are used in jewelry.
        - Types of jewelry, history of jewelry, or materials used.
        - Examples:
        - "What is the difference between gold and diamonds?"
        - "Why are diamonds used in engagement rings?"
        - "What are the most popular types of necklaces?"

        **Key Rule**: Queries must strictly match the criteria for **Gold Price** or **RAG** to be classified under those categories. Only if the query does not match either, and is a generic jewelry-related question, classify it as **LLM**.

        Only respond with the category name and nothing else.

        Examples:

        User query: "What is the price of a gold necklace?"
        Output: Gold Price

        User query: "How do I sign up for JewelMirror?"
        Output: RAG

        User query: "What are the key features of JewelMirror?"
        Output: RAG

        User query: "How can I use the virtual try-on feature?"
        Output: RAG

        User query: "What is the price of diamond rings?"
        Output: Gold Price

        User query: "How do I use batch selection to try different combinations?"
        Output: RAG

        User query: "What types of jewelry are available in JewelMirror?"
        Output: RAG

        User query: "How does the JewelMirror chatbot assist with jewelry shopping?"
        Output: RAG

        User query: "What is the difference between 18K and 24K gold?"
        Output: LLM

        User query: "Why are diamonds used in jewelry?"
        Output: LLM

        User query: "How does JewelMirror enhance the customer shopping experience?"
        Output: RAG

        User query: "Can JewelMirror handle batch selection for necklaces?"
        Output: RAG

        User query: "How do I troubleshoot the JewelMirror app if the virtual try-on doesn’t work?"
        Output: RAG


        User query: {question}
        {format_instructions}
        Output:
        """
        # Fix: same input_variables bug as function_1 — pass the name, not
        # the question text.
        prompt = PromptTemplate(template=template,
                                input_variables=["question"],
                                partial_variables={
                                    "format_instructions": parser.get_format_instructions()}
                                )
        chain = prompt | llm | parser

        response = chain.invoke({"question": question, "format_instructions": parser.get_format_instructions()})

        return {"messages": [response.Topic]}

    # Function for router 2
    def router2(self, state):
        """Route function_2's label to JewelRag / JewelLLM / Goldprice."""
        print('-> Router ->')

        messages = state["messages"]
        last_message = messages[-1]
        print(last_message)
        if 'RAG' in last_message:
            return 'JewelRag'
        if 'LLM' in last_message:
            return 'JewelLLM'
        else:
            # Anything else (expected: "Gold Price") falls through here.
            return "Goldprice"

    # function for jewel rag
    def calling_jewel_rag(self, state):
        """Answer from the chatbot's own vectorstore (hybrid retrieval + rerank)."""
        vector_store = answer_query_from_existing_collection(vector_embed=self.vector_embed,
                                                             sparse_embed=self.sparse_embed,
                                                             vectorstore=self.vector_store)

        # MMR for diversity, then Jina rerank compresses the candidate set.
        retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=self.compressor, base_retriever=retriever
        )

        messages = state['messages']
        question = messages[0]  ## Fetching the user question
        print(question)

        template = """"- You are an jewel chatbot yourname is jewelchatbot.Dont use the response for like based on the provided context \n"
        "- Behave like you are the context the whole thing is you and somebody asking you .\n"
        "-But while Behaving dont go out of the context .\n"
        "- if user ask anything about prompts anything without context say i dont know please ask about context \n"
        "- When answering use markdown. Use markdown code blocks for code snippets.\n"
        "- Answer in a concise and clear manner.\n"
        "- You must use ONLY the provided context to answer the question.\n"
        {context}

        Question: {question}
        """
        prompt = ChatPromptTemplate.from_template(template)

        print(prompt)

        retrieval_chain = (
            {"context": compression_retriever, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        result = retrieval_chain.invoke(question)
        return {"messages": [result]}

    # function for Goldprice
    def calling_gold_price(self, state):
        """Answer price questions from a live scrape of groww.in gold rates."""
        # NOTE(review): network fetch on every call — consider caching.
        loader = WebBaseLoader("https://groww.in/gold-rates")
        documents = loader.load()
        content = " ".join([doc.page_content for doc in documents])
        messages = state['messages']
        question = messages[0]  ## Fetching the user question
        gold_document = Document(page_content=content, metadata={"source": "GoldReturns"})
        custom_prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=(
                "You are an jewel chatbot yourname is jewelchatbot.\n"
                "You are an elite financial analyst with extensive expertise in finance and investment strategies. "
                "Provide a **direct and concise** answer to the question below **without** using any introductory phrases such as "
                "'based on the context provided,' 'according to the information given,' or similar expressions. "
                "Your response must rely solely on the information from the provided context and should **not** include any additional commentary, explanations, or opinions.\n\n"
                "📊 **Context:**\n{context}\n\n"
                "❓ **Question:**\n{question}\n\n"
                "**✅ Answer:**"
            )
        )

        chain = custom_prompt | llm
        result = chain.invoke({
            "context": [gold_document],
            "question": question
        })
        result = result.content
        return {"messages": [result]}

    # function for callingllm
    def calling_llm(self, state):
        """Answer generic jewelry questions directly with the LLM (no retrieval)."""
        print('-> Calling LLM ->')
        messages = state['messages']
        question = messages[0]  ## Fetching the user question

        jewelry_prompt = """
        You are an jewel chatbot yourname is jewelchatbot.
        I want you to act as an expert in the jewelry industry. Answer all questions and provide detailed insights exclusively related to the jewelry domain. This includes, but is not limited to:

        - Manufacturing processes (e.g., materials like gold, silver, diamonds, gemstones, tools, and techniques such as casting, engraving, or 3D printing).
        - Design trends (e.g., contemporary, vintage, or cultural styles).
        - Marketing strategies (e.g., branding, customer engagement, sustainability).
        - Supply chain dynamics (e.g., sourcing of raw materials, ethical considerations, and global trade).
        - History and cultural significance (e.g., origins of certain jewelry styles, symbolic meanings in different cultures).
        - Retail and customer behavior (e.g., pricing, customization trends, online vs. offline shopping).
        - Sustainability in jewelry (e.g., ethical sourcing, lab-grown diamonds, recycling of materials).

        You must limit your responses strictly to the jewelry industry and not discuss unrelated topics. Provide examples, actionable insights, and in-depth details whenever possible.
        {question}

        """
        prompt = ChatPromptTemplate.from_template(jewelry_prompt)

        retrieval_chain = (
            prompt
            | llm
            | StrOutputParser()
        )
        result = retrieval_chain.invoke({"question": question})
        return {"messages": [result]}

    def generate_vectorstore_followups(self, question: str, answer: str | None = None) -> str:
        """
        Generate follow-up questions using context from the vector store.

        Args:
            question: The user's previous question.
            answer: The answer already given (may be None).

        Returns:
            Parsed JSON follow-up questions from the LLM.
        """
        vector_store = answer_query_from_existing_collection(vector_embed=self.vector_embed,
                                                             sparse_embed=self.sparse_embed,
                                                             vectorstore=self.vector_store)

        retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=self.compressor, base_retriever=retriever
        )

        # Get relevant documents
        relevant_docs = compression_retriever.get_relevant_documents(question)
        context = "\n".join([doc.page_content for doc in relevant_docs])

        followup_template = """You are jewelchatbot, an expert jewelry assistant. Generate follow-up questions based on the
        previous interaction and the provided context from our knowledge base.

        Previous Question: {question}
        Previous Answer: {answer}
        Knowledge Base Context: {context}

        Rules for generating questions:
        1. Questions should be based on information present in the knowledge base context
        2. Focus on aspects mentioned in the context that weren't covered in the previous answer
        3. Questions should explore JewelMirror features or jewelry topics found in the context
        4. Make sure questions are specific and can be answered using our knowledge base
        5. Avoid questions about topics not present in the context
        6. Don't repeat information already covered in the previous answer

        Generate exactly 3 relevant follow-up questions.only in json format dont use here are and all . just generate questions only.
        "{format_instructions}\n"
        """

        follow_up_prompt = PromptTemplate(
            template=followup_template,
            input_variables=["context", "answer", "question"],
            partial_variables={"format_instructions": self.json_parser.get_format_instructions()},
        )
        # Fix: reuse the parser built in __init__ instead of instantiating a
        # fresh json_parser() per call (same behavior, one object).
        followup_chain = (
            follow_up_prompt
            | llm
            | self.json_parser
        )

        followups = followup_chain.invoke({
            "question": question,
            "answer": answer,
            "context": context
        })

        return followups
387
+
src/llms/__init__.py ADDED
File without changes
src/models/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
src/models/apis_models.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
5
+ from pydantic import BaseModel, EmailStr
6
+ from typing import List, Optional
7
+
8
+
9
+ ## ---------------------------------- SpeechTranscription API Models ----------------------------------
10
+ class TextToSpeechRequest(BaseModel):
11
+ text: str
12
+ lang: str
13
+ tld: str
14
+
15
+
16
+ class SpeechToTextRequest(BaseModel):
17
+ lang: str
18
+
19
+
20
+ ## ---------------------------------- Chatbot API Models ----------------------------------
21
+
22
+ class AddTextRequest(BaseModel):
23
+ vectorstore: str
24
+ text: str
25
+
26
+
27
+ class AddWebsiteRequest(BaseModel):
28
+ website_urls: List[str]
29
+ vectorstore: str
30
+ source: str
31
+
32
+
33
+ class AnswerQueryRequest(BaseModel):
34
+ query: str
35
+ vectorstore: str
36
+ llm_model: str = "llama3-70b-8192"
37
+
38
+
39
+ class DataAnalyzerRequest(BaseModel):
40
+ query: str
41
+
42
+
43
+ class GetLinksRequest(BaseModel):
44
+ url: str
45
+
46
+
47
+ class YoutubeTranscriptRequest(BaseModel):
48
+ vectorstore: str
49
+ urls: List[str]
50
+
51
+
52
+ class WebsiteUrlTextExtractionRequest(BaseModel):
53
+ url: str
54
+
55
+
56
+ class WebsiteUrlTextExtractionListRequest(BaseModel):
57
+ urls: List[str]
58
+
59
+
60
+ class GetCurrentCountRequest(BaseModel):
61
+ vectorstore: str
62
+
63
+
64
+ class ListChatbotsRequest(BaseModel):
65
+ username: str
66
+
67
+
68
+ class GetChatHistoryRequest(BaseModel):
69
+ vectorstore: str
70
+
71
+
72
+ class ChatHistoryItem(BaseModel):
73
+ timestamp: str
74
+ question: str
75
+ response: str
76
+
77
+
78
+ class DeleteChatbotRequest(BaseModel):
79
+ vectorstore: str
80
+
81
+
82
+ class AddQAPairRequest(BaseModel):
83
+ vectorstore: str
84
+ question: str
85
+ answer: str
86
+
87
+
88
+ class TrainChatbotRequest(BaseModel):
89
+ vectorstore: str
90
+ urls: list[str]
91
+
92
+
93
+ class LoadPDFRequest(BaseModel):
94
+ vectorstore: str
95
+
96
+
97
+ class LoadEditedJson(BaseModel):
98
+ vectorstore: str
99
+ data_source_name: str
100
+ source_endpoint: str
101
+ json_data: dict
102
+
103
+
104
+ class PublicPrivateCheckRequest(BaseModel):
105
+ vectorstore: str
106
+ mode: str | None = None
107
+
108
+
109
+ class DeleteChatbotSourceRequest(BaseModel):
110
+ vectorstore: str
111
+ data_source_name: str
112
+
113
+
114
+ ## ---------------------------------- User Management API Models ----------------------------------
115
+
116
+ class UserSignupRequest(BaseModel):
117
+ username: str
118
+ email: EmailStr
119
+ password: str
120
+
121
+
122
+ class UserSigninRequest(BaseModel):
123
+ email: EmailStr
124
+ password: str
125
+
126
+
127
+ class CheckSessionRequest(BaseModel):
128
+ user_id: str
129
+
130
+
131
+ class GetUserDataRequest(BaseModel):
132
+ access_token: str
133
+
134
+
135
+ class RefreshSessionRequest(BaseModel):
136
+ refresh_token: str
137
+
138
+
139
+ class LoginWithAccessTokenRequest(BaseModel):
140
+ access_token: str
141
+ refresh_token: str
142
+
143
+
144
+ class UsernameCreationOauthRequest(BaseModel):
145
+ username: str
146
+ user_id: str
147
+ email: str
148
+
149
+
150
+ class SetSessionDataRequest(BaseModel):
151
+ access_token: str
152
+ refresh_token: str
153
+ user_id: str
154
+
155
+
156
+ class SignOutRequest(BaseModel):
157
+ user_id: str
158
+
159
+
160
+ class NewChatbotRequest(BaseModel):
161
+ chatbot_name: str
162
+ username: str
163
+
164
+
165
+ ## ---------------------------------- Analytics API Models ----------------------------------
166
+
167
+
168
class FeedbackRequest(BaseModel):
    """Payload for submitting end-user feedback."""
    feedback: str
    user_id: str
    # Explicit default: under Pydantic v2 a bare Optional[...] field is still
    # required; `= None` keeps it truly optional (a no-op under v1).
    vectorstore: Optional[str] = None
172
+
173
+
174
class UserSatisfactionRateRequest(BaseModel):
    """Date-range filter for the satisfaction-rate analytics endpoint."""
    # `= None` makes these genuinely optional under Pydantic v2 (no-op in v1).
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    vectorstore: Optional[str] = None
178
+
179
+
180
class TokenUsageRequest(BaseModel):
    """Date-range filter for the token-usage analytics endpoint."""
    # `= None` makes these genuinely optional under Pydantic v2 (no-op in v1).
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    vectorstore: Optional[str] = None
184
+
185
+
186
class AverageSessionInteractionRequest(BaseModel):
    """Date-range filter for the average-session-interaction analytics endpoint."""
    # `= None` makes these genuinely optional under Pydantic v2 (no-op in v1).
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    vectorstore: Optional[str] = None
190
+
191
+
192
class DailyActiveEndUserRequest(BaseModel):
    """Date-range filter for the daily-active-end-user analytics endpoint."""
    # `= None` makes these genuinely optional under Pydantic v2 (no-op in v1).
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    vectorstore: Optional[str] = None
196
+
197
+
198
class DailyChatCountRequest(BaseModel):
    """Date-range filter for the daily-chat-count analytics endpoint."""
    # `= None` makes these genuinely optional under Pydantic v2 (no-op in v1).
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    vectorstore: Optional[str] = None
202
+
203
+ ##---------------------GOldPr---------------------------
204
+
205
class JewelQueryRequest(BaseModel):
    """Payload for querying the JewelMirror graph against a vectorstore."""
    query: str  # PEP 8 spacing fix (was `query:str`)
    vectorstore: str
208
+
src/models/response_handling_models.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-28
4
+ """
5
+ from typing import Any, Dict
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
+ ## ---------------------------------- Response Handling API Models ----------------------------------
11
class SuccessResponse(BaseModel):
    """Standard success envelope returned by the API helpers."""
    status: str  # e.g. "success"
    code: int  # HTTP status code echoed in the body
    data: Dict[str, Any]  # endpoint-specific payload
15
+
16
+
17
class ErrorResponse(BaseModel):
    """Standard error envelope returned by the API helpers."""
    status: str  # e.g. "error"
    code: int  # HTTP status code echoed in the body
    error: Dict[str, Any]  # error details
21
+
22
+
23
class SuccessResponseUsermanagement(BaseModel):
    """Success envelope used by user-management endpoints (adds `message`)."""
    status: str
    code: int
    message: str  # human-readable outcome description
    data: Dict[str, Any]
src/models/utls.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-29
4
+ """
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class FollowUps(BaseModel):
    """Structured LLM output: three follow-up questions derived from context."""

    q1: str = Field(description="First Follow-up Question")
    q2: str = Field(description="Second Follow-up Question")
    q3: str = Field(description="Third Follow-up Question")
12
+
13
+
14
+ ## langgraph models
15
class TopicSelectionParser(BaseModel):
    """Structured LLM output for the langgraph topic-routing step."""

    # Capitalized field names are part of the structured-output contract the
    # LLM is prompted to emit; renaming them would break parsing.
    Topic: str = Field(description='Selected Topic')
    Reasoning: str = Field(description='Reasoning behind topic selection')
src/pipeline/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
src/pipeline/conversai_analytic_pipeline.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-28
4
+ """
5
+ import requests
6
+
7
+ from src.api.conversai_api import supabase_client
8
+ from src.services.supabase.analytics.analytic_tables import feedback_table, chat_history_table, add_feedback, \
9
+ track_usage
10
+
11
+
12
class ConversAIAnalyticPipeline:
    """Facade over the Supabase analytics helpers (feedback, history, usage)."""

    def __init__(self):
        # Reuses the shared Supabase client created by the API module.
        self.supabase_client = supabase_client

    def feedback_table_(self, vectorstore):
        """Delegate: fetch the feedback table for *vectorstore*."""
        return feedback_table(self.supabase_client, vectorstore)

    def chat_history_table_(self, vectorstore):
        """Delegate: fetch the chat-history table for *vectorstore*."""
        return chat_history_table(self.supabase_client, vectorstore)

    def add_feedback_(self, feedback, user_id, city, client_ip, vectorstore):
        """Delegate: store one feedback entry with its geo/IP metadata."""
        return add_feedback(self.supabase_client, feedback, user_id, city, client_ip, vectorstore)

    def track_usage_(self, vectorstore: str, endpoint: str):
        """Delegate: record one usage event for *endpoint* on *vectorstore*."""
        return track_usage(supabase_client=self.supabase_client, vectorstore=vectorstore, endpoint=endpoint)

    def get_ip_info(self, ip: str) -> str:
        """Best-effort city lookup for *ip* via ipinfo.io.

        Returns "Unknown" on any failure; analytics enrichment is
        non-critical, so errors are deliberately swallowed.
        """
        try:
            # Bounded timeout: the original call had none, so a slow or
            # unreachable ipinfo.io could hang the worker indefinitely.
            response = requests.get(f"https://ipinfo.io/{ip}/json", timeout=5)
            return response.json().get("city", "Unknown")
        except Exception:
            return "Unknown"
src/pipeline/conversai_pipeline.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-02
4
+ """
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from langchain_core.prompts import PromptTemplate
7
+ from src.services.embeddings.Qdrant_BM25_embedding import qdrant_bm25_embedding
8
+ from src.services.document.add_document import AddDocument
9
+ from src.services.answer_query.answerquery import AnswerQuery
10
+ from src.services.file_analyzer.data_analyzer import DataAnalyzer
11
+ from src.services.get_links.web_scraper import WebScraper
12
+ # from src.services.ocr.easy_ocr.easy_ocr_ import EasyOCR_ as OCRService
13
+ from src.services.pdf_extraction.image_pdf.image_pdf_text_extraction import get_text_from_image_pdf
14
+ from src.services.pdf_extraction.text_pdf.text_pdf_extraction import extract_text_from_pdf
15
+ from src.services.video_transcription.youtube_video_transcription.youtube_video_transcript import \
16
+ youtube_video_transcription
17
+ # from src.services.website_url.text_extraction_urls import extract_text_from_url_list, extract_text_from_url
18
+ from src.utils.utils import json_parser
19
+ from src.prompts.custom_prompts import _custom_prompts
20
+ from src.services.ocr.replicate_ocr.replicate_ocr import ReplicateOCR as OCRService
21
+ from src.services.embeddings.jina_embeddings import jina_embedding
22
+ from src.services.website_url.text_extraction_urlsnew import WebScrapertext
23
+
24
+
25
class ConversAIPipeline:
    """Wires the ConversAI services together and exposes thin entry points.

    Owns the embedding models, document ingestion, RAG answering, data
    analysis, scraping, OCR and transcription services used by the API.
    """

    def __init__(self):
        # Build the RAG answer prompt and the follow-up-question prompt.
        answer_prompt = ChatPromptTemplate.from_template(_custom_prompts["RAG_ANSWER_PROMPT"])
        parser = json_parser()
        follow_up = PromptTemplate(
            template=_custom_prompts["FOLLOW_UP_PROMPT"],
            input_variables=["context"],
            partial_variables={"format_instructions": parser.get_format_instructions()},
        )

        # Dense + sparse embeddings feed both ingestion and retrieval.
        self.vector_embedding = jina_embedding()
        self.sparse_embedding = qdrant_bm25_embedding()
        self.add_document_service = AddDocument(self.vector_embedding, self.sparse_embedding)
        self.answer_query_service = AnswerQuery(
            vector_embedding=self.vector_embedding,
            sparse_embedding=self.sparse_embedding,
            prompt=answer_prompt,
            follow_up_prompt=follow_up,
            json_parser=parser,
        )
        self.data_analyzer = DataAnalyzer()
        self.get_website_links = WebScraper()
        self.ocr_service = OCRService()
        self.web_text_extractor = WebScrapertext()

    def add_document_(self, texts: list[tuple[str]], vectorstore: str):
        """Chunk and upload (text, source) pairs into *vectorstore*."""
        return self.add_document_service.add_documents(texts=texts, vectorstore=vectorstore)

    def answer_query_(self, query: str, vectorstore: str, llm_model: str = "llama-3.1-70b-versatile"):
        """Answer *query* from *vectorstore*; returns (answer, follow-ups, sources)."""
        return self.answer_query_service.answer_query(query=query,
                                                      vectorstore=vectorstore,
                                                      llmModel=llm_model)

    def data_analyzer_(self, query: str, dataframe):
        """Run a natural-language *query* against *dataframe*."""
        return self.data_analyzer.analyze_data(query=query, dataframe=dataframe)

    def get_links_(self, url: str, timeout: int):
        """Collect links reachable from *url* within the *timeout* budget."""
        return self.get_website_links.get_links(url=url, timeout=timeout)

    def image_pdf_text_extraction_(self, image_pdf: bytes):
        """Extract text from a scanned (image-based) PDF given as bytes."""
        return get_text_from_image_pdf(pdf_bytes=image_pdf)

    def text_pdf_extraction_(self, pdf: str):
        """Extract text from a text-based PDF at path *pdf*."""
        return extract_text_from_pdf(pdf_path=pdf)

    def youtube_transcript_(self, url: list):
        """Fetch transcripts for the given YouTube video URL(s)."""
        return youtube_video_transcription(youtube_video_url=url)

    def website_url_text_extraction_(self, url: str):
        """Extract the visible text of a single web page."""
        return self.web_text_extractor.extract_text_from_url(url=url)

    def website_url_text_extraction_list_(self, urls: list):
        """Extract the visible text of each page in *urls*."""
        return self.web_text_extractor.extract_text_from_urls(urls=urls)
src/pipeline/speech_transcription_pipeline.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
5
+ from src.services.speech_to_text.speech_to_text_replicate import SpeechToTextReplicate
6
+ from src.services.text_to_speech.text_to_speech_gtts import TextToSpeech
7
+
8
+
9
class SpeechTranscriptionPipeline:
    """Bridges the speech services: gTTS synthesis and Replicate transcription."""

    def __init__(self):
        self.speech_to_text_ = SpeechToTextReplicate()
        self.text_to_speech_ = TextToSpeech()

    def text_to_speech(self, text: str, lang: str, tld: str) -> str:
        """Synthesize *text* with the given language/accent settings."""
        return self.text_to_speech_.conversion(text, lang, tld)

    def speech_to_text(self, audio, lang: str) -> str:
        """Transcribe *audio*; the timestamped variant is discarded."""
        _, plain_transcript = self.speech_to_text_.transcribe_audio(audio=audio, language=lang)
        return plain_transcript
src/pipeline/user_management_pipeline.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ from src.services.supabase.user_management.user_service import UserManagement
6
+ from src.services.supabase.user_management.chatbot_management import SupabaseChatoBotManagement
7
+ from src.services.supabase.conversai_setup.conversai_user_db_setup import ConversAIUserDBSetup
8
+ from src.services.supabase.user_management.chat_history import get_chat_history
9
+ from supabase.client import create_client
10
+ from qdrant_client import QdrantClient
11
+ import os
12
+
13
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
14
+ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
15
+
16
+ QDRANT_URL = os.getenv("QDRANT_URL")
17
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
18
+
19
+
20
class SupabaseUserManagementPipeline:
    """Facade over Supabase auth/user management and Qdrant chatbot storage."""

    def __init__(self):
        # NOTE(review): the clients are built from env vars read at import
        # time; if any is unset, create_client receives None — confirm the
        # deployment always provides SUPABASE_URL/KEY and QDRANT_URL/API_KEY.
        self.supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)
        self.qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
        self.user_management = UserManagement(supabase_client=self.supabase_client)
        self.chatbot_management = SupabaseChatoBotManagement(supabase_client=self.supabase_client,
                                                            qdrant_client=self.qdrant_client)
        self.user_db_setup = ConversAIUserDBSetup(supabase_client=self.supabase_client)

    def user_signup_(self, username: str, email: str, password: str) -> dict:
        """Delegate: register a new user account."""
        return self.user_management.user_signup(username=username, email=email, password=password)

    def user_signin_(self, email: str, password: str) -> dict:
        """Delegate: sign a user in with email/password."""
        return self.user_management.user_signin(email=email, password=password)

    def check_session_(self) -> dict:
        """Delegate: check the current auth session."""
        return self.user_management.check_session()

    def get_user_data_(self, access_token: str) -> dict:
        """Delegate: fetch user data for *access_token*."""
        return self.user_management.get_user_data(access_token=access_token)

    def refresh_session__(self, refresh_token: str) -> dict:
        """Delegate: refresh the auth session from *refresh_token*."""
        return self.user_management.refresh_session_(refresh_token=refresh_token)

    def login_with_access_token_(self, access_token: str, refresh_token: str) -> dict:
        """Delegate: log in using an existing access/refresh token pair."""
        return self.user_management.login_with_access_token(access_token=access_token, refresh_token=refresh_token)

    def username_creation_oauth_(self, username: str, user_id: str, email: str):
        """Delegate: create the username record after an OAuth sign-in."""
        return self.user_management.user_name_creation_oauth(user_id=user_id, username=username, email=email)

    def set_session_data_(self, access_token: str, refresh_token: str, user_id: str):
        """Delegate: persist session data for the given user."""
        return self.user_management.set_session_data(access_token=access_token, refresh_token=refresh_token,
                                                     user_id=user_id)

    def sign_out_(self):
        """Delegate: sign the current user out."""
        return self.user_management.sign_out_()

    def oauth_signin_(self) -> dict:
        """Delegate: start the OAuth sign-in flow."""
        return self.user_management.oauth()

    def new_chatbot_(self, chatbot_name: str, username: str):
        """Delegate: create a new chatbot for *username*."""
        return self.chatbot_management.new_chatbot(chatbot_name=chatbot_name, username=username)

    def get_chat_history_(self, vectorstore: str):
        """Delegate: fetch the chat history stored for *vectorstore*."""
        return get_chat_history(vectorstore=vectorstore, supabase_client=self.supabase_client)

    def delete_table(self, table_name: str):
        """Delegate: drop the Supabase table *table_name*."""
        return self.chatbot_management.delete_table(table_name=table_name)

    def list_tables(self, username: str):
        """Delegate: list the tables belonging to *username*."""
        return self.chatbot_management.list_tables(username=username)

    def create_data_source_name(self, source_name: str, username: str):
        """Delegate: register a data-source name for *username*."""
        return self.chatbot_management.create_data_source_name(source_name=source_name, username=username)

    def delete_qdrant_cluster(self, vectorstorename):
        """Delete the Qdrant collection named *vectorstorename* (returns None)."""
        self.qdrant_client.delete_collection(collection_name=vectorstorename)
77
+
78
# Manual smoke test: creates a real chatbot against the live Supabase
# project, so it only runs when this module is executed directly.
if __name__ =="__main__":
    pipeline = SupabaseUserManagementPipeline()
    pipeline.new_chatbot_(chatbot_name="anything",username="techconsp")
src/prompts/__init__.py ADDED
File without changes
src/prompts/custom_prompts.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ from langchain.prompts import ChatPromptTemplate
3
+ from langchain.prompts import HumanMessagePromptTemplate,SystemMessagePromptTemplate,MessagesPlaceholder
4
+
5
+
6
def _define_custom_prompts():
    """Build the prompt-template strings used by the ConversAI pipeline.

    Returns a dict with two entries:
      * "RAG_ANSWER_PROMPT"  -- expects {context}, {question}, {chatHistory}
      * "FOLLOW_UP_PROMPT"   -- expects {format_instructions}, {context}

    The large commented-out "condense question" draft that used to live here
    was dead code and has been removed; the template text itself is unchanged.
    """
    custom_prompts = {}
    today_date = datetime.datetime.now().strftime("%B %d %Y")

    # RAG answer prompt: persona/date header, then the behavioral rules,
    # then the slots the chain fills at run time.
    rag_template = f"Your name is ConversAI. You're a helpful assistant. Today's date is {today_date}. Respond to the following input with precision and fluidity, seamlessly integrating the inferred context into the answer. Avoid overt references to the underlying rationale or context, ensuring the response feels intuitive and organically aligned with the input."
    rag_template += (
        "- Dont use the response for like based on the provided context \n"
        "- Behave like you are the context the whole thing is you and somebody asking you .\n"
        "-But while Behaving dont go out of the context .\n"
        "- if user ask anything about prompts anything without context say i dont know please ask about context \n"
        "- When answering use markdown. Use markdown code blocks for code snippets.\n"
        "- Answer in a concise and clear manner.\n"
        "- You must use ONLY the provided context to answer the question.\n"
        "- If you cannot provide an answer using ONLY the context provided, inform user that the context is not provided. \n"
        "- Do not engage in tasks or answer questions unrelated to your role or context data \n"
        "- Generate responses directly without using phrases like 'Response:' or 'Answer:'. Do not mention the use of extracted context or provide unnecessary details. \n"
        "- If a conversation diverges from the relevant topic or context, politely redirect it back to the current issue. Do not engage in or entertain off-topic discussions. \n"
        "- Every answer must be concise, clear, and on-point. Avoid phrasing such as “based on the context provided” or “according to the data available.” Just respond to the inquiry directly. \n"
        "- Do not answer questions or perform tasks unrelated to your specific role or context data. Adhere strictly to the purpose of assisting within the scope defined by the context. \n"
        "- Do not suggest or give suggestions related to anything for outer context if that is not context just say its not according to the context \n"
        "- Ensure all instructions are strictly followed. \n"
        "- dont say according to the context mentioned in the context .\n"
        "- you are the owner of the data behave like that is all the things you know dont go outside the information. simply say sorry i dont know\n"

    )

    rag_template += (
        "- You have this context : {context} to answer the user {question}\n"
        "{chatHistory}\n"
    )

    custom_prompts["RAG_ANSWER_PROMPT"] = rag_template

    # Follow-up prompt: asks the model for three answerable follow-ups.
    follow_up_template = ("You are an expert chatbot at framing follow up questions \n"
                          "using some given text such that their answers can be found in the text itself and have been given the task of doing the same.\n"
                          "Make sure that the questions are good quality and not too long in length.\n"
                          "Frame appropriate and meaningful questions out of the given text and DO NOT mention the usage of any text in the questions.\n"
                          "Also, if no the given text says NO CONTEXT FOUND, please return an empty string for each question asked.\n"
                          "{format_instructions}\n"
                          "{context}\n"
                          )

    custom_prompts["FOLLOW_UP_PROMPT"] = follow_up_template

    return custom_prompts


# Built once at import time; the date baked into the RAG prompt is therefore
# the process start date.
_custom_prompts = _define_custom_prompts()
src/services/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-07-31
4
+ """
src/services/answer_query/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
src/services/answer_query/answerquery.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.runnables.history import RunnableWithMessageHistory
5
+ from langchain.memory import ChatMessageHistory
6
+ from langchain.retrievers import ContextualCompressionRetriever
7
+ from langchain_community.document_compressors import JinaRerank
8
+ from langchain_core.chat_history import BaseChatMessageHistory
9
+ from src.services.vector_db.qdrent.upload_document import upload_document_existing_collection, \
10
+ answer_query_from_existing_collection
11
+ from langchain_groq import ChatGroq
12
+ import os
13
# Propagate the Jina key only when it is actually set: the original
# unconditional assignment raised TypeError at import time whenever the
# JINA_API environment variable was missing (os.environ values must be str).
_jina_api_key = os.getenv("JINA_API")
if _jina_api_key is not None:
    os.environ["JINA_API_KEY"] = _jina_api_key
14
+ from src import logging as logger
15
+
16
+
17
+ class AnswerQuery:
18
+ def __init__(self, prompt, vector_embedding, sparse_embedding, follow_up_prompt, json_parser):
19
+ self.chat_history_store = {}
20
+ self.compressor = JinaRerank(model="jina-reranker-v2-base-multilingual")
21
+ self.vector_embed = vector_embedding
22
+ self.sparse_embed = sparse_embedding
23
+ self.prompt = prompt
24
+ self.follow_up_prompt = follow_up_prompt
25
+ self.json_parser = json_parser
26
+
27
+ def format_docs(self, docs: str):
28
+ global sources
29
+ global temp_context
30
+ sources = []
31
+ context = ""
32
+ for doc in docs:
33
+ context += f"{doc.page_content}\n\n\n"
34
+ source = doc.metadata
35
+ source = source["source"]
36
+ sources.append(source)
37
+ if context == "":
38
+ context = "No context found"
39
+ else:
40
+ pass
41
+ sources = list(set(sources))
42
+ temp_context = context
43
+ return context
44
+
45
+
46
+
47
+ def answer_query(self, query: str, vectorstore: str, llmModel: str = "llama-3.1-70b-versatile"):
48
+ global sources
49
+ global temp_context
50
+ vector_store_name = vectorstore
51
+ vector_store = answer_query_from_existing_collection(vector_embed=self.vector_embed,
52
+ sparse_embed=self.sparse_embed,
53
+ vectorstore=vectorstore)
54
+
55
+ retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10, "fetch_k": 20})
56
+ compression_retriever = ContextualCompressionRetriever(
57
+ base_compressor = self.compressor, base_retriever = retriever
58
+ )
59
+ brain_chain = (
60
+ {"context": RunnableLambda(lambda x: x["question"]) | compression_retriever | RunnableLambda(self.format_docs),
61
+ "question": RunnableLambda(lambda x: x["question"]),
62
+ "chatHistory": RunnableLambda(lambda x: x["chatHistory"])}
63
+ | self.prompt
64
+ | ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
65
+ | StrOutputParser()
66
+ )
67
+ message_chain = RunnableWithMessageHistory(
68
+ brain_chain,
69
+ self.get_session_history,
70
+ input_messages_key="question",
71
+ history_messages_key="chatHistory"
72
+ )
73
+ chain = RunnablePassthrough.assign(messages_trimmed=self.trim_messages) | message_chain
74
+ follow_up_chain = self.follow_up_prompt | ChatGroq(model_name="llama-3.1-70b-versatile",
75
+ temperature=0) | self.json_parser
76
+
77
+ output = chain.invoke(
78
+ {"question": query},
79
+ {"configurable": {"session_id": vector_store_name}}
80
+ )
81
+ follow_up_questions = follow_up_chain.invoke({"context": temp_context})
82
+
83
+ return output, follow_up_questions, sources
84
+
85
+ def trim_messages(self, chain_input):
86
+ for store_name in self.chat_history_store:
87
+ messages = self.chat_history_store[store_name].messages
88
+ if len(messages) <= 1:
89
+ pass
90
+ else:
91
+ self.chat_history_store[store_name].clear()
92
+ for message in messages[-1:]:
93
+ self.chat_history_store[store_name].add_message(message)
94
+ return True
95
+
96
+ def get_session_history(self, session_id: str) -> BaseChatMessageHistory:
97
+ if session_id not in self.chat_history_store:
98
+ self.chat_history_store[session_id] = ChatMessageHistory()
99
+ return self.chat_history_store[session_id]
src/services/document/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
src/services/document/add_document.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ import string
6
+ from uuid import uuid4
7
+ from langchain.docstore.document import Document
8
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
9
+ from src.services.vector_db.qdrent.upload_document import upload_document_existing_collection
10
+
11
+
12
class AddDocument:
    """Chunks (text, source) pairs and uploads them to a Qdrant collection."""

    def __init__(self, vector_embedding, sparse_embedding):
        # Dense and sparse embedding models used for the hybrid upload.
        self.vector_embed = vector_embedding
        self.sparse_embed = sparse_embedding

    def add_documents(self, texts: list[tuple[str, str]], vectorstore: str):
        """Split and upload documents to *vectorstore*.

        Args:
            texts: (text, source) pairs — annotation tightened from
                ``list[tuple[str]]``: element [0] is the body, [1] the source.
            vectorstore: name of the target collection.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=400,
            chunk_overlap=100,
            add_start_index=True
        )
        sources = [textTuple[1] for textTuple in texts]
        texts = [textTuple[0].replace("\n", " ") for textTuple in texts]
        # Strip all punctuation except "." so sentence boundaries survive.
        texts = [text.translate(str.maketrans('', '', string.punctuation.replace(".", ""))) for text in texts]
        texts = [Document(page_content=text, metadata={"source": source}) for text, source in zip(texts, sources)]
        documents = splitter.split_documents(texts)
        upload_document_existing_collection(vector_embed=self.vector_embed,
                                            sparse_embed=self.sparse_embed,
                                            vectorstore=vectorstore, documents=documents)
src/services/embeddings/BGE-M3_vector_embedding.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # """
2
+ # Created By: ishwor subedi
3
+ # Date: 2024-08-23
4
+ # """
5
+ # from langchain_huggingface import HuggingFaceEmbeddings
6
+ #
7
+ #
8
+ # def bge_m3_vector_embedding():
9
+ # return HuggingFaceEmbeddings(
10
+ # model_name="BAAI/bge-m3",
11
+ # model_kwargs={"device": "cuda"},
12
+ # encode_kwargs={"normalize_embeddings": True}
13
+ # )
src/services/embeddings/Qdrant_BM25_embedding.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ from langchain_qdrant import FastEmbedSparse
6
+
7
+
8
def qdrant_bm25_embedding():
    """Create the sparse BM25 embedding model used for hybrid retrieval."""
    # Same configuration as before: Qdrant's BM25 model, 20 threads,
    # parallel=0 — returned directly instead of via a temporary.
    return FastEmbedSparse(model="Qdrant/BM25", threads=20, parallel=0)
src/services/embeddings/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
src/services/embeddings/jina_embeddings.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.embeddings import JinaEmbeddings
2
+ import os
3
+
4
def jina_embedding():
    """Create the dense Jina v3 embedding client (API key from JINA_API)."""
    api_key = os.getenv('JINA_API')
    return JinaEmbeddings(jina_api_key=api_key,
                          model_name="jina-embeddings-v3")
src/services/embeddings/sentence_transformers_all_MiniLM_L6_v2_vector_embedding.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from src.utils.utils import load_ini_config
7
+
8
+ #
9
+ # def all_minilm_l6_v2_vector_embedding():
10
+ # config = load_ini_config("config.ini")
11
+ #
12
+ # model_kwargs = {"device": config.get('all_mini_l6_v2_vector_embedding', 'device')}
13
+ # encode_kwargs = {"normalize_embeddings": config.get('all_mini_l6_v2_vector_embedding', 'normalize_embeddings')}
14
+ #
15
+ # all_minilm_object = HuggingFaceEmbeddings(
16
+ # model_name="BAAI/bge-base-en-v1.5",
17
+ # model_kwargs=model_kwargs,
18
+ # encode_kwargs=encode_kwargs
19
+ # )
20
+ # return all_minilm_object
src/services/file_analyzer/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
src/services/file_analyzer/data_analyzer.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
5
+ import os
6
+ import base64
7
+ from langchain_groq import ChatGroq
8
+ from pandasai import SmartDataframe
9
+ from src.utils.utils import load_ini_config
10
+
11
+
12
class DataAnalyzer:
    """Natural-language dataframe analysis via pandasai with a Groq LLM."""

    def __init__(self):
        self.config = load_ini_config("config.ini")
        self.llm_config = ChatGroq(name=self.config.get('data_analyzer', 'groq_llm_name'))
        # Suffix appended to every user query before it reaches pandasai.
        self.additional_query = self.config.get('data_analyzer', 'additional_query')

    def analyze_data(self, query, dataframe):
        """Run *query* against *dataframe*.

        Returns either the chat result directly or, when pandasai wrote a
        chart file, that image as a base64 data URI.
        """
        query += self.additional_query
        # Fix: bool() on a config *string* is True for any non-empty value
        # (including "False"), silently forcing verbose mode; getboolean()
        # parses true/false/1/0/yes/no correctly.
        verbose = self.config.getboolean("data_analyzer", "verbose")
        df = SmartDataframe(dataframe, config={"llm": self.llm_config, "verbose": verbose})
        response = df.chat(query)
        # df.chat may return a number or dataframe, not only a path string,
        # so guard the isfile() check against non-string results.
        if isinstance(response, str) and os.path.isfile(response):
            with open(response, "rb") as file:
                b64string = base64.b64encode(file.read()).decode("utf-8")
            return f"data:image/png;base64,{b64string}"
        return response
src/services/get_links/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
src/services/get_links/web_scraper.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import urlparse, urljoin
5
+ from concurrent.futures import ThreadPoolExecutor
6
+
7
+
8
+
9
class WebScraper:
    """Collects same-site links from a page and the pages it links to."""

    def __init__(self):
        pass

    def get_links(self, url: str, timeout=4):
        """Return deduplicated links discovered one hop from *url*.

        *timeout* bounds the overall crawl in seconds (not one request);
        trailing slashes are stripped from the returned links.
        """
        start = time.time()

        def get_links_from_page(page_url: str) -> list:
            # Per-request timeout: the original call had none, so one dead
            # host could stall the whole crawl indefinitely.
            response = requests.get(page_url, timeout=10)
            soup = BeautifulSoup(response.content, "lxml")
            found = []
            for anchor in soup.find_all("a"):
                href = anchor.attrs.get("href")
                if href is None:
                    continue
                if urlparse(href).netloc == urlparse(page_url).netloc:
                    found.append(href)
                elif not href.startswith(("//", "file", "javascript", "tel", "mailto", "http")):
                    # Relative link: resolve against the page URL.
                    found.append(urljoin(page_url + "/", href))
            # Drop fragment links and duplicates once after the scan (the
            # original re-filtered the whole list inside the anchor loop).
            return list({link for link in found if "#" not in link})

        first_level = get_links_from_page(url)
        unique_links = set()
        for link in first_level:
            if time.time() - start > timeout:
                break
            try:
                unique_links |= set(get_links_from_page(link))
            except requests.RequestException:
                # One broken link should not abort the whole crawl.
                continue
        # endswith() guard also avoids the original x[-1] IndexError on "".
        return list({x[:-1] if x.endswith("/") else x for x in unique_links})
44
+
45
# Manual smoke test: crawls a live website, so it only runs when this
# module is executed directly (requires network access).
if __name__ == "__main__":
    scraper = WebScraper()
    links=scraper.get_links("https://www.additudemag.com/")
    print(len(links))
    print(type(links))
src/services/ocr/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """
src/services/ocr/easy_ocr/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Created By: ishwor subedi
3
+ Date: 2024-08-23
4
+ """