Aryan Jain committed on
Commit
609858e
·
1 Parent(s): c2ebaa7

migrate to groq

Browse files
Files changed (4) hide show
  1. .env.example +2 -2
  2. poetry.lock +77 -1
  3. pyproject.toml +1 -0
  4. src/utils/_chat_client.py +28 -14
.env.example CHANGED
@@ -1,4 +1,4 @@
1
  LOG_FILE=
2
- HF_TOKEN=
3
  PINECONE_API_KEY=
4
- PINECONE_INDEX_NAME=
 
 
1
  LOG_FILE=
 
2
  PINECONE_API_KEY=
3
+ PINECONE_INDEX_NAME=
4
+ GROQ_API_KEY=
poetry.lock CHANGED
@@ -264,6 +264,17 @@ files = [
264
  {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
265
  ]
266
 
 
 
 
 
 
 
 
 
 
 
 
267
  [[package]]
268
  name = "fastapi"
269
  version = "0.112.2"
@@ -427,6 +438,25 @@ files = [
427
  docs = ["Sphinx", "furo"]
428
  test = ["objgraph", "psutil"]
429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  [[package]]
431
  name = "grpcio"
432
  version = "1.66.1"
@@ -496,6 +526,27 @@ files = [
496
  {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
497
  ]
498
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  [[package]]
500
  name = "httptools"
501
  version = "0.6.1"
@@ -544,6 +595,31 @@ files = [
544
  [package.extras]
545
  test = ["Cython (>=0.29.24,<0.30.0)"]
546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  [[package]]
548
  name = "huggingface-hub"
549
  version = "0.24.6"
@@ -1623,4 +1699,4 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
1623
  [metadata]
1624
  lock-version = "2.0"
1625
  python-versions = "3.11.*"
1626
- content-hash = "5fdd4c6613f0bfe9744a9ad2d35e8a225ab8eb6da52724717996dbc79c6f6062"
 
264
  {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
265
  ]
266
 
267
+ [[package]]
268
+ name = "distro"
269
+ version = "1.9.0"
270
+ description = "Distro - an OS platform information API"
271
+ optional = false
272
+ python-versions = ">=3.6"
273
+ files = [
274
+ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
275
+ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
276
+ ]
277
+
278
  [[package]]
279
  name = "fastapi"
280
  version = "0.112.2"
 
438
  docs = ["Sphinx", "furo"]
439
  test = ["objgraph", "psutil"]
440
 
441
+ [[package]]
442
+ name = "groq"
443
+ version = "0.11.0"
444
+ description = "The official Python library for the groq API"
445
+ optional = false
446
+ python-versions = ">=3.7"
447
+ files = [
448
+ {file = "groq-0.11.0-py3-none-any.whl", hash = "sha256:e328531c979542e563668c62260aec13b43a6ee0ca9e2fb22dff1d26f8c8ce54"},
449
+ {file = "groq-0.11.0.tar.gz", hash = "sha256:dbb9aefedf388ddd4801ec7bf3eba7f5edb67948fec0cd2829d97244059f42a7"},
450
+ ]
451
+
452
+ [package.dependencies]
453
+ anyio = ">=3.5.0,<5"
454
+ distro = ">=1.7.0,<2"
455
+ httpx = ">=0.23.0,<1"
456
+ pydantic = ">=1.9.0,<3"
457
+ sniffio = "*"
458
+ typing-extensions = ">=4.7,<5"
459
+
460
  [[package]]
461
  name = "grpcio"
462
  version = "1.66.1"
 
526
  {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
527
  ]
528
 
529
+ [[package]]
530
+ name = "httpcore"
531
+ version = "1.0.6"
532
+ description = "A minimal low-level HTTP client."
533
+ optional = false
534
+ python-versions = ">=3.8"
535
+ files = [
536
+ {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"},
537
+ {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"},
538
+ ]
539
+
540
+ [package.dependencies]
541
+ certifi = "*"
542
+ h11 = ">=0.13,<0.15"
543
+
544
+ [package.extras]
545
+ asyncio = ["anyio (>=4.0,<5.0)"]
546
+ http2 = ["h2 (>=3,<5)"]
547
+ socks = ["socksio (==1.*)"]
548
+ trio = ["trio (>=0.22.0,<1.0)"]
549
+
550
  [[package]]
551
  name = "httptools"
552
  version = "0.6.1"
 
595
  [package.extras]
596
  test = ["Cython (>=0.29.24,<0.30.0)"]
597
 
598
+ [[package]]
599
+ name = "httpx"
600
+ version = "0.27.2"
601
+ description = "The next generation HTTP client."
602
+ optional = false
603
+ python-versions = ">=3.8"
604
+ files = [
605
+ {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
606
+ {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
607
+ ]
608
+
609
+ [package.dependencies]
610
+ anyio = "*"
611
+ certifi = "*"
612
+ httpcore = "==1.*"
613
+ idna = "*"
614
+ sniffio = "*"
615
+
616
+ [package.extras]
617
+ brotli = ["brotli", "brotlicffi"]
618
+ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
619
+ http2 = ["h2 (>=3,<5)"]
620
+ socks = ["socksio (==1.*)"]
621
+ zstd = ["zstandard (>=0.18.0)"]
622
+
623
  [[package]]
624
  name = "huggingface-hub"
625
  version = "0.24.6"
 
1699
  [metadata]
1700
  lock-version = "2.0"
1701
  python-versions = "3.11.*"
1702
+ content-hash = "0f525b306edcca87bf7610ba62d87494081bf667f74bceb87cb2be1fb5b539a4"
pyproject.toml CHANGED
@@ -17,6 +17,7 @@ nltk = "^3.9.1"
17
  pinecone = {extras = ["grpc"], version = "^5.1.0"}
18
  uvicorn = {extras = ["standard"], version = "^0.30.6"}
19
  aiofiles = "^24.1.0"
 
20
 
21
 
22
  [build-system]
 
17
  pinecone = {extras = ["grpc"], version = "^5.1.0"}
18
  uvicorn = {extras = ["standard"], version = "^0.30.6"}
19
  aiofiles = "^24.1.0"
20
+ groq = "^0.11.0"
21
 
22
 
23
  [build-system]
src/utils/_chat_client.py CHANGED
@@ -1,37 +1,49 @@
1
  import os
2
- from huggingface_hub import InferenceClient
3
 
4
  INSTRUCTIONS="""
5
  You are an AI assistant designed by Sifars, a web development company, who gives answers to queries regarding Sifars, to the best of your ability.
6
  You will be provided a context along with each user query which you will use to answer the queries.
7
  Match the question with the context and according to that answer the user query.
 
 
 
 
 
 
 
 
8
  <RULES>
 
 
 
 
9
  - Keep your responses as short and to the point as possible. They should be concise, engaging, and easy to read.
10
  - Avoid including extra information not asked for in the user query.
11
  - Use plural first person pronouns when talking about sifars.
12
  - Try to answer the query from the context provided. Even if the context does not directly answer the question, try to relate it to the question and formulate an answer.
13
- - Do not mention the context provided, or the contents of the context, for any reason in your message.
14
- - If someone asks about sifars, give them only the necessary details and then ask them to visit our about page at [this link](https://www.sifars.com/en/about/).
15
- - If someone asks about the services we provide, give them a bit of idea according to the interest of their query and then direct them to our services page at [this link](https://www.sifars.com/en/services/).
16
- - If someone asks about our projects or portfolio, provide them details of any three projects with minimal overview of those projects, according to the interest of their query and then direct them to our portfolio page at [this link](https://www.sifars.com/en/portfolio/).
17
- - If someone asks about our technology or tech stack that we use, give them a bit of idea according to the interest of their query and then direct them to our technology page at [this link](https://www.sifars.com/en/technology/).
18
- - If someone asks about career opportunities, direct them to our career page at [this link](https://www.sifars.com/en/career/).
 
19
  </RULES>
20
  """
21
 
22
  class ChatClient:
23
  def __init__(
24
  self,
25
- model: str="meta-llama/Llama-3.2-11B-Vision-Instruct",
26
  max_tokens: int=4096,
27
  stream: bool=True,
28
  system_message: str=INSTRUCTIONS
29
  ):
30
- self.client = InferenceClient(
31
- model=model,
32
- token=os.getenv("HF_TOKEN")
33
  )
34
-
35
  self.max_tokens = max_tokens
36
  self.stream = stream
37
  self.system_message = system_message
@@ -46,13 +58,15 @@ class ChatClient:
46
  self,
47
  messages: list,
48
  ):
49
- for message in self.client.chat_completion(
50
  messages=[
51
  {"role": "system", "content": self.system_message},
52
  *messages
53
  ],
 
54
  max_tokens=self.max_tokens,
55
  stream=self.stream,
56
  temperature=0.7
57
  ):
58
- yield message.choices[0].delta.content
 
 
1
  import os
2
+ from groq import Groq
3
 
4
  INSTRUCTIONS="""
5
  You are an AI assistant designed by Sifars, a web development company, who gives answers to queries regarding Sifars, to the best of your ability.
6
  You will be provided a context along with each user query which you will use to answer the queries.
7
  Match the question with the context and according to that answer the user query.
8
+
9
+ # Here is a bit of information about Sifars:
10
+ Sifars, a pioneering web service provider, emerged onto the tech landscape in 2018 with a vision to revolutionize the digital sphere. Founded by visionary entrepreneurs Jatin Sethi, Munish Kumar, and Sukhwinder Singh, Sifars set its sights on empowering businesses worldwide with cutting-edge technology solutions. With its global headquarters nestled in the vibrant city of Patiala, Punjab, India, Sifars quickly garnered recognition as a leading application development company, committed to propelling businesses towards success in the ever-evolving tech landscape.
11
+
12
+ Email: contact@sifars.com
13
+ Address: SCO 6, First Floor, Phulkian Enclave,
14
+ Near Mini Secretariat, Patiala, Punjab 147001
15
+
16
  <RULES>
17
+ - It is mandatory to not give the project cost estimates. Instead ask the user to contact us for the same.
18
+ - If something you not found in the context then do not say that you do not have context.
19
+ - If user asks some information you don't have knowledge about then ask them to contact us along with the contact information and link of contact page instead of trying to answer it.
20
+ - It is mandatory for you to not mention the context provided, or the contents of the context, for any reason in your message.
21
  - Keep your responses as short and to the point as possible. They should be concise, engaging, and easy to read.
22
  - Avoid including extra information not asked for in the user query.
23
  - Use plural first person pronouns when talking about sifars.
24
  - Try to answer the query from the context provided. Even if the context does not directly answer the question, try to relate it to the question and formulate an answer.
25
+ - If someone asks to contact us, give them our contact information and link of contact page at https://www.sifars.com/en/contact/.
26
+ - If someone asks about sifars, give them only the necessary details and then ask them to visit our about page at https://www.sifars.com/en/about/.
27
+ - If someone asks about the services we provide, give them a bit of idea according to the interest of their query and then direct them to our services page at https://www.sifars.com/en/services/.
28
+ - If someone asks about our projects or portfolio, provide them details of any three projects with minimal overview of those projects, according to the interest of their query and then direct them to our portfolio page at https://www.sifars.com/en/portfolio/.
29
+ - If someone asks about our technology or tech stack that we use, give them a bit of idea according to the interest of their query and then direct them to our technology page at https://www.sifars.com/en/technology/.
30
+ - If someone asks about career opportunities, direct them to our career page at https://www.sifars.com/en/career/.
31
+ - It is mandatory for you to not make up any links on your own. Only use the links provided above.
32
  </RULES>
33
  """
34
 
35
  class ChatClient:
36
  def __init__(
37
  self,
38
+ model: str="llama-3.1-70b-versatile",
39
  max_tokens: int=4096,
40
  stream: bool=True,
41
  system_message: str=INSTRUCTIONS
42
  ):
43
+ self.client = Groq(
44
+ api_key=os.getenv("GROQ_API_KEY"),
 
45
  )
46
+ self.model = model
47
  self.max_tokens = max_tokens
48
  self.stream = stream
49
  self.system_message = system_message
 
58
  self,
59
  messages: list,
60
  ):
61
+ for message in self.client.chat.completions.create(
62
  messages=[
63
  {"role": "system", "content": self.system_message},
64
  *messages
65
  ],
66
+ model=self.model,
67
  max_tokens=self.max_tokens,
68
  stream=self.stream,
69
  temperature=0.7
70
  ):
71
+ if message.choices[0].delta.content:
72
+ yield message.choices[0].delta.content