codeBOKER committed on
Commit
9dfc4a2
·
1 Parent(s): 99cffc4

Switch AI provider to Hugging Face router

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. ai_service.py +11 -10
  3. config.py +21 -9
  4. requirements.txt +1 -1
README.md CHANGED
@@ -15,7 +15,7 @@ FastAPI backend for Hadhramout Bank AI customer service system.
15
 
16
  ## Features
17
  - Telegram webhook integration
18
- - AI-powered responses using Groq
19
  - Database integration with Supabase
20
  - Vector search with Pinecone
21
 
 
15
 
16
  ## Features
17
  - Telegram webhook integration
18
+ - AI-powered responses using Hugging Face Inference API
19
  - Database integration with Supabase
20
  - Vector search with Pinecone
21
 
ai_service.py CHANGED
@@ -1,5 +1,5 @@
1
  import re
2
- from config import pc, index, groq_client, EMBED_MODEL, GROQ_MODEL, PROMPT
3
  from database import db_manager
4
 
5
  def clean_ai_response(text: str):
@@ -7,8 +7,8 @@ def clean_ai_response(text: str):
7
  return cleaned_text.strip()
8
 
9
  async def get_ai_response(user_query: str, telegram_id: int = None):
10
-
11
- if not pc or not index or not groq_client:
12
  return "Ai service is not available at the moment. Please try again later."
13
 
14
  # Save user message if database is available and telegram_id is provided
@@ -16,8 +16,7 @@ async def get_ai_response(user_query: str, telegram_id: int = None):
16
  if telegram_id and db_manager:
17
  db_manager.save_message(telegram_id, user_query, "user")
18
  conversation_history = db_manager.get_formatted_history(telegram_id, limit=6)
19
-
20
-
21
  query_embedding = pc.inference.embed(
22
  model=EMBED_MODEL,
23
  inputs=[user_query],
@@ -48,19 +47,21 @@ async def get_ai_response(user_query: str, telegram_id: int = None):
48
  Based on the above information, provide an accurate and helpful response to the customer:
49
  """
50
  print("User content:", user_content)
51
- completion = groq_client.chat.completions.create(
 
 
52
  messages=[
53
  {"role": "system", "content": PROMPT},
54
- {"role": "user", "content": user_content}
55
  ],
56
- model=GROQ_MODEL,
57
  temperature=0.1,
58
- max_completion_tokens=800,
59
  top_p=0.9,
60
  )
 
61
  ai_response = completion.choices[0].message.content
62
  cleaned_response = clean_ai_response(ai_response)
63
-
64
  # Save assistant response if database is available and telegram_id is provided
65
  if telegram_id and db_manager:
66
  db_manager.save_message(telegram_id, cleaned_response, "assistant")
 
1
  import re
2
+ from config import pc, index, EMBED_MODEL, HF_MODEL, PROMPT, hf_client
3
  from database import db_manager
4
 
5
  def clean_ai_response(text: str):
 
7
  return cleaned_text.strip()
8
 
9
  async def get_ai_response(user_query: str, telegram_id: int = None):
10
+
11
+ if not pc or not index or not hf_client:
12
  return "Ai service is not available at the moment. Please try again later."
13
 
14
  # Save user message if database is available and telegram_id is provided
 
16
  if telegram_id and db_manager:
17
  db_manager.save_message(telegram_id, user_query, "user")
18
  conversation_history = db_manager.get_formatted_history(telegram_id, limit=6)
19
+
 
20
  query_embedding = pc.inference.embed(
21
  model=EMBED_MODEL,
22
  inputs=[user_query],
 
47
  Based on the above information, provide an accurate and helpful response to the customer:
48
  """
49
  print("User content:", user_content)
50
+
51
+ completion = hf_client.chat.completions.create(
52
+ model=HF_MODEL,
53
  messages=[
54
  {"role": "system", "content": PROMPT},
55
+ {"role": "user", "content": user_content},
56
  ],
 
57
  temperature=0.1,
58
+ max_tokens=800,
59
  top_p=0.9,
60
  )
61
+
62
  ai_response = completion.choices[0].message.content
63
  cleaned_response = clean_ai_response(ai_response)
64
+
65
  # Save assistant response if database is available and telegram_id is provided
66
  if telegram_id and db_manager:
67
  db_manager.save_message(telegram_id, cleaned_response, "assistant")
config.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from pinecone import Pinecone
3
- from groq import Groq
4
  from dotenv import load_dotenv
5
 
6
  # Load environment variables from .env file
@@ -8,7 +8,7 @@ load_dotenv()
8
 
9
  # Environment Variables
10
  PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
11
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
12
  TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN")
13
  SUPABASE_URL = os.environ.get("SUPABASE_URL")
14
  SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
@@ -18,21 +18,33 @@ SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
18
  TELEGRAM_URL = f"https://149.154.167.220/bot{TELEGRAM_TOKEN}/sendMessage" if TELEGRAM_TOKEN else None
19
 
20
  EMBED_MODEL = os.environ.get("EMBED_MODEL", "multilingual-e5-large")
21
- GROQ_MODEL = os.environ.get("GROQ_MODEL", "llama-3.1-8b-instant")
22
- PROMPT = os.environ.get("PROMPT", "You are a helpful customer service assistant for Hadhramout Bank. Answer the user's question based on the provided context. If the context doesn't contain the answer, politely say you don't have enough information to help with that specific query.")
 
 
 
 
 
 
 
 
 
23
 
24
  # Initialize clients only if API keys are available
25
  pc = None
26
  if PINECONE_API_KEY:
27
  pc = Pinecone(api_key=PINECONE_API_KEY)
28
 
29
- groq_client = None
30
- if GROQ_API_KEY:
31
  try:
32
- groq_client = Groq(api_key=GROQ_API_KEY)
 
 
 
33
  except Exception as e:
34
- print(f"Warning: Failed to initialize Groq client: {e}")
35
- groq_client = None
36
 
37
  # Initialize index only if Pinecone client is available
38
  index = None
 
1
  import os
2
  from pinecone import Pinecone
3
+ from openai import OpenAI
4
  from dotenv import load_dotenv
5
 
6
  # Load environment variables from .env file
 
8
 
9
  # Environment Variables
10
  PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
11
+ HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HF_API_KEY")
12
  TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN")
13
  SUPABASE_URL = os.environ.get("SUPABASE_URL")
14
  SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
 
18
  TELEGRAM_URL = f"https://149.154.167.220/bot{TELEGRAM_TOKEN}/sendMessage" if TELEGRAM_TOKEN else None
19
 
20
  EMBED_MODEL = os.environ.get("EMBED_MODEL", "multilingual-e5-large")
21
+ HF_MODEL = os.environ.get(
22
+ "HF_MODEL",
23
+ "dphn/Dolphin-Mistral-24B-Venice-Edition:featherless-ai",
24
+ )
25
+ PROMPT = os.environ.get(
26
+ "PROMPT",
27
+ "You are a helpful customer service assistant for Hadhramout Bank. "
28
+ "Answer the user's question based on the provided context. If the context "
29
+ "doesn't contain the answer, politely say you don't have enough information "
30
+ "to help with that specific query."
31
+ )
32
 
33
  # Initialize clients only if API keys are available
34
  pc = None
35
  if PINECONE_API_KEY:
36
  pc = Pinecone(api_key=PINECONE_API_KEY)
37
 
38
+ hf_client = None
39
+ if HF_TOKEN:
40
  try:
41
+ hf_client = OpenAI(
42
+ base_url="https://router.huggingface.co/v1",
43
+ api_key=HF_TOKEN,
44
+ )
45
  except Exception as e:
46
+ print(f"Warning: Failed to initialize Hugging Face OpenAI client: {e}")
47
+ hf_client = None
48
 
49
  # Initialize index only if Pinecone client is available
50
  index = None
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  fastapi
2
  uvicorn
3
  pinecone
4
- groq
5
  httpx
6
  python-dotenv
7
  supabase
 
 
1
  fastapi
2
  uvicorn
3
  pinecone
 
4
  httpx
5
  python-dotenv
6
  supabase
7
+ openai