nivakaran commited on
Commit
4084b53
·
verified ·
1 Parent(s): 5d23e94

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. README.md +2 -2
  3. app.py +4 -0
  4. src/llm/groq_llm.py +73 -23
.gitignore CHANGED
@@ -36,3 +36,6 @@ models/
36
  *.gguf
37
  *.safetensors
38
  models/.cache/
 
 
 
 
36
  *.gguf
37
  *.safetensors
38
  models/.cache/
39
+
40
+ .env
41
+ env
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🚀
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
@@ -34,4 +34,4 @@ A modular Retrieval Augmented Generation (RAG) system powered by Phi-3.5-mini.
34
  - **LLM**: Phi-3.5-mini (GGUF via llama-cpp-python)
35
  - **Embeddings**: sentence-transformers (all-MiniLM-L6-v2)
36
  - **Vector Store**: ChromaDB
37
- - **UI**: Gradio
 
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.0.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
34
  - **LLM**: Phi-3.5-mini (GGUF via llama-cpp-python)
35
  - **Embeddings**: sentence-transformers (all-MiniLM-L6-v2)
36
  - **Vector Store**: ChromaDB
37
+ - **UI**: Gradio
app.py CHANGED
@@ -8,6 +8,10 @@ import sys
8
  import logging
9
  import threading
10
 
 
 
 
 
11
  # Configure logging for HuggingFace Spaces visibility
12
  logging.basicConfig(
13
  level=logging.INFO,
 
8
  import logging
9
  import threading
10
 
11
+ # Load environment variables from .env file
12
+ from dotenv import load_dotenv
13
+ load_dotenv()
14
+
15
  # Configure logging for HuggingFace Spaces visibility
16
  logging.basicConfig(
17
  level=logging.INFO,
src/llm/groq_llm.py CHANGED
@@ -2,35 +2,62 @@
2
 
3
  import logging
4
  import os
5
- from typing import Optional
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
- # Groq API configuration
10
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
 
 
 
 
 
 
 
 
 
 
 
 
11
  GROQ_MODEL = "llama-3.1-8b-instant" # Fast, free model on Groq
12
 
13
 
14
  class GroqLLM:
15
  """Groq-based LLM with local model fallback.
16
 
17
- Uses Groq API for fast inference, falls back to local Phi-3
18
- if Groq is unavailable or rate limited.
 
19
  """
20
 
21
  def __init__(self):
22
- """Initialize Groq client."""
23
- self._groq_client = None
24
  self._local_model = None
25
- self._groq_available = bool(GROQ_API_KEY)
 
26
 
27
  if self._groq_available:
28
  try:
29
  from groq import Groq
30
- self._groq_client = Groq(api_key=GROQ_API_KEY)
31
- logger.info("✅ Groq client initialized successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  except Exception as e:
33
- logger.warning(f"⚠️ Groq initialization failed: {e}")
34
  self._groq_available = False
35
  else:
36
  logger.info("📝 No GROQ_API_KEY found, using local model only")
@@ -52,7 +79,7 @@ class GroqLLM:
52
  max_tokens: int = 256,
53
  temperature: float = 0.7
54
  ) -> str:
55
- """Generate response using Groq with local fallback.
56
 
57
  Args:
58
  prompt: User prompt/question.
@@ -63,27 +90,50 @@ class GroqLLM:
63
  Returns:
64
  Generated response string.
65
  """
66
- # Try Groq first if available
67
- if self._groq_available and self._groq_client:
68
- try:
69
- response = self._call_groq(prompt, system_prompt, max_tokens, temperature)
70
- if response:
71
- return response
72
- except Exception as e:
73
- logger.warning(f"⚠️ Groq API error, falling back to local: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Fallback to local model
76
  logger.info("🔄 Using local model for generation")
77
  return self._call_local(prompt, system_prompt, max_tokens)
78
 
79
- def _call_groq(
80
  self,
 
81
  prompt: str,
82
  system_prompt: Optional[str],
83
  max_tokens: int,
84
  temperature: float
85
  ) -> str:
86
- """Call Groq API."""
87
  messages = []
88
 
89
  if system_prompt:
@@ -91,7 +141,7 @@ class GroqLLM:
91
 
92
  messages.append({"role": "user", "content": prompt})
93
 
94
- response = self._groq_client.chat.completions.create(
95
  model=GROQ_MODEL,
96
  messages=messages,
97
  max_tokens=max_tokens,
 
2
 
3
  import logging
4
  import os
5
+ from typing import Optional, List
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
+ # Groq API configuration - Support for multiple API keys (up to 10)
10
+ GROQ_API_KEYS: List[str] = []
11
+
12
+ # Load primary key
13
+ _primary_key = os.environ.get("GROQ_API_KEY", "")
14
+ if _primary_key:
15
+ GROQ_API_KEYS.append(_primary_key)
16
+
17
+ # Load additional keys (GROQ_API_KEY_2 through GROQ_API_KEY_10)
18
+ for i in range(2, 11):
19
+ key = os.environ.get(f"GROQ_API_KEY_{i}", "")
20
+ if key:
21
+ GROQ_API_KEYS.append(key)
22
+
23
  GROQ_MODEL = "llama-3.1-8b-instant" # Fast, free model on Groq
24
 
25
 
26
  class GroqLLM:
27
  """Groq-based LLM with local model fallback.
28
 
29
+ Uses Groq API for fast inference with multiple API key fallback.
30
+ Rotates through available keys on rate limits or errors before
31
+ falling back to local Phi-3 model.
32
  """
33
 
34
  def __init__(self):
35
+ """Initialize Groq client with multiple API key support."""
36
+ self._groq_clients: List = []
37
  self._local_model = None
38
+ self._current_key_index = 0
39
+ self._groq_available = len(GROQ_API_KEYS) > 0
40
 
41
  if self._groq_available:
42
  try:
43
  from groq import Groq
44
+ # Initialize clients for all available API keys
45
+ for i, api_key in enumerate(GROQ_API_KEYS):
46
+ try:
47
+ client = Groq(api_key=api_key)
48
+ self._groq_clients.append(client)
49
+ key_name = "primary" if i == 0 else f"key_{i + 1}"
50
+ logger.info(f"✅ Groq client initialized ({key_name})")
51
+ except Exception as e:
52
+ logger.warning(f"⚠️ Groq client {i + 1} initialization failed: {e}")
53
+
54
+ if not self._groq_clients:
55
+ self._groq_available = False
56
+ logger.warning("โš ๏ธ No valid Groq clients initialized")
57
+ else:
58
+ logger.info(f"🔑 {len(self._groq_clients)} Groq API key(s) available for rotation")
59
  except Exception as e:
60
+ logger.warning(f"⚠️ Groq module initialization failed: {e}")
61
  self._groq_available = False
62
  else:
63
  logger.info("📝 No GROQ_API_KEY found, using local model only")
 
79
  max_tokens: int = 256,
80
  temperature: float = 0.7
81
  ) -> str:
82
+ """Generate response using Groq with multi-key rotation and local fallback.
83
 
84
  Args:
85
  prompt: User prompt/question.
 
90
  Returns:
91
  Generated response string.
92
  """
93
+ # Try all Groq API keys before falling back to local
94
+ if self._groq_available and self._groq_clients:
95
+ # Try each key starting from current index
96
+ keys_tried = 0
97
+ total_keys = len(self._groq_clients)
98
+
99
+ while keys_tried < total_keys:
100
+ current_client = self._groq_clients[self._current_key_index]
101
+ key_name = "primary" if self._current_key_index == 0 else f"key_{self._current_key_index + 1}"
102
+
103
+ try:
104
+ response = self._call_groq_with_client(
105
+ current_client, prompt, system_prompt, max_tokens, temperature
106
+ )
107
+ if response:
108
+ return response
109
+ except Exception as e:
110
+ error_str = str(e).lower()
111
+ is_rate_limit = "rate" in error_str or "limit" in error_str or "429" in error_str
112
+
113
+ if is_rate_limit:
114
+ logger.warning(f"⚠️ Groq API rate limited ({key_name}), trying next key...")
115
+ else:
116
+ logger.warning(f"⚠️ Groq API error ({key_name}): {e}")
117
+
118
+ # Move to next key
119
+ self._current_key_index = (self._current_key_index + 1) % total_keys
120
+ keys_tried += 1
121
+
122
+ logger.warning(f"⚠️ All {total_keys} Groq API key(s) exhausted, falling back to local model")
123
 
124
  # Fallback to local model
125
  logger.info("🔄 Using local model for generation")
126
  return self._call_local(prompt, system_prompt, max_tokens)
127
 
128
+ def _call_groq_with_client(
129
  self,
130
+ client,
131
  prompt: str,
132
  system_prompt: Optional[str],
133
  max_tokens: int,
134
  temperature: float
135
  ) -> str:
136
+ """Call Groq API with a specific client."""
137
  messages = []
138
 
139
  if system_prompt:
 
141
 
142
  messages.append({"role": "user", "content": prompt})
143
 
144
+ response = client.chat.completions.create(
145
  model=GROQ_MODEL,
146
  messages=messages,
147
  max_tokens=max_tokens,