BolyosCsaba commited on
Commit
60c3b5b
·
1 Parent(s): 1f77c68

Update your project to use Qwen3-4B-Instruct-2507 and remove all other model references

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. app.py +2 -2
  3. config/config.yaml +8 -7
  4. src/llm_client.py +49 -17
README.md CHANGED
@@ -85,7 +85,7 @@ Edit `config/config.yaml` to configure your LLM:
85
  ```yaml
86
  llm:
87
  provider: 'huggingface' # or 'openai', 'ollama'
88
- model: 'Qwen/Qwen2.5-7B-Instruct'
89
  system_prompt: |
90
  You are Talker, a helpful AI assistant...
91
  ```
@@ -181,7 +181,7 @@ The **🔍 Status & Debug** tab shows:
181
  ```yaml
182
  llm:
183
  provider: 'huggingface'
184
- model: 'Qwen/Qwen2.5-7B-Instruct'
185
  max_tokens: 512
186
  temperature: 0.7
187
  ```
 
85
  ```yaml
86
  llm:
87
  provider: 'huggingface' # or 'openai', 'ollama'
88
+ model: 'Qwen/Qwen3-4B-Instruct-2507'
89
  system_prompt: |
90
  You are Talker, a helpful AI assistant...
91
  ```
 
181
  ```yaml
182
  llm:
183
  provider: 'huggingface'
184
+ model: 'Qwen/Qwen3-4B-Instruct-2507'
185
  max_tokens: 512
186
  temperature: 0.7
187
  ```
app.py CHANGED
@@ -43,7 +43,7 @@ except FileNotFoundError:
43
  },
44
  'llm': {
45
  'provider': 'huggingface',
46
- 'model': 'Qwen/Qwen2.5-7B-Instruct',
47
  'max_tokens': 512,
48
  'temperature': 0.7,
49
  'system_prompt': 'You are a helpful AI assistant participating in an Open Floor Protocol conversation.'
@@ -64,7 +64,7 @@ except FileNotFoundError:
64
  # Initialize LLM client
65
  llm_client = LLMClient(
66
  provider=config['llm'].get('provider', 'huggingface'),
67
- model=config['llm'].get('model', 'Qwen/Qwen2.5-7B-Instruct'),
68
  api_key=config['llm'].get('api_key'),
69
  api_url=config['llm'].get('api_url'),
70
  system_prompt=config['llm'].get('system_prompt')
 
43
  },
44
  'llm': {
45
  'provider': 'huggingface',
46
+ 'model': 'Qwen/Qwen3-4B-Instruct-2507',
47
  'max_tokens': 512,
48
  'temperature': 0.7,
49
  'system_prompt': 'You are a helpful AI assistant participating in an Open Floor Protocol conversation.'
 
64
  # Initialize LLM client
65
  llm_client = LLMClient(
66
  provider=config['llm'].get('provider', 'huggingface'),
67
+ model=config['llm'].get('model', 'Qwen/Qwen3-4B-Instruct-2507'),
68
  api_key=config['llm'].get('api_key'),
69
  api_url=config['llm'].get('api_url'),
70
  system_prompt=config['llm'].get('system_prompt')
config/config.yaml CHANGED
@@ -9,26 +9,27 @@ agent:
9
  convener_url: 'https://convener-service.com/ofp'
10
 
11
  llm:
12
- # LLM provider: huggingface, openai, ollama
13
  provider: 'huggingface'
14
- # Model name - Using microsoft/Phi-3-mini-4k-instruct (confirmed free tier)
15
- model: 'microsoft/Phi-3-mini-4k-instruct'
16
 
17
  # API configuration
18
  # For HuggingFace: set HF_TOKEN environment variable
19
- # For OpenAI: set OPENAI_API_KEY environment variable
20
- # For Ollama: runs locally, no key needed
21
  api_url: null # Optional: custom API endpoint
22
 
23
- # Generation parameters
24
- max_tokens: 512
25
  temperature: 0.7
 
 
26
 
27
  # System prompt
28
  system_prompt: |
29
  You are Talker, a helpful AI assistant participating in an Open Floor Protocol conversation.
30
  You provide clear, concise, and friendly responses.
31
  You can discuss a wide range of topics and help with questions.
 
32
 
33
  conversation:
34
  # Automatically respond to all messages
 
9
  convener_url: 'https://convener-service.com/ofp'
10
 
11
  llm:
12
+ # LLM provider: huggingface
13
  provider: 'huggingface'
14
+ # Model name - Qwen3-4B-Instruct-2507 (optimized for instruction following and reasoning)
15
+ model: 'Qwen/Qwen3-4B-Instruct-2507'
16
 
17
  # API configuration
18
  # For HuggingFace: set HF_TOKEN environment variable
 
 
19
  api_url: null # Optional: custom API endpoint
20
 
21
+ # Generation parameters (optimized for Qwen3)
22
+ max_tokens: 16384
23
  temperature: 0.7
24
+ top_p: 0.8
25
+ top_k: 20
26
 
27
  # System prompt
28
  system_prompt: |
29
  You are Talker, a helpful AI assistant participating in an Open Floor Protocol conversation.
30
  You provide clear, concise, and friendly responses.
31
  You can discuss a wide range of topics and help with questions.
32
+ Please reason step by step when solving complex problems.
33
 
34
  conversation:
35
  # Automatically respond to all messages
src/llm_client.py CHANGED
@@ -17,7 +17,7 @@ class LLMClient:
17
  def __init__(
18
  self,
19
  provider: str = "huggingface",
20
- model: str = "Qwen/Qwen2.5-7B-Instruct",
21
  api_key: Optional[str] = None,
22
  api_url: Optional[str] = None,
23
  system_prompt: Optional[str] = None
@@ -97,37 +97,69 @@ class LLMClient:
97
  max_tokens: int,
98
  temperature: float
99
  ) -> str:
100
- """Generate response using HuggingFace Inference API"""
101
- headers = {}
 
 
102
  if self.api_key:
103
  headers["Authorization"] = f"Bearer {self.api_key}"
104
 
105
- # Build prompt with conversation history
106
- prompt = self._build_prompt(message, conversation_history)
 
 
 
 
 
107
 
 
 
108
  payload = {
109
- "inputs": prompt,
110
- "parameters": {
111
- "max_new_tokens": max_tokens,
112
- "temperature": temperature,
113
- "return_full_text": False
114
- }
115
  }
116
 
117
  response = requests.post(
118
- self.api_url,
119
  headers=headers,
120
  json=payload,
121
- timeout=60
122
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  response.raise_for_status()
124
-
125
  result = response.json()
126
 
127
- # Handle different response formats
128
- if isinstance(result, list) and len(result) > 0:
 
 
 
129
  return result[0].get("generated_text", "").strip()
130
- elif isinstance(result, dict):
131
  return result.get("generated_text", "").strip()
132
  else:
133
  logger.warning(f"Unexpected response format: {result}")
 
17
  def __init__(
18
  self,
19
  provider: str = "huggingface",
20
+ model: str = "Qwen/Qwen3-4B-Instruct-2507",
21
  api_key: Optional[str] = None,
22
  api_url: Optional[str] = None,
23
  system_prompt: Optional[str] = None
 
97
  max_tokens: int,
98
  temperature: float
99
  ) -> str:
100
+ """Generate response using HuggingFace Inference API with OpenAI-compatible format"""
101
+ headers = {
102
+ "Content-Type": "application/json"
103
+ }
104
  if self.api_key:
105
  headers["Authorization"] = f"Bearer {self.api_key}"
106
 
107
+ # Build messages in OpenAI format for better compatibility with Qwen3
108
+ messages = [{"role": "system", "content": self.system_prompt}]
109
+
110
+ if conversation_history:
111
+ messages.extend(conversation_history)
112
+
113
+ messages.append({"role": "user", "content": message})
114
 
115
+ # Use HuggingFace's OpenAI-compatible endpoint
116
+ api_base = "https://api-inference.huggingface.co/models"
117
  payload = {
118
+ "model": self.model,
119
+ "messages": messages,
120
+ "max_tokens": max_tokens,
121
+ "temperature": temperature,
122
+ "top_p": 0.8,
123
+ "stream": False
124
  }
125
 
126
  response = requests.post(
127
+ f"{api_base}/{self.model}/v1/chat/completions",
128
  headers=headers,
129
  json=payload,
130
+ timeout=120
131
  )
132
+
133
+ # Fallback to legacy format if OpenAI-compatible endpoint fails
134
+ if response.status_code == 404:
135
+ logger.info("Falling back to legacy HuggingFace API format")
136
+ prompt = self._build_prompt(message, conversation_history)
137
+ payload = {
138
+ "inputs": prompt,
139
+ "parameters": {
140
+ "max_new_tokens": max_tokens,
141
+ "temperature": temperature,
142
+ "top_p": 0.8,
143
+ "return_full_text": False
144
+ }
145
+ }
146
+ response = requests.post(
147
+ self.api_url,
148
+ headers=headers,
149
+ json=payload,
150
+ timeout=120
151
+ )
152
+
153
  response.raise_for_status()
 
154
  result = response.json()
155
 
156
+ # Handle OpenAI-compatible response format
157
+ if isinstance(result, dict) and "choices" in result:
158
+ return result["choices"][0]["message"]["content"].strip()
159
+ # Handle legacy response formats
160
+ elif isinstance(result, list) and len(result) > 0:
161
  return result[0].get("generated_text", "").strip()
162
+ elif isinstance(result, dict) and "generated_text" in result:
163
  return result.get("generated_text", "").strip()
164
  else:
165
  logger.warning(f"Unexpected response format: {result}")