Spaces:
Sleeping
Sleeping
BolyosCsaba committed on
Commit ·
60c3b5b
1
Parent(s): 1f77c68
Update your project to use Qwen3-4B-Instruct-2507 and remove all other model references
Browse files
- README.md +2 -2
- app.py +2 -2
- config/config.yaml +8 -7
- src/llm_client.py +49 -17
README.md
CHANGED
|
@@ -85,7 +85,7 @@ Edit `config/config.yaml` to configure your LLM:
|
|
| 85 |
```yaml
|
| 86 |
llm:
|
| 87 |
provider: 'huggingface' # or 'openai', 'ollama'
|
| 88 |
-
model: '
|
| 89 |
system_prompt: |
|
| 90 |
You are Talker, a helpful AI assistant...
|
| 91 |
```
|
|
@@ -181,7 +181,7 @@ The **🔍 Status & Debug** tab shows:
|
|
| 181 |
```yaml
|
| 182 |
llm:
|
| 183 |
provider: 'huggingface'
|
| 184 |
-
model: '
|
| 185 |
max_tokens: 512
|
| 186 |
temperature: 0.7
|
| 187 |
```
|
|
|
|
| 85 |
```yaml
|
| 86 |
llm:
|
| 87 |
provider: 'huggingface' # or 'openai', 'ollama'
|
| 88 |
+
model: 'microsoft/Phi-3-mini-4k-instruct'
|
| 89 |
system_prompt: |
|
| 90 |
You are Talker, a helpful AI assistant...
|
| 91 |
```
|
|
|
|
| 181 |
```yaml
|
| 182 |
llm:
|
| 183 |
provider: 'huggingface'
|
| 184 |
+
model: 'microsoft/Phi-3-mini-4k-instruct'
|
| 185 |
max_tokens: 512
|
| 186 |
temperature: 0.7
|
| 187 |
```
|
app.py
CHANGED
|
@@ -43,7 +43,7 @@ except FileNotFoundError:
|
|
| 43 |
},
|
| 44 |
'llm': {
|
| 45 |
'provider': 'huggingface',
|
| 46 |
-
'model': '
|
| 47 |
'max_tokens': 512,
|
| 48 |
'temperature': 0.7,
|
| 49 |
'system_prompt': 'You are a helpful AI assistant participating in an Open Floor Protocol conversation.'
|
|
@@ -64,7 +64,7 @@ except FileNotFoundError:
|
|
| 64 |
# Initialize LLM client
|
| 65 |
llm_client = LLMClient(
|
| 66 |
provider=config['llm'].get('provider', 'huggingface'),
|
| 67 |
-
model=config['llm'].get('model', '
|
| 68 |
api_key=config['llm'].get('api_key'),
|
| 69 |
api_url=config['llm'].get('api_url'),
|
| 70 |
system_prompt=config['llm'].get('system_prompt')
|
|
|
|
| 43 |
},
|
| 44 |
'llm': {
|
| 45 |
'provider': 'huggingface',
|
| 46 |
+
'model': 'microsoft/Phi-3-mini-4k-instruct',
|
| 47 |
'max_tokens': 512,
|
| 48 |
'temperature': 0.7,
|
| 49 |
'system_prompt': 'You are a helpful AI assistant participating in an Open Floor Protocol conversation.'
|
|
|
|
| 64 |
# Initialize LLM client
|
| 65 |
llm_client = LLMClient(
|
| 66 |
provider=config['llm'].get('provider', 'huggingface'),
|
| 67 |
+
model=config['llm'].get('model', 'microsoft/Phi-3-mini-4k-instruct'),
|
| 68 |
api_key=config['llm'].get('api_key'),
|
| 69 |
api_url=config['llm'].get('api_url'),
|
| 70 |
system_prompt=config['llm'].get('system_prompt')
|
config/config.yaml
CHANGED
|
@@ -9,26 +9,27 @@ agent:
|
|
| 9 |
convener_url: 'https://convener-service.com/ofp'
|
| 10 |
|
| 11 |
llm:
|
| 12 |
-
# LLM provider: huggingface
|
| 13 |
provider: 'huggingface'
|
| 14 |
-
# Model name -
|
| 15 |
-
model: '
|
| 16 |
|
| 17 |
# API configuration
|
| 18 |
# For HuggingFace: set HF_TOKEN environment variable
|
| 19 |
-
# For OpenAI: set OPENAI_API_KEY environment variable
|
| 20 |
-
# For Ollama: runs locally, no key needed
|
| 21 |
api_url: null # Optional: custom API endpoint
|
| 22 |
|
| 23 |
-
# Generation parameters
|
| 24 |
-
max_tokens:
|
| 25 |
temperature: 0.7
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# System prompt
|
| 28 |
system_prompt: |
|
| 29 |
You are Talker, a helpful AI assistant participating in an Open Floor Protocol conversation.
|
| 30 |
You provide clear, concise, and friendly responses.
|
| 31 |
You can discuss a wide range of topics and help with questions.
|
|
|
|
| 32 |
|
| 33 |
conversation:
|
| 34 |
# Automatically respond to all messages
|
|
|
|
| 9 |
convener_url: 'https://convener-service.com/ofp'
|
| 10 |
|
| 11 |
llm:
|
| 12 |
+
# LLM provider: huggingface
|
| 13 |
provider: 'huggingface'
|
| 14 |
+
# Model name - Qwen3-4B-Instruct-2507 (optimized for instruction following and reasoning)
|
| 15 |
+
model: 'Qwen/Qwen3-4B-Instruct-2507'
|
| 16 |
|
| 17 |
# API configuration
|
| 18 |
# For HuggingFace: set HF_TOKEN environment variable
|
|
|
|
|
|
|
| 19 |
api_url: null # Optional: custom API endpoint
|
| 20 |
|
| 21 |
+
# Generation parameters (optimized for Qwen3)
|
| 22 |
+
max_tokens: 16384
|
| 23 |
temperature: 0.7
|
| 24 |
+
top_p: 0.8
|
| 25 |
+
top_k: 20
|
| 26 |
|
| 27 |
# System prompt
|
| 28 |
system_prompt: |
|
| 29 |
You are Talker, a helpful AI assistant participating in an Open Floor Protocol conversation.
|
| 30 |
You provide clear, concise, and friendly responses.
|
| 31 |
You can discuss a wide range of topics and help with questions.
|
| 32 |
+
Please reason step by step when solving complex problems.
|
| 33 |
|
| 34 |
conversation:
|
| 35 |
# Automatically respond to all messages
|
src/llm_client.py
CHANGED
|
@@ -17,7 +17,7 @@ class LLMClient:
|
|
| 17 |
def __init__(
|
| 18 |
self,
|
| 19 |
provider: str = "huggingface",
|
| 20 |
-
model: str = "Qwen/
|
| 21 |
api_key: Optional[str] = None,
|
| 22 |
api_url: Optional[str] = None,
|
| 23 |
system_prompt: Optional[str] = None
|
|
@@ -97,37 +97,69 @@ class LLMClient:
|
|
| 97 |
max_tokens: int,
|
| 98 |
temperature: float
|
| 99 |
) -> str:
|
| 100 |
-
"""Generate response using HuggingFace Inference API"""
|
| 101 |
-
headers = {
|
|
|
|
|
|
|
| 102 |
if self.api_key:
|
| 103 |
headers["Authorization"] = f"Bearer {self.api_key}"
|
| 104 |
|
| 105 |
-
# Build
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
|
|
|
|
|
|
| 108 |
payload = {
|
| 109 |
-
"
|
| 110 |
-
"
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
}
|
| 116 |
|
| 117 |
response = requests.post(
|
| 118 |
-
self.
|
| 119 |
headers=headers,
|
| 120 |
json=payload,
|
| 121 |
-
timeout=
|
| 122 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
response.raise_for_status()
|
| 124 |
-
|
| 125 |
result = response.json()
|
| 126 |
|
| 127 |
-
# Handle
|
| 128 |
-
if isinstance(result,
|
|
|
|
|
|
|
|
|
|
| 129 |
return result[0].get("generated_text", "").strip()
|
| 130 |
-
elif isinstance(result, dict):
|
| 131 |
return result.get("generated_text", "").strip()
|
| 132 |
else:
|
| 133 |
logger.warning(f"Unexpected response format: {result}")
|
|
|
|
| 17 |
def __init__(
|
| 18 |
self,
|
| 19 |
provider: str = "huggingface",
|
| 20 |
+
model: str = "Qwen/Qwen3-4B-Instruct-2507",
|
| 21 |
api_key: Optional[str] = None,
|
| 22 |
api_url: Optional[str] = None,
|
| 23 |
system_prompt: Optional[str] = None
|
|
|
|
| 97 |
max_tokens: int,
|
| 98 |
temperature: float
|
| 99 |
) -> str:
|
| 100 |
+
"""Generate response using HuggingFace Inference API with OpenAI-compatible format"""
|
| 101 |
+
headers = {
|
| 102 |
+
"Content-Type": "application/json"
|
| 103 |
+
}
|
| 104 |
if self.api_key:
|
| 105 |
headers["Authorization"] = f"Bearer {self.api_key}"
|
| 106 |
|
| 107 |
+
# Build messages in OpenAI format for better compatibility with Qwen3
|
| 108 |
+
messages = [{"role": "system", "content": self.system_prompt}]
|
| 109 |
+
|
| 110 |
+
if conversation_history:
|
| 111 |
+
messages.extend(conversation_history)
|
| 112 |
+
|
| 113 |
+
messages.append({"role": "user", "content": message})
|
| 114 |
|
| 115 |
+
# Use HuggingFace's OpenAI-compatible endpoint
|
| 116 |
+
api_base = "https://api-inference.huggingface.co/models"
|
| 117 |
payload = {
|
| 118 |
+
"model": self.model,
|
| 119 |
+
"messages": messages,
|
| 120 |
+
"max_tokens": max_tokens,
|
| 121 |
+
"temperature": temperature,
|
| 122 |
+
"top_p": 0.8,
|
| 123 |
+
"stream": False
|
| 124 |
}
|
| 125 |
|
| 126 |
response = requests.post(
|
| 127 |
+
f"{api_base}/{self.model}/v1/chat/completions",
|
| 128 |
headers=headers,
|
| 129 |
json=payload,
|
| 130 |
+
timeout=120
|
| 131 |
)
|
| 132 |
+
|
| 133 |
+
# Fallback to legacy format if OpenAI-compatible endpoint fails
|
| 134 |
+
if response.status_code == 404:
|
| 135 |
+
logger.info("Falling back to legacy HuggingFace API format")
|
| 136 |
+
prompt = self._build_prompt(message, conversation_history)
|
| 137 |
+
payload = {
|
| 138 |
+
"inputs": prompt,
|
| 139 |
+
"parameters": {
|
| 140 |
+
"max_new_tokens": max_tokens,
|
| 141 |
+
"temperature": temperature,
|
| 142 |
+
"top_p": 0.8,
|
| 143 |
+
"return_full_text": False
|
| 144 |
+
}
|
| 145 |
+
}
|
| 146 |
+
response = requests.post(
|
| 147 |
+
self.api_url,
|
| 148 |
+
headers=headers,
|
| 149 |
+
json=payload,
|
| 150 |
+
timeout=120
|
| 151 |
+
)
|
| 152 |
+
|
| 153 |
response.raise_for_status()
|
|
|
|
| 154 |
result = response.json()
|
| 155 |
|
| 156 |
+
# Handle OpenAI-compatible response format
|
| 157 |
+
if isinstance(result, dict) and "choices" in result:
|
| 158 |
+
return result["choices"][0]["message"]["content"].strip()
|
| 159 |
+
# Handle legacy response formats
|
| 160 |
+
elif isinstance(result, list) and len(result) > 0:
|
| 161 |
return result[0].get("generated_text", "").strip()
|
| 162 |
+
elif isinstance(result, dict) and "generated_text" in result:
|
| 163 |
return result.get("generated_text", "").strip()
|
| 164 |
else:
|
| 165 |
logger.warning(f"Unexpected response format: {result}")
|