Spaces:

BladeSzaSza
/

Talker

Sleeping

App Files Files Community

BolyosCsaba committed on Feb 11

Commit

60c3b5b

1 Parent(s): 1f77c68

Update your project to use Qwen3-4B-Instruct-2507 and remove all other model references

Browse files

Files changed (4) hide show

README.md +2 -2
app.py +2 -2
config/config.yaml +8 -7
src/llm_client.py +49 -17

README.md CHANGED Viewed

@@ -85,7 +85,7 @@ Edit `config/config.yaml` to configure your LLM:
 ```yaml
 llm:
   provider: 'huggingface'  # or 'openai', 'ollama'
-  model: 'Qwen/Qwen2.5-7B-Instruct'
   system_prompt: |
     You are Talker, a helpful AI assistant...
 ```
@@ -181,7 +181,7 @@ The **🔍 Status & Debug** tab shows:
 ```yaml
 llm:
   provider: 'huggingface'
-  model: 'Qwen/Qwen2.5-7B-Instruct'
   max_tokens: 512
   temperature: 0.7
 ```

 ```yaml
 llm:
   provider: 'huggingface'  # or 'openai', 'ollama'
+  model: 'microsoft/Phi-3-mini-4k-instruct'
   system_prompt: |
     You are Talker, a helpful AI assistant...
 ```
 ```yaml
 llm:
   provider: 'huggingface'
+  model: 'microsoft/Phi-3-mini-4k-instruct'
   max_tokens: 512
   temperature: 0.7
 ```

app.py CHANGED Viewed

@@ -43,7 +43,7 @@ except FileNotFoundError:
         },
         'llm': {
             'provider': 'huggingface',
-            'model': 'Qwen/Qwen2.5-7B-Instruct',
             'max_tokens': 512,
             'temperature': 0.7,
             'system_prompt': 'You are a helpful AI assistant participating in an Open Floor Protocol conversation.'
@@ -64,7 +64,7 @@ except FileNotFoundError:
 # Initialize LLM client
 llm_client = LLMClient(
     provider=config['llm'].get('provider', 'huggingface'),
-    model=config['llm'].get('model', 'Qwen/Qwen2.5-7B-Instruct'),
     api_key=config['llm'].get('api_key'),
     api_url=config['llm'].get('api_url'),
     system_prompt=config['llm'].get('system_prompt')

         },
         'llm': {
             'provider': 'huggingface',
+            'model': 'microsoft/Phi-3-mini-4k-instruct',
             'max_tokens': 512,
             'temperature': 0.7,
             'system_prompt': 'You are a helpful AI assistant participating in an Open Floor Protocol conversation.'
 # Initialize LLM client
 llm_client = LLMClient(
     provider=config['llm'].get('provider', 'huggingface'),
+    model=config['llm'].get('model', 'microsoft/Phi-3-mini-4k-instruct'),
     api_key=config['llm'].get('api_key'),
     api_url=config['llm'].get('api_url'),
     system_prompt=config['llm'].get('system_prompt')

config/config.yaml CHANGED Viewed

@@ -9,26 +9,27 @@ agent:
   convener_url: 'https://convener-service.com/ofp'
 llm:
-  # LLM provider: huggingface, openai, ollama
   provider: 'huggingface'
-  # Model name - Using microsoft/Phi-3-mini-4k-instruct (confirmed free tier)
-  model: 'microsoft/Phi-3-mini-4k-instruct'
   # API configuration
   # For HuggingFace: set HF_TOKEN environment variable
-  # For OpenAI: set OPENAI_API_KEY environment variable
-  # For Ollama: runs locally, no key needed
   api_url: null  # Optional: custom API endpoint
-  # Generation parameters
-  max_tokens: 512
   temperature: 0.7
   # System prompt
   system_prompt: |
     You are Talker, a helpful AI assistant participating in an Open Floor Protocol conversation.
     You provide clear, concise, and friendly responses.
     You can discuss a wide range of topics and help with questions.
 conversation:
   # Automatically respond to all messages

   convener_url: 'https://convener-service.com/ofp'
 llm:
+  # LLM provider: huggingface
   provider: 'huggingface'
+  # Model name - Qwen3-4B-Instruct-2507 (optimized for instruction following and reasoning)
+  model: 'Qwen/Qwen3-4B-Instruct-2507'
   # API configuration
   # For HuggingFace: set HF_TOKEN environment variable
   api_url: null  # Optional: custom API endpoint
+  # Generation parameters (optimized for Qwen3)
+  max_tokens: 16384
   temperature: 0.7
+  top_p: 0.8
+  top_k: 20
   # System prompt
   system_prompt: |
     You are Talker, a helpful AI assistant participating in an Open Floor Protocol conversation.
     You provide clear, concise, and friendly responses.
     You can discuss a wide range of topics and help with questions.
+    Please reason step by step when solving complex problems.
 conversation:
   # Automatically respond to all messages

src/llm_client.py CHANGED Viewed

@@ -17,7 +17,7 @@ class LLMClient:
     def __init__(
         self,
         provider: str = "huggingface",
-        model: str = "Qwen/Qwen2.5-7B-Instruct",
         api_key: Optional[str] = None,
         api_url: Optional[str] = None,
         system_prompt: Optional[str] = None
@@ -97,37 +97,69 @@ class LLMClient:
         max_tokens: int,
         temperature: float
     ) -> str:
-        """Generate response using HuggingFace Inference API"""
-        headers = {}
         if self.api_key:
             headers["Authorization"] = f"Bearer {self.api_key}"
-        # Build prompt with conversation history
-        prompt = self._build_prompt(message, conversation_history)
         payload = {
-            "inputs": prompt,
-            "parameters": {
-                "max_new_tokens": max_tokens,
-                "temperature": temperature,
-                "return_full_text": False
-            }
         }
         response = requests.post(
-            self.api_url,
             headers=headers,
             json=payload,
-            timeout=60
         )
         response.raise_for_status()
         result = response.json()
-        # Handle different response formats
-        if isinstance(result, list) and len(result) > 0:
             return result[0].get("generated_text", "").strip()
-        elif isinstance(result, dict):
             return result.get("generated_text", "").strip()
         else:
             logger.warning(f"Unexpected response format: {result}")

     def __init__(
         self,
         provider: str = "huggingface",
+        model: str = "Qwen/Qwen3-4B-Instruct-2507",
         api_key: Optional[str] = None,
         api_url: Optional[str] = None,
         system_prompt: Optional[str] = None
         max_tokens: int,
         temperature: float
     ) -> str:
+        """Generate response using HuggingFace Inference API with OpenAI-compatible format"""
+        headers = {
+            "Content-Type": "application/json"
+        }
         if self.api_key:
             headers["Authorization"] = f"Bearer {self.api_key}"
+        # Build messages in OpenAI format for better compatibility with Qwen3
+        messages = [{"role": "system", "content": self.system_prompt}]
+        if conversation_history:
+            messages.extend(conversation_history)
+        messages.append({"role": "user", "content": message})
+        # Use HuggingFace's OpenAI-compatible endpoint
+        api_base = "https://api-inference.huggingface.co/models"
         payload = {
+            "model": self.model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": 0.8,
+            "stream": False
         }
         response = requests.post(
+            f"{api_base}/{self.model}/v1/chat/completions",
             headers=headers,
             json=payload,
+            timeout=120
         )
+        # Fallback to legacy format if OpenAI-compatible endpoint fails
+        if response.status_code == 404:
+            logger.info("Falling back to legacy HuggingFace API format")
+            prompt = self._build_prompt(message, conversation_history)
+            payload = {
+                "inputs": prompt,
+                "parameters": {
+                    "max_new_tokens": max_tokens,
+                    "temperature": temperature,
+                    "top_p": 0.8,
+                    "return_full_text": False
+                }
+            }
+            response = requests.post(
+                self.api_url,
+                headers=headers,
+                json=payload,
+                timeout=120
+            )
         response.raise_for_status()
         result = response.json()
+        # Handle OpenAI-compatible response format
+        if isinstance(result, dict) and "choices" in result:
+            return result["choices"][0]["message"]["content"].strip()
+        # Handle legacy response formats
+        elif isinstance(result, list) and len(result) > 0:
             return result[0].get("generated_text", "").strip()
+        elif isinstance(result, dict) and "generated_text" in result:
             return result.get("generated_text", "").strip()
         else:
             logger.warning(f"Unexpected response format: {result}")