pakito312 committed
Commit c619745 · Parent(s): 26cacf3
update
api.py CHANGED
@@ -198,37 +198,65 @@ class ModelManager:
         self.loading = False
         return self.llm
 
-    def generate(
-
+    def generate(
+        self,
+        prompt: str,
+        temperature: float = 0.2,
+        max_tokens: int = 256,
+        top_p: float = 0.95
+    ):
+        """Generate text with llama_cpp"""
         if self.llm is None:
             self.load_model()
-
-        # If it's the DummyLLM, call the dummy method
+
         if isinstance(self.llm, DummyLLM):
             return self.llm.generate(prompt, temperature, max_tokens, top_p)
-
-        # For the real Llama model
+
         try:
-            response = self.llm
-                prompt
+            response = self.llm(
+                prompt,
                 max_tokens=max_tokens,
                 temperature=temperature,
                 top_p=top_p,
-                stop=None,  # or ['\n\n'] depending on your usage
                 echo=False
             )
-
-            return response[
+
+            return response["choices"][0]["text"]
+
         except Exception as e:
-
-
-
-
-
+            return (
+                "# Fallback response\n\n"
+                f"Prompt: {prompt}\n\n"
+                f"Error: {str(e)[:200]}"
+            )
+    def chat(
+        self,
+        messages: List[dict],
+        temperature: float = 0.2,
+        max_tokens: int = 256
+    ):
         if self.llm is None:
             self.load_model()
-
-
+
+        if isinstance(self.llm, DummyLLM):
+            return self.llm.chat(messages, temperature, max_tokens)
+
+        prompt = ""
+        for msg in messages:
+            role = msg["role"]
+            content = msg["content"]
+            prompt += f"{role.upper()}: {content}\n"
+        prompt += "ASSISTANT:"
+
+        response = self.llm(
+            prompt,
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+
+        return response["choices"][0]["text"]
+
+
 
 # ========== DUMMY MODEL FOR TESTING ==========
 class DummyLLM:
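For orientation, here is a minimal usage sketch of the two methods this commit rewrites. It is not part of the commit: the `from api import ModelManager` import path and the no-argument constructor are assumptions, while the method names, parameters, and defaults follow the signatures shown in the diff, and the DummyLLM fallback path is the one visible above.

# Usage sketch (not part of the commit). Assumptions: api.py exposes
# ModelManager with a no-argument constructor, and load_model() falls back
# to DummyLLM when no real GGUF model is available.
from api import ModelManager

manager = ModelManager()

# generate(): plain completion. Returns the first choice's text, or the
# "# Fallback response" string if llama_cpp raises an exception.
text = manager.generate(
    "Explain what a context window is.",
    temperature=0.2,
    max_tokens=256,
    top_p=0.95,
)
print(text)

# chat(): messages are flattened into "ROLE: content" lines and suffixed
# with "ASSISTANT:" before being sent to the underlying model.
reply = manager.chat(
    [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Summarise this commit in one sentence."},
    ],
    temperature=0.2,
    max_tokens=128,
)
print(reply)

Note that chat() builds a flat prompt string and reuses the plain completion call, so role formatting is limited to the simple "ROLE:" prefixes shown in the diff rather than any model-specific chat template.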