Spaces:
Sleeping
Sleeping
Update api/clare_core.py
Browse files — api/clare_core.py (+22 -11)
api/clare_core.py
CHANGED
|
@@ -712,18 +712,29 @@ def chat_with_clare(
|
|
| 712 |
cognitive_state=cognitive_state,
|
| 713 |
rag_context=rag_context,
|
| 714 |
)
|
| 715 |
-
|
| 716 |
-
answer, _prof = safe_chat_completion_profiled(
|
| 717 |
-
model_name=model_name,
|
| 718 |
-
messages=messages,
|
| 719 |
-
lang=language_preference,
|
| 720 |
-
op="chat",
|
| 721 |
-
temperature=0.5,
|
| 722 |
-
max_tokens=DEFAULT_MAX_OUTPUT_TOKENS,
|
| 723 |
-
)
|
| 724 |
-
|
| 725 |
history = history + [(message, answer)]
|
| 726 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 727 |
|
| 728 |
|
| 729 |
def export_conversation(
|
|
|
|
| 712 |
cognitive_state=cognitive_state,
|
| 713 |
rag_context=rag_context,
|
| 714 |
)
|
| 715 |
+
answer, prof = safe_chat_completion_profiled(...)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 716 |
history = history + [(message, answer)]
|
| 717 |
+
|
| 718 |
+
llm_stats = {
|
| 719 |
+
"llm_profile": {
|
| 720 |
+
"model": prof.get("model"),
|
| 721 |
+
"streaming_used": prof.get("streaming_used"),
|
| 722 |
+
"max_tokens": prof.get("max_tokens"),
|
| 723 |
+
"output_tokens_est": prof.get("output_tokens_est"),
|
| 724 |
+
"tokens_per_sec_est": prof.get("tokens_per_sec_est"),
|
| 725 |
+
},
|
| 726 |
+
"marks_ms": {
|
| 727 |
+
"llm_first_token": prof.get("ttft_ms"),
|
| 728 |
+
"llm_done": prof.get("llm_total_ms"),
|
| 729 |
+
},
|
| 730 |
+
"segments_ms": {
|
| 731 |
+
"llm_ttft_ms": prof.get("ttft_ms"),
|
| 732 |
+
"llm_gen_ms": prof.get("gen_ms"),
|
| 733 |
+
"llm_done": prof.get("llm_total_ms"),
|
| 734 |
+
},
|
| 735 |
+
}
|
| 736 |
+
|
| 737 |
+
return answer, history, llm_stats
|
| 738 |
|
| 739 |
|
| 740 |
def export_conversation(
|