Spaces: Build error (hit the token limit)

app.py CHANGED
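The commit below fixes the error by tokenizing the assembled prompt with llama_cpp's own tokenizer and truncating it to 2048 tokens before generation. For context, the failure mode being addressed: if the prompt tokenizes to more than the model's context window, llama-cpp-python refuses to generate. A hypothetical reproduction (the model path, context size, and sample text are assumptions, not taken from the Space):

from llama_cpp import Llama

# Hypothetical model path and context size
llm = Llama(model_path="./model.gguf", n_ctx=2048)

# A prompt far past the window; this typically fails with a
# ValueError about requested tokens exceeding the context window
llm("persistent dry cough and mild fever " * 500)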
@@ -8,6 +8,7 @@ import torchaudio.transforms as T
 import re
 import logging, sys
 import json
+from llama_cpp import Llama


 logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
@@ -51,6 +52,25 @@ SYSTEM_PROMPT = (
     "If you need to ask the user a follow-up question, do so clearly.\n",
 )

+def truncate_prompt(prompt, max_tokens=2048):
+    # Use your model's tokenizer here; this is a placeholder
+    tokens = prompt.split()  # Replace with actual tokenization
+    if len(tokens) > max_tokens:
+        tokens = tokens[:max_tokens]
+    return " ".join(tokens)
+
+# Initialize your model (adjust path and params as needed)
+llm = Llama(model_path=model_path)
+
+def truncate_prompt_llama(prompt, max_tokens=2048):
+    # Tokenize the prompt using llama_cpp's tokenizer
+    tokens = llm.tokenize(prompt.encode("utf-8"))
+    if len(tokens) > max_tokens:
+        # Truncate tokens and decode back to string
+        tokens = tokens[:max_tokens]
+        prompt = llm.detokenize(tokens).decode("utf-8", errors="ignore")
+    return prompt
+
 # ========== Generator handler ==========
 def on_submit(symptoms_text, history):
     log = []
@@ -103,8 +123,8 @@ def on_submit(symptoms_text, history):
         f"{SYSTEM_PROMPT}",
         f"User symptoms: '{cleaned}'",
         f"Relevant ICD-10 context:\n{context_text}",
-        "Respond with your top 3 ICD-10 codes and their confidence scores.",
     ])
+    prompt = truncate_prompt_llama(prompt, max_tokens=2048)

     msg = "✏️ Prompt built"
     log.append(msg)
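With the change in place, the helper can be exercised on its own. A minimal sketch, assuming a local GGUF model; the path, n_ctx value, and sample text are placeholders, and truncate_prompt_llama is copied from the diff above:

from llama_cpp import Llama

# Hypothetical path and context size; truncating to half of n_ctx
# leaves room for the generated tokens
llm = Llama(model_path="./model.gguf", n_ctx=4096)

def truncate_prompt_llama(prompt, max_tokens=2048):
    # Count tokens with the model's own vocabulary, not whitespace
    tokens = llm.tokenize(prompt.encode("utf-8"))
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
        prompt = llm.detokenize(tokens).decode("utf-8", errors="ignore")
    return prompt

long_prompt = "persistent dry cough and mild fever " * 500
short_prompt = truncate_prompt_llama(long_prompt)
print(len(llm.tokenize(short_prompt.encode("utf-8"))))  # about 2048; fits the window

Two caveats worth noting. The first helper in the diff, truncate_prompt, splits on whitespace, which undercounts tokens (one word is often several tokens), so only truncate_prompt_llama is wired into on_submit; the whitespace version is explicitly a placeholder. And tokenize/detokenize round-trips are not always exact, so the re-tokenized count can differ from 2048 by a token or two.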