Spaces:
Sleeping
Sleeping
Upload app/rag/utils.py with huggingface_hub
Browse files
- app/rag/utils.py +10 -11
app/rag/utils.py
CHANGED
|
@@ -38,7 +38,7 @@ def get_llm():
|
|
| 38 |
logger.info(f"[LLM] Loading {MODEL_PATH} ...")
|
| 39 |
_llm = Llama(
|
| 40 |
model_path=MODEL_PATH,
|
| 41 |
-
n_ctx=
|
| 42 |
n_threads=os.cpu_count() or 4,
|
| 43 |
n_gpu_layers=0,
|
| 44 |
verbose=False,
|
|
@@ -72,7 +72,7 @@ SYSTEM_PROMPT = (
|
|
| 72 |
)
|
| 73 |
|
| 74 |
|
| 75 |
-
def build_context(hits: List[Dict[str, Any]], max_chars: int =
|
| 76 |
parts = []
|
| 77 |
total = 0
|
| 78 |
for i, hit in enumerate(hits, 1):
|
|
@@ -105,8 +105,8 @@ def generate_answer(
|
|
| 105 |
question: str,
|
| 106 |
hits: List[Dict[str, Any]],
|
| 107 |
max_tokens: int = 400,
|
| 108 |
-
temperature: float = 0.
|
| 109 |
-
top_p: float = 0.
|
| 110 |
presence_penalty: float = 0.0,
|
| 111 |
frequency_penalty: float = 0.0,
|
| 112 |
repeat_penalty: float = 1.1,
|
|
@@ -147,11 +147,11 @@ def generate_answer(
|
|
| 147 |
def answer_query(
|
| 148 |
searcher,
|
| 149 |
query: str,
|
| 150 |
-
top_k: int =
|
| 151 |
rerank: bool = True,
|
| 152 |
max_tokens: int = 400,
|
| 153 |
-
temperature: float = 0.
|
| 154 |
-
top_p: float = 0.
|
| 155 |
presence_penalty: float = 0.0,
|
| 156 |
frequency_penalty: float = 0.0,
|
| 157 |
) -> Dict[str, Any]:
|
|
@@ -188,11 +188,11 @@ def answer_query(
|
|
| 188 |
async def answer_query_async(
|
| 189 |
searcher,
|
| 190 |
query: str,
|
| 191 |
-
top_k: int =
|
| 192 |
rerank: bool = True,
|
| 193 |
max_tokens: int = 400,
|
| 194 |
-
temperature: float = 0.
|
| 195 |
-
top_p: float = 0.
|
| 196 |
presence_penalty: float = 0.0,
|
| 197 |
frequency_penalty: float = 0.0,
|
| 198 |
) -> Dict[str, Any]:
|
|
@@ -209,7 +209,6 @@ async def answer_query_async(
|
|
| 209 |
print(hit["text"])
|
| 210 |
print("-" * 80)
|
| 211 |
|
| 212 |
-
# ✅ FIX — lambda inapeleka ALL parameters vizuri
|
| 213 |
# Kabla top_p na repeat_penalty hazikupelekwa — ndio sababu jibu lilibadilika
|
| 214 |
answer = await loop.run_in_executor(
|
| 215 |
None,
|
|
|
|
| 38 |
logger.info(f"[LLM] Loading {MODEL_PATH} ...")
|
| 39 |
_llm = Llama(
|
| 40 |
model_path=MODEL_PATH,
|
| 41 |
+
n_ctx=4096,
|
| 42 |
n_threads=os.cpu_count() or 4,
|
| 43 |
n_gpu_layers=0,
|
| 44 |
verbose=False,
|
|
|
|
| 72 |
)
|
| 73 |
|
| 74 |
|
| 75 |
+
def build_context(hits: List[Dict[str, Any]], max_chars: int = 3000) -> str:
|
| 76 |
parts = []
|
| 77 |
total = 0
|
| 78 |
for i, hit in enumerate(hits, 1):
|
|
|
|
| 105 |
question: str,
|
| 106 |
hits: List[Dict[str, Any]],
|
| 107 |
max_tokens: int = 400,
|
| 108 |
+
temperature: float = 0.1,
|
| 109 |
+
top_p: float = 0.95,
|
| 110 |
presence_penalty: float = 0.0,
|
| 111 |
frequency_penalty: float = 0.0,
|
| 112 |
repeat_penalty: float = 1.1,
|
|
|
|
| 147 |
def answer_query(
|
| 148 |
searcher,
|
| 149 |
query: str,
|
| 150 |
+
top_k: int = 4,
|
| 151 |
rerank: bool = True,
|
| 152 |
max_tokens: int = 400,
|
| 153 |
+
temperature: float = 0.1,
|
| 154 |
+
top_p: float = 0.95,
|
| 155 |
presence_penalty: float = 0.0,
|
| 156 |
frequency_penalty: float = 0.0,
|
| 157 |
) -> Dict[str, Any]:
|
|
|
|
| 188 |
async def answer_query_async(
|
| 189 |
searcher,
|
| 190 |
query: str,
|
| 191 |
+
top_k: int = 4,
|
| 192 |
rerank: bool = True,
|
| 193 |
max_tokens: int = 400,
|
| 194 |
+
temperature: float = 0.1,
|
| 195 |
+
top_p: float = 0.95,
|
| 196 |
presence_penalty: float = 0.0,
|
| 197 |
frequency_penalty: float = 0.0,
|
| 198 |
) -> Dict[str, Any]:
|
|
|
|
| 209 |
print(hit["text"])
|
| 210 |
print("-" * 80)
|
| 211 |
|
|
|
|
| 212 |
# Kabla top_p na repeat_penalty hazikupelekwa — ndio sababu jibu lilibadilika
|
| 213 |
answer = await loop.run_in_executor(
|
| 214 |
None,
|