Upload folder using huggingface_hub

advanced_rag.py CHANGED (+24 -11)
@@ -23,6 +23,7 @@ from langchain_core.runnables import RunnableParallel, RunnableLambda
 from transformers.quantizers.auto import AutoQuantizationConfig
 import gradio as gr
 import requests
+from pydantic import PrivateAttr
 
 # Add Mistral imports with fallback handling
 try:
@@ -174,7 +175,7 @@ class ElevatedRagChain:
         class MistralLLM(LLM):
             temperature: float = 0.7
             top_p: float = 0.95
-            _client: Any =
+            _client: Any = PrivateAttr()  # Declare _client as a private attribute
             def __init__(self, api_key: str, temperature: float = 0.7, top_p: float = 0.95):
                 super().__init__()
                 self._client = Mistral(api_key=api_key)
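Note on this hunk: LangChain's LLM base class is a pydantic model, and pydantic rejects assignment to an underscore-prefixed attribute that has not been declared, so `self._client = Mistral(api_key=api_key)` fails without the `PrivateAttr()` declaration. A minimal self-contained sketch of the pattern; the class name and stub client are illustrative, not from this repo:

from typing import Any, List, Optional

from langchain_core.language_models.llms import LLM
from pydantic import PrivateAttr

class ExampleLLM(LLM):
    temperature: float = 0.7      # regular pydantic field: validated and serialized
    _client: Any = PrivateAttr()  # private attribute: exempt from pydantic validation

    def __init__(self, api_key: str, **kwargs):
        super().__init__(**kwargs)
        # Without the PrivateAttr declaration above, pydantic raises on this assignment.
        self._client = {"api_key": api_key}  # stand-in for Mistral(api_key=api_key)

    @property
    def _llm_type(self) -> str:
        return "example_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        return f"echo: {prompt}"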
@@ -184,12 +185,12 @@ class ElevatedRagChain:
             def _llm_type(self) -> str:
                 return "mistral_llm"
             def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-                response = self._client.chat.complete(
+                response = self._client.chat.complete(
                     model="mistral-small-latest",
                     messages=[{"role": "user", "content": prompt}],
                     temperature=self.temperature,
                     top_p=self.top_p,
-                    max_tokens=
+                    max_tokens=32000
                 )
                 return response.choices[0].message.content
             @property
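For reference, the pinned `max_tokens=32000` matches Mistral's advertised 32k context window; in the v1 `mistralai` SDK the same call looks like this standalone (the key lookup and prompt are placeholders). Since `max_tokens` bounds the completion and shares the window with the prompt, the full-window value implicitly assumes short prompts:

import os

from mistralai import Mistral

client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
response = client.chat.complete(
    model="mistral-small-latest",
    messages=[{"role": "user", "content": "Summarize hybrid retrieval in one sentence."}],
    temperature=0.7,
    top_p=0.95,
    max_tokens=32000,  # completion budget; prompt tokens also count against the 32k window
)
print(response.choices[0].message.content)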
@@ -220,7 +221,13 @@ class ElevatedRagChain:
             def _llm_type(self) -> str:
                 return "local_llm"
             def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-
+                # Reserve tokens for generation (e.g., 512 tokens)
+                reserved_gen = 512
+                max_total = 8192
+                max_prompt_tokens = max_total - reserved_gen
+                truncated_prompt = truncate_prompt(prompt, max_tokens=max_prompt_tokens)
+                generated = pipe(truncated_prompt, max_new_tokens=reserved_gen)[0]["generated_text"]
+                return generated
             @property
             def _identifying_params(self) -> dict:
                 return {"model": model_id, "max_length": extra_kwargs.get("max_length")}
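`truncate_prompt` is called here and again at query time with `max_tokens=4092` in the next hunk, but its definition is outside these hunks. A plausible sketch, assuming a Hugging Face tokenizer for the local checkpoint (the "gpt2" name is a placeholder, not the app's actual model):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint

def truncate_prompt(prompt: str, max_tokens: int) -> str:
    """Trim to at most max_tokens tokens, keeping the tail so the final
    question or instruction survives truncation."""
    ids = tokenizer.encode(prompt)
    if len(ids) <= max_tokens:
        return prompt
    return tokenizer.decode(ids[-max_tokens:], skip_special_tokens=True)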
@@ -398,7 +405,7 @@ def submit_query_updated(query):
         "question": query
     }
     if "llama" in rag_chain.llm_choice.lower():
-        prompt_variables["context"] = truncate_prompt(prompt_variables["context"], max_tokens=
+        prompt_variables["context"] = truncate_prompt(prompt_variables["context"], max_tokens=4092)
     response = rag_chain.elevated_rag_chain.invoke(prompt_variables)
     rag_chain.conversation_history.append({"query": query, "response": response})
     input_token_count = count_tokens(query)
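The 4092 cap reads as a token-budget split (my inference; the commit does not state it): roughly half of Llama-3's 8000-token window goes to retrieved context, leaving the rest for the template, conversation history, question, and answer:

LLAMA_WINDOW = 8000  # context window advertised in the UI text below
CONTEXT_CAP = 4092   # cap applied to the retrieved context on the Llama path
leftover = LLAMA_WINDOW - CONTEXT_CAP
print(f"{leftover} tokens left for template, history, question, and answer")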
@@ -450,20 +457,26 @@ with gr.Blocks(css=custom_css) as app:
     **PhiRAG** Query Your Data with Advanced RAG Techniques
 
     **Model Selection & Parameters:** Choose from the following options:
-    - 🇺🇸 Remote Meta-Llama-3
-    - 🇪🇺 Mistral-API
+    - 🇺🇸 Remote Meta-Llama-3 - has a context window of 8000 tokens
+    - 🇪🇺 Mistral-API - has a context window of 32000 tokens
 
-    **🔥 Randomness (Temperature):** Adjusts output predictability.
+    **🔥 Randomness (Temperature):** Adjusts output predictability.
+    - Example: 0.2 makes the output very deterministic (less creative), while 0.8 introduces more variety and spontaneity.
 
     **🎯 Word Variety (Top-p):** Limits word choices to a set probability percentage.
+    - Example: 0.5 restricts output to the most likely 50% of token choices for a focused answer; 0.95 allows almost all possibilities for more diverse responses.
 
-
+    **⚖️ BM25 Weight:** Adjust lexical vs. semantic emphasis.
+    - Example: A value of 0.8 puts more emphasis on exact keyword (lexical) matching, while 0.3 shifts emphasis toward semantic similarity.
 
-
+    **✏️ Prompt Template:** Edit as desired.
 
-
+    **🔗 File URLs:** Enter one URL per line (.pdf or .txt).
+    - Example: Provide one URL per line, such as
+      https://www.gutenberg.org/ebooks/8438.txt.utf-8
 
     **🔍 Query:** Enter your query below.
+    - Example: Select all parts in each book focusing on moral responsibility in Aristotle's philosophy, discuss them in a comprehensible way, and link the topics to other leading world philosophers. Use a structure with bullet points.
 
     The response displays the model used, word count, and current context (with conversation history).
     ''')
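The BM25 Weight slider implies hybrid lexical/semantic retrieval. A sketch of how such a weight is typically wired with LangChain's EnsembleRetriever, under the assumption that this is what the app does; the documents and embedding model here are placeholders, not the app's actual setup:

from langchain.retrievers import EnsembleRetriever
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

docs = [
    Document(page_content="Aristotle ties moral responsibility to voluntary action."),
    Document(page_content="Kant grounds responsibility in the autonomy of the will."),
]

bm25 = BM25Retriever.from_documents(docs)  # lexical (exact keyword) matching
vector = FAISS.from_documents(docs, HuggingFaceEmbeddings()).as_retriever()  # semantic

bm25_weight = 0.8  # 0.8 favors exact keywords; 0.3 would lean semantic
hybrid = EnsembleRetriever(
    retrievers=[bm25, vector],
    weights=[bm25_weight, 1 - bm25_weight],
)
results = hybrid.invoke("moral responsibility in Aristotle")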