alx-d committed
Commit 0ded8c7 · verified · 1 Parent(s): cc814aa

Upload folder using huggingface_hub

Files changed (1): advanced_rag.py (+24 -11)
advanced_rag.py CHANGED
@@ -23,6 +23,7 @@ from langchain_core.runnables import RunnableParallel, RunnableLambda
 from transformers.quantizers.auto import AutoQuantizationConfig
 import gradio as gr
 import requests
+from pydantic import PrivateAttr
 
 # Add Mistral imports with fallback handling
 try:
@@ -174,7 +175,7 @@ class ElevatedRagChain:
     class MistralLLM(LLM):
         temperature: float = 0.7
         top_p: float = 0.95
-        _client: Any = None
+        _client: Any = PrivateAttr()  # Declare _client as a private attribute
         def __init__(self, api_key: str, temperature: float = 0.7, top_p: float = 0.95):
             super().__init__()
             self._client = Mistral(api_key=api_key)
@@ -184,12 +185,12 @@ class ElevatedRagChain:
         def _llm_type(self) -> str:
             return "mistral_llm"
         def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
             response = self._client.chat.complete(
                 model="mistral-small-latest",
                 messages=[{"role": "user", "content": prompt}],
                 temperature=self.temperature,
                 top_p=self.top_p,
-                max_tokens=512
+                max_tokens=32000
             )
             return response.choices[0].message.content
         @property
@@ -220,7 +221,13 @@ class ElevatedRagChain:
         def _llm_type(self) -> str:
             return "local_llm"
         def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-            return pipe(prompt)[0]["generated_text"]
+            # Reserve tokens for generation (e.g., 512 tokens)
+            reserved_gen = 512
+            max_total = 8192
+            max_prompt_tokens = max_total - reserved_gen
+            truncated_prompt = truncate_prompt(prompt, max_tokens=max_prompt_tokens)
+            generated = pipe(truncated_prompt, max_new_tokens=reserved_gen)[0]["generated_text"]
+            return generated
         @property
         def _identifying_params(self) -> dict:
             return {"model": model_id, "max_length": extra_kwargs.get("max_length")}
@@ -398,7 +405,7 @@ def submit_query_updated(query):
         "question": query
     }
     if "llama" in rag_chain.llm_choice.lower():
-        prompt_variables["context"] = truncate_prompt(prompt_variables["context"], max_tokens=4096)
+        prompt_variables["context"] = truncate_prompt(prompt_variables["context"], max_tokens=4092)
     response = rag_chain.elevated_rag_chain.invoke(prompt_variables)
     rag_chain.conversation_history.append({"query": query, "response": response})
     input_token_count = count_tokens(query)
@@ -450,20 +457,26 @@ with gr.Blocks(css=custom_css) as app:
     **PhiRAG** Query Your Data with Advanced RAG Techniques
 
     **Model Selection & Parameters:** Choose from the following options:
-    - 🇺🇸 Remote Meta-Llama-3
-    - 🇪🇺 Mistral-API
+    - 🇺🇸 Remote Meta-Llama-3 - has a context window of 8,000 tokens
+    - 🇪🇺 Mistral-API - has a context window of 32,000 tokens
 
     **🔥 Randomness (Temperature):** Adjusts output predictability.
+    - Example: 0.2 makes the output very deterministic (less creative), while 0.8 introduces more variety and spontaneity.
 
     **🎯 Word Variety (Top‑p):** Limits word choices to a set probability percentage.
+    - Example: 0.5 restricts output to the most likely 50% of token choices for a focused answer; 0.95 allows almost all possibilities for more diverse responses.
 
-    **✏️ Prompt Template:** Edit as desired.
+    **⚖️ BM25 Weight:** Adjusts lexical vs. semantic matching.
+    - Example: A value of 0.8 puts more emphasis on exact keyword (lexical) matching, while 0.3 shifts emphasis toward semantic similarity.
 
-    **🔗 File URLs:** Enter one URL per line (.pdf or .txt).
+    **✏️ Prompt Template:** Edit as desired.
 
-    **⚖️ BM25 Weight:** Adjust Lexical vs Semantics.
+    **🔗 File URLs:** Enter one URL per line (.pdf or .txt).
+    - Example: Provide one URL per line, such as
+      https://www.gutenberg.org/ebooks/8438.txt.utf-8
 
     **🔍 Query:** Enter your query below.
+    - Example: Select all parts in each book focusing on moral responsibility in Aristotle's philosophy, discuss them in a comprehensible way, and link the topics to other leading world philosophers. Use a structured format with bullet points.
 
     The response displays the model used, word count, and current context (with conversation history).
     ''')
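A note on the `PrivateAttr` change: LangChain's `LLM` base class is a pydantic model, so assigning an undeclared attribute such as `self._client` inside `__init__` can fail pydantic's field validation, which is why the plain `_client: Any = None` field was replaced. Below is a minimal sketch of the pattern, assuming a recent `langchain_core` and pydantic; the `EchoLLM` class and its placeholder client are hypothetical stand-ins, not code from this repository.

```python
from typing import Any, List, Optional

from langchain_core.language_models.llms import LLM
from pydantic import PrivateAttr


class EchoLLM(LLM):
    """Hypothetical stand-in for MistralLLM illustrating PrivateAttr."""

    temperature: float = 0.7
    top_p: float = 0.95
    _client: Any = PrivateAttr()  # not a pydantic field, so validation skips it

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        self._client = object()  # placeholder for Mistral(api_key=...)

    @property
    def _llm_type(self) -> str:
        return "echo_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # A real implementation would call self._client here.
        return prompt
```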
 
 
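On the new BM25 help text: the weight trades exact keyword matching against embedding similarity. One common wiring for such a slider, assuming the app combines a BM25 retriever with a semantic one via LangChain's stock `EnsembleRetriever` (an assumption; the retriever setup is not part of this diff):

```python
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever


def build_hybrid_retriever(docs, semantic_retriever, bm25_weight: float = 0.5):
    """Blend lexical BM25 with a semantic retriever (e.g., FAISS-backed)."""
    bm25 = BM25Retriever.from_documents(docs)
    # Weights sum to 1: bm25_weight favors exact keyword matches,
    # the remainder favors embedding similarity.
    return EnsembleRetriever(
        retrievers=[bm25, semantic_retriever],
        weights=[bm25_weight, 1.0 - bm25_weight],
    )
```

With `bm25_weight=0.8` the ensemble leans on keywords; with `0.3` it leans on semantics, matching the examples in the UI text.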