Upload folder using huggingface_hub
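For reference, a commit with this title is typically produced by the `huggingface_hub` client's folder-upload call. The snippet below is a minimal sketch, not code from this commit; the repo id, local path, and repo type are placeholder assumptions.

from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` or the HF_TOKEN env var
api.upload_folder(
    folder_path=".",                    # local folder containing advanced_rag.py (assumed)
    repo_id="user/advanced-rag-space",  # placeholder repo id
    repo_type="space",                  # assumed: this looks like a Gradio Space
    commit_message="Upload folder using huggingface_hub",
)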
advanced_rag.py
CHANGED: +130 -138
@@ -81,55 +81,27 @@ jobs = {} # Stores job status and results
 results_queue = queue.Queue()  # Thread-safe queue for completed jobs
 processing_lock = threading.Lock()  # Prevent simultaneous processing of the same job

-#
-def get_job_list():
-    job_list_md = "### Submitted Jobs\n\n"
-
-    if not jobs:
-        return "No jobs found. Submit a query or load files to create jobs."
-
-    # Sort jobs by start time (newest first)
-    sorted_jobs = sorted(
-        [(job_id, job_info) for job_id, job_info in jobs.items()],
-        key=lambda x: x[1].get("start_time", 0),
-        reverse=True
-    )
-
-    for job_id, job_info in sorted_jobs:
-        status = job_info.get("status", "unknown")
-        job_type = job_info.get("type", "unknown")
-        query = job_info.get("query", "")
-        start_time = job_info.get("start_time", 0)
-        time_str = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")
-
-        # Create a shortened query preview
-        query_preview = query[:30] + "..." if query and len(query) > 30 else query or "N/A"
-
-        # Create clickable links using Markdown
-        if job_type == "query":
-            job_list_md += f"- [{job_id}](javascript:void) - {time_str} - {status} - Query: {query_preview}\n"
-        else:
-            job_list_md += f"- [{job_id}](javascript:void) - {time_str} - {status} - File Load Job\n"
-
-    return job_list_md
+# Add these missing async processing functions

-
-
+def process_in_background(job_id, function, args):
+    """Process a function in the background and store results"""
     try:
+        debug_print(f"Processing job {job_id} in background")
         result = function(*args)
         results_queue.put((job_id, result))
+        debug_print(f"Job {job_id} completed and added to results queue")
     except Exception as e:
-
-
-        results_queue.put((job_id,
+        debug_print(f"Error in background job {job_id}: {str(e)}")
+        error_result = (f"Error processing job: {str(e)}", "", "", "")
+        results_queue.put((job_id, error_result))

-# Async version of load_pdfs_updated
 def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temperature, top_p):
+    """Asynchronous version of load_pdfs_updated to prevent timeouts"""
     if not file_links:
-        return "Please enter non-empty URLs", "", "Model used: N/A"
+        return "Please enter non-empty URLs", "", "Model used: N/A"

     job_id = str(uuid.uuid4())
-    debug_print(f"Starting async job {job_id} for loading
+    debug_print(f"Starting async job {job_id} for file loading")

     # Start background thread
     threading.Thread(
@@ -138,41 +110,33 @@ def load_pdfs_async(file_links, model_choice, prompt_template, bm25_weight, temp
     ).start()

     jobs[job_id] = {
-        "status": "processing",
+        "status": "processing",
         "type": "load_files",
-        "start_time": time.time()
+        "start_time": time.time(),
+        "query": f"Loading files: {file_links.split()[0]}..." if file_links else "No files"
     }

     return (
-        f"Files
-        f"Use 'Check Job Status' with this ID to get results.",
+        f"Files submitted and processing in the background (Job ID: {job_id}).\n\n"
+        f"Use 'Check Job Status' tab with this ID to get results.",
         f"Job ID: {job_id}",
-        f"Model
+        f"Model requested: {model_choice}"
     )

-# Async version of submit_query_updated
 def submit_query_async(query, model_choice=None):
+    """Asynchronous version of submit_query_updated to prevent timeouts"""
     if not query:
         return "Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0"

-    if not hasattr(rag_chain, 'elevated_rag_chain') or not rag_chain.raw_data:
-        return "Please load files first", "", "Input tokens: 0", "Output tokens: 0"
-
-    # Use the provided model if specified, otherwise use the current model
-    if model_choice and model_choice != "":
-        # Update the model temporarily for this query
-        current_model = rag_chain.llm_choice
-        rag_chain.update_llm_pipeline(
-            model_choice,
-            rag_chain.temperature,
-            rag_chain.top_p,
-            rag_chain.prompt_template,
-            rag_chain.bm25_weight
-        )
-
     job_id = str(uuid.uuid4())
     debug_print(f"Starting async job {job_id} for query: {query}")

+    # Update model if specified
+    if model_choice and rag_chain and rag_chain.llm_choice != model_choice:
+        debug_print(f"Updating model to {model_choice} for this query")
+        rag_chain.update_llm_pipeline(model_choice, rag_chain.temperature, rag_chain.top_p,
+                                      rag_chain.prompt_template, rag_chain.bm25_weight)
+
     # Start background thread
     threading.Thread(
         target=process_in_background,
@@ -184,16 +148,48 @@ def submit_query_async(query, model_choice=None):
         "type": "query",
         "start_time": time.time(),
         "query": query,
-        "model": rag_chain.llm_choice
+        "model": rag_chain.llm_choice if hasattr(rag_chain, 'llm_choice') else "Unknown"
     }

     return (
         f"Query submitted and processing in the background (Job ID: {job_id}).\n\n"
-        f"Use 'Check Job Status' with this ID to get results.",
+        f"Use 'Check Job Status' tab with this ID to get results.",
         f"Job ID: {job_id}",
         f"Input tokens: {count_tokens(query)}",
         "Output tokens: pending"
     )
+
+# Function to display all jobs as a clickable list
+def get_job_list():
+    job_list_md = "### Submitted Jobs\n\n"
+
+    if not jobs:
+        return "No jobs found. Submit a query or load files to create jobs."
+
+    # Sort jobs by start time (newest first)
+    sorted_jobs = sorted(
+        [(job_id, job_info) for job_id, job_info in jobs.items()],
+        key=lambda x: x[1].get("start_time", 0),
+        reverse=True
+    )
+
+    for job_id, job_info in sorted_jobs:
+        status = job_info.get("status", "unknown")
+        job_type = job_info.get("type", "unknown")
+        query = job_info.get("query", "")
+        start_time = job_info.get("start_time", 0)
+        time_str = datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S")
+
+        # Create a shortened query preview
+        query_preview = query[:30] + "..." if query and len(query) > 30 else query or "N/A"
+
+        # Create clickable links using Markdown
+        if job_type == "query":
+            job_list_md += f"- [{job_id}](javascript:void) - {time_str} - {status} - Query: {query_preview}\n"
+        else:
+            job_list_md += f"- [{job_id}](javascript:void) - {time_str} - {status} - File Load Job\n"
+
+    return job_list_md

 # Function to handle job list clicks
 def job_selected(job_id):
@@ -394,6 +390,7 @@ class ElevatedRagChain:

     # Improve error handling in the ElevatedRagChain class
     def create_llm_pipeline(self):
+        from langchain.llms.base import LLM  # Import LLM here so it's always defined
         normalized = self.llm_choice.lower()
         try:
             if "remote" in normalized:
@@ -406,7 +403,7 @@ class ElevatedRagChain:

                 client = InferenceClient(token=hf_api_token, timeout=120)

-
+                # We no longer use wait_for_model because it's unsupported
                 def remote_generate(prompt: str) -> str:
                     max_retries = 3
                     backoff = 2  # start with 2 seconds
@@ -434,7 +431,7 @@ class ElevatedRagChain:
                     def _llm_type(self) -> str:
                         return "remote_llm"

-                    def _call(self, prompt: str, stop:
+                    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
                         return remote_generate(prompt)

                     @property
@@ -444,68 +441,57 @@ class ElevatedRagChain:
                 debug_print("Remote Meta-Llama-3 pipeline created successfully.")
                 return RemoteLLM()

-            elif "mistral" in normalized:
+            elif "mistral-api" in normalized:
                 debug_print("Creating Mistral API pipeline...")
                 mistral_api_key = os.environ.get("MISTRAL_API_KEY")
                 if not mistral_api_key:
                     raise ValueError("Please set the MISTRAL_API_KEY environment variable to use Mistral API.")
-
-                # Import Mistral library with proper error handling
                 try:
                     from mistralai import Mistral
                     from mistralai.exceptions import MistralException
                     debug_print("Mistral library imported successfully")
                 except ImportError:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                debug_print("Creating Mistral LLM instance")
-                mistral_llm = MistralLLM(
-                    api_key=mistral_api_key,
-                    temperature=self.temperature,
-                    top_p=self.top_p
-                )
-                debug_print("Mistral API pipeline created successfully.")
-                return mistral_llm
+                    debug_print("Mistral client library not installed. Falling back to Llama pipeline.")
+                    normalized = "llama"
+                if normalized != "llama":
+                    class MistralLLM(LLM):
+                        temperature: float = 0.7
+                        top_p: float = 0.95
+                        _client: Any = PrivateAttr(default=None)
+                        def __init__(self, api_key: str, temperature: float = 0.7, top_p: float = 0.95, **kwargs: Any):
+                            super().__init__(**kwargs)
+                            self._client = Mistral(api_key=api_key)
+                            self.temperature = temperature
+                            self.top_p = top_p
+                        @property
+                        def _llm_type(self) -> str:
+                            return "mistral_llm"
+                        def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+                            try:
+                                debug_print("Calling Mistral API...")
+                                response = self._client.chat.complete(
+                                    model="mistral-small-latest",
+                                    messages=[{"role": "user", "content": prompt}],
+                                    temperature=self.temperature,
+                                    top_p=self.top_p,
+                                    max_tokens=32000
+                                )
+                                return response.choices[0].message.content
+                            except Exception as e:
+                                debug_print(f"Mistral API error: {str(e)}")
+                                return f"Error generating response: {str(e)}"
+                        @property
+                        def _identifying_params(self) -> dict:
+                            return {"model": "mistral-small-latest"}
+                    debug_print("Creating Mistral LLM instance")
+                    mistral_llm = MistralLLM(api_key=mistral_api_key, temperature=self.temperature, top_p=self.top_p)
+                    debug_print("Mistral API pipeline created successfully.")
+                    return mistral_llm

             else:
-                # Default case -
+                # Default case - using a fallback model (or Llama)
                 debug_print("Using local/fallback model pipeline")
-                model_id = "facebook/opt-350m" #
-
+                model_id = "facebook/opt-350m"  # Use a smaller model as fallback
                 pipe = pipeline(
                     "text-generation",
                     model=model_id,
@@ -517,27 +503,21 @@ class ElevatedRagChain:
                     @property
                     def _llm_type(self) -> str:
                         return "local_llm"
-
                     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-                        #
-
-
-
-
-
-
-                            return generated
-                    except Exception as e:
-                        debug_print(f"Generation error: {str(e)}")
-                        return f"Error generating response: {str(e)}"
-
+                        # For this fallback, truncate prompt if it exceeds limits
+                        reserved_gen = 128
+                        max_total = 1024
+                        max_prompt_tokens = max_total - reserved_gen
+                        truncated_prompt = truncate_prompt(prompt, max_tokens=max_prompt_tokens)
+                        generated = pipe(truncated_prompt, max_new_tokens=reserved_gen)[0]["generated_text"]
+                        return generated
                     @property
                     def _identifying_params(self) -> dict:
-                        return {"model": model_id}
+                        return {"model": model_id, "max_length": 1024}

                 debug_print("Local fallback pipeline created.")
                 return LocalLLM()
-
+
         except Exception as e:
             debug_print(f"Error creating LLM pipeline: {str(e)}")
             # Return a dummy LLM that explains the error
@@ -546,7 +526,7 @@ class ElevatedRagChain:
                 def _llm_type(self) -> str:
                     return "error_llm"

-                def _call(self, prompt: str, stop:
+                def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
                     return f"Error initializing LLM: \n\nPlease check your environment variables and try again."

                 @property
@@ -555,6 +535,7 @@ class ElevatedRagChain:

             return ErrorLLM()

+
     def update_llm_pipeline(self, new_model_choice: str, temperature: float, top_p: float, prompt_template: str, bm25_weight: float):
         debug_print(f"Updating chain with new model: {new_model_choice}")
         self.llm_choice = new_model_choice
@@ -624,7 +605,9 @@ class ElevatedRagChain:
             "context": RunnableLambda(self.extract_question) | self.ensemble_retriever,
             "question": RunnableLambda(self.extract_question)
         }) | self.capture_context
+        # Wrap the prompt template in a RunnableLambda
         self.rag_prompt = ChatPromptTemplate.from_template(self.prompt_template)
+        prompt_runnable = RunnableLambda(lambda vars: self.rag_prompt.format(**vars))
         self.str_output_parser = StrOutputParser()
         debug_print("Selecting LLM pipeline based on choice: " + self.llm_choice)
         self.llm = self.create_llm_pipeline()
@@ -637,9 +620,10 @@ class ElevatedRagChain:
             formatted += f"- **Generated using:** {self.llm_choice}\n"
             formatted += f"\n**Conversation History:** {len(self.conversation_history)} conversation(s) considered.\n"
             return formatted
-        self.elevated_rag_chain = base_runnable |
+        self.elevated_rag_chain = base_runnable | prompt_runnable | self.llm | format_response
         debug_print("Elevated RAG chain successfully built and ready to use.")

+
     def get_current_context(self) -> str:
         base_context = "\n".join([str(doc) for doc in self.split_data[:3]]) if self.split_data else "No context available."
         history_summary = "\n\n---\n**Recent Conversations (last 3):**\n"
@@ -917,6 +901,13 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8

         with gr.TabItem("Submit Query"):
             with gr.Row():
+                # Add this line to define the query_model_dropdown
+                query_model_dropdown = gr.Dropdown(
+                    choices=["🇺🇸 Remote Meta-Llama-3", "🇪🇺 Mistral-API"],
+                    value="🇺🇸 Remote Meta-Llama-3",
+                    label="Query Model"
+                )
+
                 query_input = gr.Textbox(
                     label="Enter your query here",
                     placeholder="Type your query",
@@ -1007,6 +998,13 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         outputs=[load_response, load_context, model_output]
     )

+    # Also sync in the other direction
+    query_model_dropdown.change(
+        fn=sync_model_dropdown,
+        inputs=query_model_dropdown,
+        outputs=model_dropdown
+    )
+
     submit_button.click(
         submit_query_async,
         inputs=[query_input, query_model_dropdown],
@@ -1044,19 +1042,13 @@ https://www.gutenberg.org/ebooks/8438.txt.utf-8
         outputs=[reset_response, reset_context, reset_model]
    )

+
     model_dropdown.change(
         fn=sync_model_dropdown,
         inputs=model_dropdown,
         outputs=query_model_dropdown
     )

-    # Also sync in the other direction
-    query_model_dropdown.change(
-        fn=sync_model_dropdown,
-        inputs=query_model_dropdown,
-        outputs=model_dropdown
-    )
-
     # Add an event to refresh the job list on page load
     app.load(
         fn=refresh_job_list,