Mateo4 committed on
Commit
d02a876
·
verified ·
1 Parent(s): 2d1eba3

Update app.py

Browse files

GeminiRAG added to app.py

Files changed (1) hide show
  1. app.py +258 -47
app.py CHANGED
@@ -1,64 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
 
 
 
 
 
 
 
 
 
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
  ):
18
- messages = [{"role": "system", "content": system_message}]
 
 
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
25
 
26
- messages.append({"role": "user", "content": message})
 
 
27
 
28
- response = ""
 
 
 
 
 
 
 
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
 
 
 
38
 
39
- response += token
40
- yield response
41
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
+ import os
2
+ import time
3
+ import fitz
4
+ import faiss
5
+ import pickle
6
+ import numpy as np
7
+ from typing import List, Dict
8
+ import re
9
+
10
+ import google.generativeai as genai
11
+ from google.api_core.exceptions import InternalServerError
12
+
13
+ from sentence_transformers import SentenceTransformer
14
+
15
+ # Import gradio for the web interface
16
  import gradio as gr
 
17
 
18
+ # Define the ML_prompt (as it was in your notebook)
19
+ ML_prompt = """
20
+ نقش ات:
21
+ تو دستیار هوش مصنوعی من برای امتحان یادگیری ماشین هستی
22
+ این امتحان تمرکز روی مفاهیم تیوری یادگیری ماشین داره
23
+ منبع درس کتاب بیشاپ هست
24
+
25
+ لحن صحبت کردن ات:
26
+ تو استاد دانشگاه هستی و کسایی که باهات چت می کنن دانشجوهات اند
27
  """
 
 
 
28
 
29
+ class GeminiRAG:
30
+ def __init__(self, api_key: str, model_name: str = "models/gemini-2.0-flash",
31
+ embed_model_name: str = "all-MiniLM-L6-v2", # Using a common SentenceTransformer model
32
+ instruction_prompt: str = ML_prompt,
33
+ vectorstore_dir: str = "vectorstore"): # Use a directory within the app for persistence
34
+
35
+ if not api_key:
36
+ raise ValueError("API key is missing.")
37
+
38
+ self.instruction_prompt = instruction_prompt
39
+ self.vectorstore_dir = vectorstore_dir
40
+ self.vectorstore_faiss_path = os.path.join(self.vectorstore_dir, "faiss_index.index")
41
+ self.vectorstore_data_path = os.path.join(self.vectorstore_dir, "faiss_data.pkl")
42
+
43
+ # Ensure vectorstore directory exists
44
+ os.makedirs(self.vectorstore_dir, exist_ok=True)
45
+
46
+ # Setup Gemini
47
+ genai.configure(api_key=api_key)
48
+ self.model = genai.GenerativeModel(model_name=model_name)
49
+
50
+ # Setup Embedder
51
+ self.embedder = SentenceTransformer(embed_model_name)
52
+
53
+ # FAISS index and storage for sentence chunks and their parent documents
54
+ embedding_dim = self.embedder.get_sentence_embedding_dimension() # Get embedding dimension
55
+ self.index = faiss.IndexFlatL2(embedding_dim)
56
+ self.sentence_chunks: List[str] = []
57
+ self.parent_documents: List[str] = []
58
+ self.sentence_to_parent_map: List[int] = []
59
+
60
+ # Load existing vector store if available
61
+ self.load_vectorstore()
62
+
63
+ def _split_into_sentences(self, text: str) -> List[str]:
64
+ sentences = re.split(r'(?<=[.!?])\s+', text)
65
+ return [s.strip() for s in sentences if s.strip()]
66
+
67
+ def load_document(self, pdf_path: str) -> List[str]:
68
+ doc = fitz.open(pdf_path)
69
+ page_contents = []
70
+ for page_num in range(len(doc)):
71
+ page = doc.load_page(page_num)
72
+ text = page.get_text()
73
+ if text.strip():
74
+ page_contents.append(text.strip())
75
+ doc.close()
76
+ return page_contents
77
+
78
+ def add_document(self, parent_chunks: List[str]):
79
+ new_sentence_chunks = []
80
+ new_sentence_to_parent_map = []
81
+ current_parent_doc_index = len(self.parent_documents)
82
+
83
+ for parent_chunk in parent_chunks:
84
+ self.parent_documents.append(parent_chunk)
85
+ sentences = self._split_into_sentences(parent_chunk)
86
+ for sentence in sentences:
87
+ new_sentence_chunks.append(sentence)
88
+ new_sentence_to_parent_map.append(current_parent_doc_index)
89
+ current_parent_doc_index += 1
90
+
91
+ if new_sentence_chunks:
92
+ embeddings = self.embedder.encode(new_sentence_chunks, batch_size=32, convert_to_numpy=True)
93
+ self.index.add(np.array(embeddings))
94
+ self.sentence_chunks.extend(new_sentence_chunks)
95
+ self.sentence_to_parent_map.extend(new_sentence_to_parent_map)
96
+ print(f"Added {len(new_sentence_chunks)} sentence chunks from {len(parent_chunks)} parent documents.")
97
+ else:
98
+ print("No new sentence chunks to add.")
99
+
100
+ def ask_question(self, query: str, top_k: int = 5) -> str:
101
+ if not self.sentence_chunks or not self.parent_documents:
102
+ return "Knowledge base is empty. Please load documents first."
103
+
104
+ query_emb = self.embedder.encode([query], convert_to_numpy=True)
105
+ D, I = self.index.search(np.array(query_emb), top_k)
106
+
107
+ retrieved_parent_doc_indices = set()
108
+ for idx in I[0]:
109
+ if 0 <= idx < len(self.sentence_chunks):  # FAISS pads with -1 when fewer than top_k hits
110
+ parent_idx = self.sentence_to_parent_map[idx]
111
+ retrieved_parent_doc_indices.add(parent_idx)
112
+
113
+ context_parts = []
114
+ sorted_parent_indices = sorted(list(retrieved_parent_doc_indices))
115
+
116
+ for parent_idx in sorted_parent_indices:
117
+ if parent_idx < len(self.parent_documents):
118
+ context_parts.append(self.parent_documents[parent_idx])
119
+
120
+ context = "\n\n---\n\n".join(context_parts)
121
+
122
+ if not context.strip():
123
+ return "No relevant information found in the knowledge base."
124
+
125
+ prompt = f"""
126
+ ### instruction prompt : (explanation : this text is your guideline don't mention it on response)
127
+ {self.instruction_prompt}
128
+
129
+ Use the following context to answer the question.\n
130
+ Context:\n
131
+ {context}\n
132
+
133
+ Question: {query}\n
134
+
135
+ Answer:"""
136
+
137
+ for attempt in range(3):
138
+ try:
139
+ response = self.model.generate_content(prompt)
140
+ return response.text
141
+ except InternalServerError as e:
142
+ print(f"Error: {e}. Retrying in 5 seconds...")
143
+ time.sleep(5)
144
+ raise Exception("Failed to generate after 3 retries.")
145
+
146
+ def save_vectorstore(self):
147
+ faiss.write_index(self.index, self.vectorstore_faiss_path)
148
+ with open(self.vectorstore_data_path, "wb") as f:
149
+ pickle.dump({
150
+ 'sentence_chunks': self.sentence_chunks,
151
+ 'parent_documents': self.parent_documents,
152
+ 'sentence_to_parent_map': self.sentence_to_parent_map
153
+ }, f)
154
+ print(f"Vectorstore saved to {self.vectorstore_faiss_path} and {self.vectorstore_data_path}")
155
+
156
+ def load_vectorstore(self):
157
+ if os.path.exists(self.vectorstore_faiss_path) and os.path.exists(self.vectorstore_data_path):
158
+ self.index = faiss.read_index(self.vectorstore_faiss_path)
159
+ with open(self.vectorstore_data_path, "rb") as f:
160
+ data = pickle.load(f)
161
+ self.sentence_chunks = data['sentence_chunks']
162
+ self.parent_documents = data['parent_documents']
163
+ self.sentence_to_parent_map = data['sentence_to_parent_map']
164
+ print("📦 Loaded vectorstore.")
165
+ return True
166
+ print("ℹ️ No saved vectorstore found.")
167
+ return False
168
+
169
+ # --- Gradio Interface Setup ---
170
+
171
+ # Get API key from environment variable
172
+ api_key = os.getenv("GEMINI_API_KEY")
173
+ if not api_key:
174
+ raise ValueError("GEMINI_API_KEY environment variable not set. Please set it in Hugging Face Space secrets.")
175
+
176
+ # Initialize the RAG system globally for the Gradio app
177
+ rag_instance = GeminiRAG(api_key=api_key)
178
 
179
  def respond(
180
+ message: str,
181
+ history: list[list[str]], # Gradio Chatbot history format
182
+ system_message: str, # From additional_inputs
183
+ max_tokens: int, # From additional_inputs (not directly used by RAG but kept for interface consistency)
184
+ temperature: float, # From additional_inputs (not directly used by RAG)
185
+ top_p: float, # From additional_inputs (not directly used by RAG)
186
  ):
187
+ # The `system_message` from Gradio can be used to dynamically update the RAG's instruction prompt
188
+ # For this example, we'll keep the ML_prompt fixed, but you could add logic here:
189
+ # rag_instance.instruction_prompt = system_message
190
 
191
+ try:
192
+ # Call your RAG system's ask_question method
193
+ # The top_k parameter can be exposed in Gradio's additional_inputs if needed
194
+ response = rag_instance.ask_question(message)
195
+ # Gradio ChatInterface expects a generator for streaming or a direct string for non-streaming
196
+ yield response # Yield the full response, as ask_question does not stream token by token
197
+ except Exception as e:
198
+ yield f"❌ An error occurred: {e}"
199
 
200
+ def upload_and_process_documents(files):
201
+ if not files:
202
+ return "Please upload PDF files to process."
203
 
204
+ # Re-initialize RAG instance to clear previous data and rebuild with new documents
205
+ # This is a simple approach; for more complex scenarios, you might want to append
206
+ # or manage different knowledge bases.
207
+ print("Rebuilding knowledge base with new documents...")
208
+ try:
209
+ # NOTE(review): __init__ calls load_vectorstore(), which reloads any saved store from disk — previous data is NOT actually cleared; delete the vectorstore files first for a true rebuild
210
+ global rag_instance
211
+ rag_instance = GeminiRAG(api_key=api_key)
212
+ except Exception as e:
213
+ return f"Error re-initializing RAG: {e}"
214
 
215
+ success_count = 0
216
+ error_files = []
217
+ for file_obj in files:
218
+ file_path = file_obj.name # Gradio passes a NamedTemporaryFile object
219
+ print(f"Processing {file_path}")
220
+ try:
221
+ chunks = rag_instance.load_document(file_path)
222
+ rag_instance.add_document(chunks)
223
+ success_count += 1
224
+ except Exception as e:
225
+ error_files.append(f"{os.path.basename(file_path)}: {e}")
226
 
227
+ rag_instance.save_vectorstore()
 
228
 
229
+ status_message = f"Successfully loaded and embedded {success_count} document(s)."
230
+ if error_files:
231
+ status_message += f"\nErrors occurred with: {'; '.join(error_files)}"
232
+ return status_message
233
 
234
+
235
+ # Define the Gradio ChatInterface
236
+ with gr.Blocks() as demo:
237
+ gr.Markdown("# Gemini RAG Chatbot for ML Theory")
238
+ gr.Markdown("Upload your PDF documents, and then ask questions about the content. Ensure your `GEMINI_API_KEY` is set as a Space Secret.")
239
+
240
+ with gr.Row():
241
+ file_output = gr.Textbox(label="Upload Status", interactive=False)
242
+ upload_button = gr.UploadButton(
243
+ label="Upload PDF Documents",
244
+ file_types=[".pdf"],
245
+ file_count="multiple"
246
+ )
247
+ upload_button.upload(upload_and_process_documents, inputs=upload_button, outputs=file_output)
248
+
249
+ # The ChatInterface component simplifies the chat UI setup
250
+ chat_interface_component = gr.ChatInterface(
251
+ respond,
252
+ additional_inputs=[
253
+ gr.Textbox(value=ML_prompt, label="System message", info="This sets the fixed role for the AI."), # Keep ML_prompt fixed
254
+ gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens", info="Not directly used by RAG model."),
255
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature", info="Not directly used by RAG model."),
256
+ gr.Slider(
257
+ minimum=0.1,
258
+ maximum=1.0,
259
+ value=0.95,
260
+ step=0.05,
261
+ label="Top-p (nucleus sampling)",
262
+ info="Not directly used by RAG model."
263
+ ),
264
+ ],
265
+ chatbot=gr.Chatbot(height=400),
266
+ textbox=gr.Textbox(placeholder="Ask me about Machine Learning Theory!", container=False, scale=7),
267
+ clear_btn="Clear Chat",
268
+ submit_btn="Send",
269
+ # Set examples for quick testing
270
+ examples=["درمورد boosting بهم بگو", "انواع رگرسیون را توضیح بده", "شبکه های عصبی چیستند؟"]
271
+ )
272
 
273
 
274
  if __name__ == "__main__":
275
+ demo.launch()