Spaces:

Asish22
/

code-crawler

Sleeping

Asish Karthikeya Gogineni committed on Jan 28

Commit

6d5c110

1 Parent(s): cddcaaf

feat: Add local embeddings to bypass API rate limits

Files changed (2) hide show

app.py CHANGED Viewed

@@ -380,17 +380,18 @@ with st.sidebar:
             os.environ["QDRANT_API_KEY"] = qdrant_key
     # For Groq, we need an embedding provider
-    embedding_provider = provider
     embedding_api_key = api_key
     if provider == "groq":
-        st.info(f"ℹ️ {provider.capitalize()} is used for Chat. For indexing, please select 'gemini' for embeddings.")
-        embedding_provider = "gemini" # Force gemini if groq is used, as openai is removed
-        # Check Embedding Key for Gemini
         emb_env_key = os.getenv("GOOGLE_API_KEY")
         if not emb_env_key and provider != "gemini":
-             embedding_api_key = st.text_input("Google API Key (for Embeddings)", type="password")
         else:
              embedding_api_key = emb_env_key

             os.environ["QDRANT_API_KEY"] = qdrant_key
     # For Groq, we need an embedding provider
+    # Use LOCAL embeddings by default - NO RATE LIMITS!
+    embedding_provider = "local"  # Use local HuggingFace embeddings
     embedding_api_key = api_key
     if provider == "groq":
+        st.info(f"ℹ️ {provider.capitalize()} is used for Chat. Using LOCAL embeddings (no rate limits!).")
+        embedding_provider = "local"  # Use local embeddings for Groq too
+        # Check Embedding Key for Gemini (not needed for local)
         emb_env_key = os.getenv("GOOGLE_API_KEY")
         if not emb_env_key and provider != "gemini":
+             embedding_api_key = emb_env_key  # Optional now
         else:
              embedding_api_key = emb_env_key

code_chatbot/indexer.py CHANGED Viewed

@@ -62,11 +62,20 @@ class Indexer:
             )
             logger.info("Path obfuscation enabled")
-        # Setup Embeddings (only Gemini supported)
         if embedding_function:
             self.embedding_function = embedding_function
         else:
-            if provider == "gemini":
                 api_key = api_key or os.getenv("GOOGLE_API_KEY")
                 if not api_key:
                     raise ValueError("Google API Key is required for Gemini Embeddings")
@@ -74,8 +83,9 @@ class Indexer:
                     model="models/gemini-embedding-001",
                     google_api_key=api_key
                 )
             else:
-                raise ValueError(f"Unsupported embedding provider: {provider}. Only 'gemini' is supported.")
     def clear_collection(self, collection_name: str = "codebase"):
         """

             )
             logger.info("Path obfuscation enabled")
+        # Setup Embeddings - supports Gemini (API) and local HuggingFace
         if embedding_function:
             self.embedding_function = embedding_function
         else:
+            if provider == "local" or provider == "huggingface":
+                # Use local embeddings - NO RATE LIMITS!
+                from langchain_huggingface import HuggingFaceEmbeddings
+                self.embedding_function = HuggingFaceEmbeddings(
+                    model_name="all-MiniLM-L6-v2",  # Fast & good quality
+                    model_kwargs={'device': 'cpu'},
+                    encode_kwargs={'normalize_embeddings': True}
+                )
+                logger.info("Using LOCAL embeddings (no rate limits)")
+            elif provider == "gemini":
                 api_key = api_key or os.getenv("GOOGLE_API_KEY")
                 if not api_key:
                     raise ValueError("Google API Key is required for Gemini Embeddings")
                     model="models/gemini-embedding-001",
                     google_api_key=api_key
                 )
+                logger.info("Using Gemini embeddings (API rate limits apply)")
             else:
+                raise ValueError(f"Unsupported embedding provider: {provider}. Use 'local', 'huggingface', or 'gemini'.")
     def clear_collection(self, collection_name: str = "codebase"):
         """