Jacek Zadrożny committed
Commit 5411262
Parent(s): 06bb39f

Remove asyncio completely to fix event loop cleanup errors

- Convert all async functions to synchronous
- Remove asyncio.to_thread() and event loop management
- Simplify embeddings client (direct API calls with retry logic)
- Simplify agent initialization (no event loops needed)
- Fix all docstring examples to remove await
- Eliminates 'Invalid file descriptor: -1' errors completely

Files changed:
- agent/a11y_agent.py +6 -7
- agent/tools.py +3 -3
- app.py +9 -30
- models/embeddings.py +51 -67
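
For illustration, a minimal sketch of the conversion pattern this commit applies throughout (the class and method names here are generic placeholders, not from the repository): an async generator method becomes a plain generator, and awaited calls become direct calls, so no event loop needs to be created or cleaned up.

    from typing import AsyncGenerator, Generator

    # Before: async generator; callers need an event loop and `async for`.
    class AgentBefore:
        async def ask(self, question: str) -> AsyncGenerator[str, None]:
            for token in ("streamed", "answer"):
                yield token

    # After: plain generator; callers iterate with a normal `for` loop,
    # so there is no event loop to create, track, or close on shutdown.
    class AgentAfter:
        def ask(self, question: str) -> Generator[str, None, None]:
            for token in ("streamed", "answer"):
                yield token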
agent/a11y_agent.py CHANGED

@@ -1,7 +1,6 @@
 """A11y Expert - Main accessibility question-answering agent."""
 
-import asyncio
-from typing import Optional, AsyncGenerator
+from typing import Optional, Generator
 from openai import OpenAI
 from langdetect import detect, LangDetectException
 from config import get_settings
@@ -53,7 +52,7 @@ class A11yExpertAgent:
         except Exception as e:
             logger.warning(f"Error closing A11yExpertAgent: {e}")
 
-    async def ask(self, question: str) -> AsyncGenerator[str, None]:
+    def ask(self, question: str) -> Generator[str, None, None]:
         """
         Ask a question and get a streaming answer with RAG.
 
@@ -77,7 +76,7 @@ class A11yExpertAgent:
         current_system_prompt = get_system_prompt(language, self.expertise)
 
         logger.info("Searching knowledge base...")
-        context = await search_knowledge_base(question, self.vector_store, language=language)
+        context = search_knowledge_base(question, self.vector_store, language=language)
 
         messages = [
             {"role": "system", "content": current_system_prompt},
@@ -158,12 +157,12 @@ Remember to:
         self.conversation_history = []
         logger.info("Conversation history cleared")
 
-    async def batch_ask(self, questions: list[str]) -> list[dict]:
+    def batch_ask(self, questions: list[str]) -> list[dict]:
         """Ask multiple questions in sequence."""
         results = []
         for question in questions:
             try:
-                answer_chunks = [chunk async for chunk in self.ask(question)]
+                answer_chunks = [chunk for chunk in self.ask(question)]
                 answer = "".join(answer_chunks)
                 results.append({"question": question, "answer": answer, "success": True})
             except Exception as e:
@@ -172,7 +171,7 @@ Remember to:
         return results
 
 
-async def create_agent(language: Optional[str] = None) -> A11yExpertAgent:
+def create_agent(language: Optional[str] = None) -> A11yExpertAgent:
     """Factory function to create and initialize agent."""
     language = language or "en"
 
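A quick usage sketch of the new synchronous interface, using `create_agent` and `ask` exactly as they appear in the diff above (the question text is illustrative):

    from agent.a11y_agent import create_agent

    agent = create_agent(language="en")

    # ask() is now a plain generator, so streaming is a simple for loop.
    for chunk in agent.ask("How do I write good alt text?"):
        print(chunk, end="", flush=True)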
agent/tools.py CHANGED

@@ -5,7 +5,7 @@ from database.vector_store_client import VectorStoreClient
 from models.embeddings import get_embeddings_client
 from loguru import logger
 
-async def search_knowledge_base(
+def search_knowledge_base(
     query: str,
     vector_store: VectorStoreClient,
     language: str = "en"
@@ -25,7 +25,7 @@ async def search_knowledge_base(
         logger.info(f"Query: {query} (language: {language})")
 
         embeddings_client = get_embeddings_client()
-        query_embedding = await embeddings_client.get_embedding(query)
+        query_embedding = embeddings_client.get_embedding(query)
 
         where_clause = f"language = '{language}'"
         results = vector_store.search(
@@ -50,7 +50,7 @@ async def search_knowledge_base(
         logger.error(f"Search failed: {e}")
         return f"Error searching knowledge base: {str(e)}"
 
-async def get_database_stats(vector_store: VectorStoreClient) -> str:
+def get_database_stats(vector_store: VectorStoreClient) -> str:
     """
     Get statistics about the knowledge base.
 
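Both tools can now be called directly from synchronous code. A sketch, assuming a `VectorStoreClient` can be constructed with no arguments (its actual constructor lives in the database module and is not shown in this commit):

    from agent.tools import search_knowledge_base, get_database_stats
    from database.vector_store_client import VectorStoreClient

    vector_store = VectorStoreClient()  # assumed constructor; see database module

    # No asyncio.run() wrapper needed anymore - these are plain functions.
    context = search_knowledge_base("WCAG contrast ratio", vector_store, language="en")
    print(context)
    print(get_database_stats(vector_store))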
app.py CHANGED

@@ -3,13 +3,11 @@ Gradio UI for the A11y Expert Agent with lazy initialization.
 This module creates a Gradio ChatInterface that starts FAST,
 then initializes the agent in the background.
 """
-import asyncio
 import gradio as gr
 from loguru import logger
 import sys
 import atexit
 import threading
-import time
 from agent.a11y_agent import create_agent, A11yExpertAgent
 from config import get_settings
 
@@ -22,30 +20,16 @@ logger.add(sys.stderr, level=get_settings().log_level)
 agent_instance: A11yExpertAgent = None
 agent_ready = False
 agent_error = None
-agent_loop = None  # Keep reference to prevent garbage collection
 
 # --- Agent Initialization ---
 def initialize_agent_background():
     """Initialize the agent in background thread."""
-    global agent_instance, agent_ready, agent_error, agent_loop
+    global agent_instance, agent_ready, agent_error
     try:
         logger.info("🔄 Starting agent initialization in background...")
-
-        # Create a new event loop for this thread
-        agent_loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(agent_loop)
-
-        try:
-            # Run the async initialization
-            async def init():
-                return await create_agent()
-
-            agent_instance = agent_loop.run_until_complete(init())
-            agent_ready = True
-            logger.success("✅ A11y Expert Agent is ready!")
-        finally:
-            # Don't close the loop here - we need it for the respond function
-            pass
+        agent_instance = create_agent()
+        agent_ready = True
+        logger.success("✅ A11y Expert Agent is ready!")
     except Exception as e:
         logger.error(f"❌ Failed to initialize agent: {e}")
         agent_error = str(e)
@@ -53,7 +37,7 @@ def initialize_agent_background():
 
 def cleanup_resources():
     """Clean up resources on app shutdown."""
-    global agent_instance, agent_loop
+    global agent_instance
     logger.info("Cleaning up resources...")
     try:
         # Close agent and all its resources
@@ -65,17 +49,12 @@ def cleanup_resources():
         if hasattr(get_embeddings_client, '_instance'):
             get_embeddings_client._instance.close()
 
-        # Properly close the event loop
-        if agent_loop and not agent_loop.is_closed():
-            agent_loop.close()
-            logger.info("Event loop closed")
-
         logger.success("✅ Resources cleaned up successfully")
     except Exception as e:
         logger.warning(f"Error during cleanup: {e}")
 
 # --- Gradio Chat Logic ---
-async def respond(message: str, history: list[list[str]]):
+def respond(message: str, history: list[list[str]]):
     """
     Main function for the Gradio ChatInterface.
     Receives a user message and chat history, then uses the agent
@@ -96,8 +75,9 @@ async def respond(message: str, history: list[list[str]]):
 
         yield "⏳ Agent is initializing, please wait..."
         # Wait up to 120 seconds for agent to be ready
+        import time
        for i in range(120):
-            await asyncio.sleep(1)
+            time.sleep(1)
             if agent_ready:
                 break
             if agent_error:
@@ -115,8 +95,7 @@ async def respond(message: str, history: list[list[str]]):
     logger.info(f"User query: '{message}'")
     full_response = ""
     try:
-
-        async for chunk in agent_instance.ask(message):
+        for chunk in agent_instance.ask(message):
             full_response += chunk
             yield full_response
     except Exception as e:
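The diff does not show how the background thread and the chat UI are wired together; the following is a minimal sketch of the pattern the module implies, using the functions defined above (the exact wiring in app.py may differ):

    import atexit
    import threading

    import gradio as gr

    # Start initialization without blocking app startup (daemon=True so the
    # thread does not keep the process alive on shutdown).
    threading.Thread(target=initialize_agent_background, daemon=True).start()
    atexit.register(cleanup_resources)

    # respond() is a plain generator function; Gradio streams each yielded
    # value to the chat window, so no async support is required.
    demo = gr.ChatInterface(fn=respond)
    demo.launch()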
models/embeddings.py CHANGED

@@ -12,7 +12,6 @@ from functools import wraps
 from openai import OpenAI, RateLimitError
 from config import get_settings
 from loguru import logger
-import asyncio
 
 try:
     from diskcache import Cache
@@ -22,41 +21,6 @@ except ImportError:
     logger.warning("diskcache not available - embeddings caching disabled")
 
 
-def retry_on_rate_limit(max_retries: int = 5):
-    """
-    Decorator for retrying async functions on rate limit with exponential backoff.
-
-    Args:
-        max_retries: Maximum number of retry attempts
-
-    Returns:
-        Decorated function with retry logic
-
-    Examples:
-        >>> @retry_on_rate_limit(max_retries=3)
-        ... async def my_api_call():
-        ...     return await some_api()
-    """
-    def decorator(func):
-        @wraps(func)
-        async def wrapper(*args, **kwargs):
-            for attempt in range(max_retries):
-                try:
-                    return await func(*args, **kwargs)
-                except RateLimitError as e:
-                    if attempt == max_retries - 1:
-                        raise
-                    wait_time = (2 ** attempt) * 2  # Exponential: 2s, 4s, 8s, 16s, 32s
-                    logger.warning(
-                        f"Rate limited. Retrying in {wait_time}s "
-                        f"(attempt {attempt + 1}/{max_retries})"
-                    )
-                    await asyncio.sleep(wait_time)
-            raise RuntimeError(f"Failed after {max_retries} retries")
-        return wrapper
-    return decorator
-
-
 class EmbeddingsClient:
     """
     Client for generating embeddings with caching and retry logic.
@@ -75,12 +39,12 @@ class EmbeddingsClient:
 
     Examples:
         >>> client = EmbeddingsClient()
-        >>> embedding = await client.get_embedding("Hello world")
+        >>> embedding = client.get_embedding("Hello world")
         >>> len(embedding)
         3072
 
         >>> # Second call uses cache
-        >>> embedding2 = await client.get_embedding("Hello world")
+        >>> embedding2 = client.get_embedding("Hello world")
         >>> embedding == embedding2
         True
     """
@@ -133,27 +97,36 @@ class EmbeddingsClient:
         """
         return hashlib.md5(f"{self.model}:{text}".encode()).hexdigest()
 
-    @retry_on_rate_limit(max_retries=5)
-    async def _get_embedding_uncached(self, text: str) -> List[float]:
+    def _get_embedding_uncached(self, text: str, retry_count: int = 5) -> List[float]:
         """
         Generate embedding without cache (internal method).
 
         Args:
             text: Input text (already truncated)
+            retry_count: Number of retries on rate limit
 
         Returns:
             Embedding vector
         """
-        response = await asyncio.to_thread(
-            self.client.embeddings.create,
-            model=self.model,
-            input=text
-        )
-        embedding = response.data[0].embedding
-        logger.debug(f"Generated embedding (dim={len(embedding)})")
-        return embedding
+        for attempt in range(retry_count):
+            try:
+                response = self.client.embeddings.create(
+                    model=self.model,
+                    input=text
+                )
+                embedding = response.data[0].embedding
+                logger.debug(f"Generated embedding (dim={len(embedding)})")
+                return embedding
+            except RateLimitError as e:
+                if attempt == retry_count - 1:
+                    raise
+                wait_time = (2 ** attempt) * 2
+                logger.warning(f"Rate limited. Retrying in {wait_time}s (attempt {attempt + 1}/{retry_count})")
+                import time
+                time.sleep(wait_time)
+        raise RuntimeError(f"Failed after {retry_count} retries")
 
-    async def get_embedding(self, text: str) -> List[float]:
+    def get_embedding(self, text: str) -> List[float]:
         """
         Generate embedding for text with caching.
 
@@ -167,7 +140,7 @@ class EmbeddingsClient:
             List of float values representing the embedding
 
         Examples:
-            >>> embedding = await client.get_embedding("Hello world")
+            >>> embedding = client.get_embedding("Hello world")
             >>> len(embedding)
             3072
         """
@@ -182,7 +155,7 @@ class EmbeddingsClient:
                 return self.cache[cache_key]
 
         # Generate embedding
-        embedding = await self._get_embedding_uncached(text)
+        embedding = self._get_embedding_uncached(text)
 
         # Store in cache
         if self.cache is not None:
@@ -191,30 +164,40 @@ class EmbeddingsClient:
 
         return embedding
 
-    @retry_on_rate_limit(max_retries=3)
-    async def _get_embeddings_batch_uncached(
+    def _get_embeddings_batch_uncached(
         self,
-        texts: List[str]
+        texts: List[str],
+        retry_count: int = 3
     ) -> List[List[float]]:
         """
         Generate embeddings for batch without cache (internal method).
 
         Args:
             texts: List of texts (already truncated)
+            retry_count: Number of retries on rate limit
 
         Returns:
            List of embedding vectors
         """
-        response = await asyncio.to_thread(
-            self.client.embeddings.create,
-            model=self.model,
-            input=texts
-        )
-        # Sort by index to maintain order
-        batch_embeddings = sorted(response.data, key=lambda x: x.index)
-        return [e.embedding for e in batch_embeddings]
+        for attempt in range(retry_count):
+            try:
+                response = self.client.embeddings.create(
+                    model=self.model,
+                    input=texts
+                )
+                # Sort by index to maintain order
+                batch_embeddings = sorted(response.data, key=lambda x: x.index)
+                return [e.embedding for e in batch_embeddings]
+            except RateLimitError as e:
+                if attempt == retry_count - 1:
+                    raise
+                wait_time = (2 ** attempt) * 2
+                logger.warning(f"Rate limited. Retrying in {wait_time}s (attempt {attempt + 1}/{retry_count})")
+                import time
+                time.sleep(wait_time)
+        raise RuntimeError(f"Failed after {retry_count} retries")
 
-    async def get_embeddings_batch(
+    def get_embeddings_batch(
         self,
         texts: List[str],
         batch_size: int = 100
@@ -234,10 +217,11 @@ class EmbeddingsClient:
 
         Examples:
             >>> texts = ["Hello", "World", "!"]
-            >>> embeddings = await client.get_embeddings_batch(texts)
+            >>> embeddings = client.get_embeddings_batch(texts)
            >>> len(embeddings)
            3
        """
+        import time
        all_embeddings = []
 
        for i in range(0, len(texts), batch_size):
@@ -280,7 +264,7 @@ class EmbeddingsClient:
             # Generate embeddings for cache misses
             if texts_to_generate:
                 try:
-                    generated = await self._get_embeddings_batch_uncached(
+                    generated = self._get_embeddings_batch_uncached(
                         texts_to_generate
                     )
 
@@ -306,7 +290,7 @@ class EmbeddingsClient:
 
             # Small delay between batches to avoid rate limiting
             if num_batches > 1 and current_batch_num < num_batches:
-                await asyncio.sleep(0.5)
+                time.sleep(0.5)
 
         logger.success(f" 🧠 Generated {len(all_embeddings)} embeddings total.")
         return all_embeddings
@@ -321,7 +305,7 @@ def get_embeddings_client() -> EmbeddingsClient:
 
     Examples:
        >>> client = get_embeddings_client()
-        >>> embedding = await client.get_embedding("test")
+        >>> embedding = client.get_embedding("test")
    """
    if not hasattr(get_embeddings_client, '_instance'):
        get_embeddings_client._instance = EmbeddingsClient()