Spaces:

brickfrog
/

ankigen

Build error

App Files Files Community

brickfrog committed on Sep 25, 2025

Commit

d6bb543

verified ·

1 Parent(s): 509b428

Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

ankigen_core/agents/integration.py +23 -1
ankigen_core/agents/schemas.py +47 -0
ankigen_core/auto_config.py +171 -0
ankigen_core/context7.py +104 -22
app.py +86 -2
test_context7_debug.py +30 -0
test_pandas_resolution.py +14 -0

ankigen_core/agents/integration.py CHANGED Viewed

@@ -72,8 +72,30 @@ class AgentOrchestrator:
                 logger.info(f"Fetching library documentation for: {library_name}")
                 try:
                     context7_client = Context7Client()
                     library_docs = await context7_client.fetch_library_documentation(
-                        library_name, topic=library_topic, tokens=5000
                     )
                     if library_docs:

                 logger.info(f"Fetching library documentation for: {library_name}")
                 try:
                     context7_client = Context7Client()
+                    # Dynamic token allocation based on card generation needs
+                    # More cards need more comprehensive documentation
+                    base_tokens = 8000  # Increased base from 5000
+                    if num_cards > 40:
+                        token_limit = 12000  # Large card sets need more context
+                    elif num_cards > 20:
+                        token_limit = 10000  # Medium sets
+                    else:
+                        token_limit = base_tokens  # Small sets
+                    # If topic is specified, we can be more focused and use fewer tokens
+                    if library_topic:
+                        token_limit = int(
+                            token_limit * 0.8
+                        )  # Can be more efficient with focused retrieval
+                    logger.info(
+                        f"Fetching {token_limit} tokens of documentation"
+                        + (f" for topic: {library_topic}" if library_topic else "")
+                    )
                     library_docs = await context7_client.fetch_library_documentation(
+                        library_name, topic=library_topic, tokens=token_limit
                     )
                     if library_docs:

ankigen_core/agents/schemas.py CHANGED Viewed

@@ -134,3 +134,50 @@ class TokenUsageSchema(BaseModel):
     total_tokens: int = Field(..., ge=0, description="Total tokens used")
     estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
     model: str = Field(..., description="Model used for the request")

     total_tokens: int = Field(..., ge=0, description="Total tokens used")
     estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
     model: str = Field(..., description="Model used for the request")
+class AutoConfigSchema(BaseModel):
+    """Schema for auto-configuration based on subject analysis
+
+    Every field except ``documentation_focus`` is required. Only the two
+    integer fields carry range constraints; the string fields are free-form.
+    """
+    # What to search for in Context7
+    library_search_term: str = Field(
+        ...,
+        description="Library name to search for in Context7 (e.g., 'pandas', 'react', 'tensorflow')",
+    )
+    # Specific topic within the library (optional)
+    documentation_focus: Optional[str] = Field(
+        None,
+        description="Specific topic/area within the library documentation to focus on",
+    )
+    # Suggested settings based on subject analysis
+    # Validation rejects values outside the inclusive range 2..20.
+    topic_number: int = Field(
+        ..., ge=2, le=20, description="Number of topics to generate (2-20)"
+    )
+    # Validation rejects values outside the inclusive range 2..30.
+    cards_per_topic: int = Field(
+        ..., ge=2, le=30, description="Number of cards per topic (2-30)"
+    )
+    learning_preferences: str = Field(
+        ..., description="Learning preferences and focus areas for card generation"
+    )
+    generate_cloze: bool = Field(
+        ...,
+        description="Whether to generate cloze cards (true for syntax/code, false for concepts)",
+    )
+    # NOTE: typed as plain str, so the model names listed in the description
+    # are guidance only and are not enforced by validation.
+    model_choice: str = Field(
+        ...,
+        description="Recommended model: 'gpt-4.1' for complex topics, 'gpt-4.1-nano' for simpler topics",
+    )
+    # Analysis metadata
+    # NOTE: subject_type and scope are also plain str; the values listed in
+    # their descriptions are not validated.
+    subject_type: str = Field(
+        ...,
+        description="Type of subject: 'concepts', 'syntax', 'api', 'theory', 'practical'",
+    )
+    scope: str = Field(
+        ..., description="Scope of the subject: 'narrow', 'medium', 'broad'"
+    )
+    rationale: str = Field(
+        ..., description="Brief explanation of why these settings were chosen"
+    )

ankigen_core/auto_config.py ADDED Viewed

	@@ -0,0 +1,171 @@

+"""Auto-configuration service for intelligent settings population"""
+from typing import Dict, Any
+from openai import AsyncOpenAI
+from ankigen_core.logging import logger
+from ankigen_core.context7 import Context7Client
+from ankigen_core.agents.schemas import AutoConfigSchema
+class AutoConfigService:
+    """Service for analyzing subjects and auto-configuring flashcard generation settings"""
+    def __init__(self):
+        # Client used by auto_configure() to resolve a library name to a Context7 ID.
+        self.context7_client = Context7Client()
+    async def analyze_subject(
+        self, subject: str, openai_client: AsyncOpenAI
+    ) -> AutoConfigSchema:
+        """Analyze a subject string and return optimal configuration settings
+
+        Args:
+            subject: Free-text subject; it is interpolated verbatim into the
+                user prompt.
+            openai_client: Client used for the structured-output request.
+
+        Returns:
+            The parsed AutoConfigSchema from the model, or a fixed default
+            configuration (6 topics, 8 cards per topic, no library) when the
+            request or the missing-parse check raises an Exception. Such
+            exceptions are logged and swallowed rather than propagated.
+        """
+        # Instructions for the model; this text is sent as the system message.
+        system_prompt = """You are an expert educational content analyzer specializing in spaced repetition learning. Analyze the given subject and determine optimal flashcard generation settings that focus on ESSENTIAL, HIGH-VALUE concepts.
+CRITICAL PRINCIPLE: Quality over quantity. Focus on fundamental concepts that unlock understanding, not trivial facts.
+Consider:
+1. Extract any library/framework names for Context7 search (e.g., "pandas", "react", "tensorflow")
+2. IMPORTANT: Extract the specific documentation focus from the subject
+   - "Basic Pandas Dataframe" → documentation_focus: "dataframe basics, creation, indexing"
+   - "React hooks tutorial" → documentation_focus: "hooks, useState, useEffect"
+   - "Docker networking" → documentation_focus: "networking, network drivers, container communication"
+3. Identify the scope: narrow (specific feature), medium (several related topics), broad (comprehensive overview)
+4. Determine content type: concepts (theory/understanding), syntax (code/commands), api (library usage), practical (hands-on skills)
+5. Suggest optimal number of topics and cards - aim for comprehensive learning (30-60 total cards minimum)
+6. Recommend cloze cards for syntax/code, basic cards for concepts
+7. Choose model based on complexity: gpt-4.1 for complex/advanced, gpt-4.1-nano for basic/simple
+IMPORTANT - Focus on HIGH-VALUE topics:
+- GOOD topics: Core concepts, fundamental principles, mental models, design patterns, key abstractions
+- AVOID topics: Trivial commands (like "docker ps"), basic syntax that's easily googled, minor API details
+- Example: For Docker, focus on "container lifecycle", "image layers", "networking models" NOT "list of docker commands"
+Guidelines for settings (MINIMUM 30 cards total):
+- Narrow/specific scope: 4-5 essential topics with 8-10 cards each (32-50 cards)
+- Medium scope: 5-7 core topics with 7-9 cards each (35-63 cards)
+- Broad scope: 6-8 fundamental topics with 6-8 cards each (36-64 cards)
+- "Basic"/"Introduction" keywords: Start with fundamentals, 40-50 cards total
+- "Advanced"/"Complex" keywords: Deep dive into critical concepts, 45-60 cards
+Learning preference suggestions:
+- For basics: "Focus on fundamental concepts and mental models that form the foundation"
+- For practical: "Emphasize core patterns and principles with real-world applications"
+- For theory: "Build deep conceptual understanding with progressive complexity"
+Documentation focus examples (be specific and comprehensive):
+- "Basic Pandas Dataframe" → "dataframe creation, indexing, selection, basic operations, data types"
+- "React hooks" → "useState, useEffect, custom hooks, hook rules, common patterns"
+- "Docker basics" → "containers, images, Dockerfile, volumes, basic networking"
+- "Advanced TypeScript" → "generics, conditional types, mapped types, utility types, type inference"
+Return a JSON object matching the AutoConfigSchema."""
+        user_prompt = f"""Analyze this subject for flashcard generation: "{subject}"
+Extract:
+1. The library name if mentioned
+2. The specific documentation focus (what aspects of the library to focus on)
+3. Optimal settings for effective learning
+Provide a brief rationale for your choices."""
+        try:
+            # response_format=AutoConfigSchema asks the SDK to parse the reply
+            # into that model; the result is read from message.parsed below.
+            response = await openai_client.beta.chat.completions.parse(
+                model="gpt-4.1-nano",  # Use nano for this analysis task
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_prompt},
+                ],
+                response_format=AutoConfigSchema,
+                temperature=0.3,  # Lower temperature for more consistent analysis
+            )
+            # No choices or no parsed payload is treated like any other failure:
+            # the ValueError is caught by the handler below.
+            if not response.choices or not response.choices[0].message.parsed:
+                raise ValueError("Failed to get valid response from OpenAI")
+            config = response.choices[0].message.parsed
+            logger.info(
+                f"Subject analysis complete: library='{config.library_search_term}', "
+                f"topics={config.topic_number}, cards/topic={config.cards_per_topic}"
+            )
+            return config
+        except Exception as e:
+            logger.error(f"Failed to analyze subject: {e}")
+            # Return sensible defaults on error (still aim for good card count)
+            # The empty library_search_term makes auto_configure() skip the
+            # Context7 lookup for this fallback.
+            return AutoConfigSchema(
+                library_search_term="",
+                documentation_focus=None,
+                topic_number=6,
+                cards_per_topic=8,
+                learning_preferences="Focus on fundamental concepts and core principles with practical examples",
+                generate_cloze=False,
+                model_choice="gpt-4.1-nano",
+                subject_type="concepts",
+                scope="medium",
+                rationale="Using default settings due to analysis error",
+            )
+    async def auto_configure(
+        self, subject: str, openai_client: AsyncOpenAI
+    ) -> Dict[str, Any]:
+        """
+        Complete auto-configuration pipeline:
+        1. Analyze subject with AI
+        2. Search Context7 for library if detected
+        3. Return complete configuration for UI
+
+        Returns an empty dict when ``subject`` is empty or whitespace-only.
+        Otherwise returns a dict with the keys ``library_name``,
+        ``library_topic``, ``topic_number``, ``cards_per_topic``,
+        ``preference_prompt``, ``generate_cloze_checkbox``, ``model_choice``
+        and ``analysis_metadata``.
+        """
+        if not subject or not subject.strip():
+            logger.warning("Empty subject provided to auto_configure")
+            return {}
+        logger.info(f"Starting auto-configuration for subject: '{subject}'")
+        # Step 1: Analyze the subject
+        config = await self.analyze_subject(subject, openai_client)
+        # Step 2: Search Context7 for library if one was detected
+        library_id = None
+        if config.library_search_term:
+            logger.info(
+                f"Searching Context7 for library: '{config.library_search_term}'"
+            )
+            try:
+                library_id = await self.context7_client.resolve_library_id(
+                    config.library_search_term
+                )
+                if library_id:
+                    logger.info(f"Resolved library to Context7 ID: {library_id}")
+                else:
+                    logger.warning(
+                        f"Could not find library '{config.library_search_term}' in Context7"
+                    )
+            except Exception as e:
+                # Lookup failure is non-fatal: library_id stays None and the
+                # rest of the configuration is still returned.
+                logger.error(f"Context7 search failed: {e}")
+        # Step 3: Build complete configuration dict for UI
+        ui_config = {
+            # The library name is only surfaced when Context7 resolved it;
+            # library_topic is passed through regardless.
+            "library_name": config.library_search_term if library_id else "",
+            "library_topic": config.documentation_focus or "",
+            "topic_number": config.topic_number,
+            "cards_per_topic": config.cards_per_topic,
+            "preference_prompt": config.learning_preferences,
+            "generate_cloze_checkbox": config.generate_cloze,
+            "model_choice": config.model_choice,
+            # Metadata for display
+            "analysis_metadata": {
+                "subject_type": config.subject_type,
+                "scope": config.scope,
+                "rationale": config.rationale,
+                "library_found": library_id is not None,
+                "context7_id": library_id,
+            },
+        }
+        logger.info(
+            f"Auto-configuration complete: library={'found' if library_id else 'not found'}, "
+            f"topics={config.topic_number}, model={config.model_choice}"
+        )
+        return ui_config

ankigen_core/context7.py CHANGED Viewed

@@ -88,32 +88,114 @@ class Context7Client:
         )
         if result and result.get("success") and result.get("text"):
-            # Parse the text to extract library ID
             text = result["text"]
-            import re
-            # First, look for specific Context7-compatible library ID mentions
             lines = text.split("\n")
             for line in lines:
-                if "Context7-compatible library ID:" in line:
-                    # Extract the ID after the colon
-                    parts = line.split("Context7-compatible library ID:")
-                    if len(parts) > 1:
-                        library_id = parts[1].strip()
-                        if library_id.startswith("/"):
-                            logger.info(
-                                f"Resolved '{library_name}' to ID: {library_id}"
-                            )
-                            return library_id
-            # Fallback: Look for library ID pattern but be more specific
-            # Must have actual library names, not generic /org/project
-            matches = re.findall(r"/[\w-]+/[\w.-]+(?:/[\w.-]+)?", text)
-            for match in matches:
-                # Filter out generic placeholders
-                if match != "/org/project" and "example" not in match.lower():
-                    logger.info(f"Resolved '{library_name}' to ID: {match}")
-                    return match
         logger.warning(f"Could not resolve library ID for '{library_name}'")
         return None

         )
         if result and result.get("success") and result.get("text"):
             text = result["text"]
+            # Parse the structured response format
+            libraries = []
             lines = text.split("\n")
+            current_lib = {}
             for line in lines:
+                line = line.strip()
+                # Parse title
+                if line.startswith("- Title:"):
+                    if current_lib and current_lib.get("id"):
+                        libraries.append(current_lib)
+                    current_lib = {
+                        "title": line.replace("- Title:", "").strip().lower()
+                    }
+                # Parse library ID
+                elif line.startswith("- Context7-compatible library ID:"):
+                    lib_id = line.replace(
+                        "- Context7-compatible library ID:", ""
+                    ).strip()
+                    if current_lib is not None:
+                        current_lib["id"] = lib_id
+                # Parse code snippets count
+                elif line.startswith("- Code Snippets:"):
+                    snippets_str = line.replace("- Code Snippets:", "").strip()
+                    try:
+                        snippets = int(snippets_str)
+                        if current_lib is not None:
+                            current_lib["snippets"] = snippets
+                    except ValueError:
+                        pass
+                # Parse trust score
+                elif line.startswith("- Trust Score:"):
+                    score_str = line.replace("- Trust Score:", "").strip()
+                    try:
+                        trust = float(score_str)
+                        if current_lib is not None:
+                            current_lib["trust"] = trust
+                    except ValueError:
+                        pass
+            # Add the last library if exists
+            if current_lib and current_lib.get("id"):
+                libraries.append(current_lib)
+            # If we found libraries, pick the best match
+            if libraries:
+                search_term = library_name.lower()
+                # Score each library
+                best_lib = None
+                best_score = -1
+                for lib in libraries:
+                    score = 0
+                    lib_title = lib.get("title", "")
+                    lib_id = lib["id"].lower()
+                    # Exact title match gets highest priority
+                    if lib_title == search_term:
+                        score += 10000
+                    # Check if it's exactly "pandas" in the path (not geopandas, etc)
+                    elif lib_id == f"/{search_term}-dev/{search_term}":
+                        score += 5000
+                    elif f"/{search_term}/" in lib_id or lib_id.endswith(
+                        f"/{search_term}"
+                    ):
+                        score += 2000
+                    # Partial title match (but penalize if it's a compound like "geopandas")
+                    elif search_term in lib_title:
+                        if lib_title == search_term:
+                            score += 1000
+                        elif lib_title.startswith(search_term):
+                            score += 200
+                        else:
+                            score += 50
+                    # Strong bonus for code snippets (indicates main library)
+                    snippets = lib.get("snippets", 0)
+                    score += snippets / 10  # Pandas has 7386 snippets
+                    # Significant bonus for trust score (high trust = official/authoritative)
+                    trust = lib.get("trust", 0)
+                    score += trust * 100  # Trust 9.2 = 920 points, Trust 7 = 700 points
+                    # Debug logging
+                    if search_term in lib_title or search_term in lib_id:
+                        logger.debug(
+                            f"Scoring {lib['id']}: title='{lib_title}', snippets={snippets}, "
+                            f"trust={trust}, score={score:.2f}"
+                        )
+                    if score > best_score:
+                        best_score = score
+                        best_lib = lib
+                if best_lib:
+                    logger.info(
+                        f"Resolved '{library_name}' to ID: {best_lib['id']} "
+                        f"(title: {best_lib.get('title', 'unknown')}, snippets: {best_lib.get('snippets', 0)}, "
+                        f"trust: {best_lib.get('trust', 0)}, score: {best_score:.2f})"
+                    )
+                    return best_lib["id"]
         logger.warning(f"Could not resolve library ID for '{library_name}'")
         return None

app.py CHANGED Viewed

@@ -29,6 +29,7 @@ from ankigen_core.utils import (
     ResponseCache,
     get_logger,
 )  # fetch_webpage_text is used by card_generator
 # --- Initialization ---
 logger = get_logger()
@@ -203,6 +204,10 @@ def create_ankigen_interface():
                                 label="Subject",
                                 placeholder="e.g., 'Basic SQL Concepts'",
                             )
                         with gr.Group(visible=False) as path_mode:
                             description = gr.Textbox(
                                 label="Learning Goal",
@@ -258,9 +263,10 @@ def create_ankigen_interface():
                         )
                         # Context7 Library Documentation
-                        with gr.Accordion(
                             "Library Documentation (optional)", open=False
-                        ):
                             library_name_input = gr.Textbox(
                                 label="Library Name",
                                 placeholder="e.g., 'react', 'tensorflow', 'pandas'",
@@ -681,6 +687,84 @@ def create_ankigen_interface():
                 api_name="export_main_to_apkg",
             )
             async def handle_web_crawl_click(
                 api_key_val: str,
                 url: str,

     ResponseCache,
     get_logger,
 )  # fetch_webpage_text is used by card_generator
+from ankigen_core.auto_config import AutoConfigService
 # --- Initialization ---
 logger = get_logger()
                                 label="Subject",
                                 placeholder="e.g., 'Basic SQL Concepts'",
                             )
+                            auto_fill_btn = gr.Button(
+                                "Auto-fill",
+                                variant="secondary",
+                            )
                         with gr.Group(visible=False) as path_mode:
                             description = gr.Textbox(
                                 label="Learning Goal",
                         )
                         # Context7 Library Documentation
+                        library_accordion = gr.Accordion(
                             "Library Documentation (optional)", open=False
+                        )
+                        with library_accordion:
                             library_name_input = gr.Textbox(
                                 label="Library Name",
                                 placeholder="e.g., 'react', 'tensorflow', 'pandas'",
                 api_name="export_main_to_apkg",
             )
+            # Auto-fill handler
+            async def handle_auto_fill_click(
+                subject_text: str,
+                api_key: str,
+                progress=gr.Progress(track_tqdm=True),
+            ):
+                """Handle auto-fill button click to populate all settings.
+
+                Args:
+                    subject_text: Contents of the Subject textbox.
+                    api_key: OpenAI API key entered in the UI.
+                    progress: Gradio progress tracker injected by the framework.
+
+                Returns:
+                    Eight component updates, in this order: library name,
+                    library topic, topic number, cards per topic, preference
+                    prompt, cloze checkbox, model choice, library accordion.
+                    When validation fails or no configuration is produced,
+                    all eight are no-op updates.
+
+                Raises:
+                    gr.Error: If auto-configuration fails unexpectedly, so the
+                        failure is actually shown to the user.
+                """
+                # One no-op update per output wired to this handler; the count
+                # must match the `outputs` list of the click registration.
+                no_changes = [gr.update()] * 8
+                if not subject_text or not subject_text.strip():
+                    gr.Warning("Please enter a subject first")
+                    return no_changes
+                if not api_key:
+                    gr.Warning("OpenAI API key is required for auto-configuration")
+                    return no_changes
+                try:
+                    progress(0, desc="Analyzing subject...")
+                    # Initialize OpenAI client
+                    await client_manager.initialize_client(api_key)
+                    openai_client = client_manager.get_client()
+                    # Get auto-configuration
+                    auto_config_service = AutoConfigService()
+                    config = await auto_config_service.auto_configure(
+                        subject_text, openai_client
+                    )
+                except Exception as e:
+                    logger.error(f"Auto-configuration failed: {e}", exc_info=True)
+                    # gr.Error is an exception type: merely instantiating it
+                    # shows nothing in the UI, so it has to be raised.
+                    raise gr.Error(f"Auto-configuration failed: {str(e)}") from e
+                if not config:
+                    gr.Warning("Could not generate configuration")
+                    return no_changes
+                # Return updates for all relevant UI components
+                return (
+                    gr.update(
+                        value=config.get("library_name", "")
+                    ),  # library_name_input
+                    gr.update(
+                        value=config.get("library_topic", "")
+                    ),  # library_topic_input
+                    gr.update(value=config.get("topic_number", 3)),  # topic_number
+                    gr.update(
+                        value=config.get("cards_per_topic", 5)
+                    ),  # cards_per_topic
+                    gr.update(
+                        value=config.get("preference_prompt", "")
+                    ),  # preference_prompt
+                    gr.update(
+                        value=config.get("generate_cloze_checkbox", False)
+                    ),  # generate_cloze_checkbox
+                    gr.update(
+                        value=config.get("model_choice", "gpt-4.1-nano")
+                    ),  # model_choice
+                    gr.update(
+                        open=True
+                    ),  # Open the Library Documentation accordion
+                )
+            auto_fill_btn.click(
+                fn=handle_auto_fill_click,
+                inputs=[subject, api_key_input],
+                outputs=[
+                    library_name_input,
+                    library_topic_input,
+                    topic_number,
+                    cards_per_topic,
+                    preference_prompt,
+                    generate_cloze_checkbox,
+                    model_choice,
+                    library_accordion,  # Reference to the accordion component
+                ],
+            )
             async def handle_web_crawl_click(
                 api_key_val: str,
                 url: str,

test_context7_debug.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""Debug Context7 response to understand format"""
+import asyncio
+from ankigen_core.context7 import Context7Client
+async def test_debug():
+    """Print the raw Context7 `resolve-library-id` response for "pandas".
+
+    On a usable response the text is printed once verbatim and once line by
+    line with indices; otherwise the failed result is printed.
+    """
+    context7 = Context7Client()
+    # Ask Context7 directly so the unparsed payload can be inspected
+    response = await context7.call_context7_tool(
+        "resolve-library-id", {"libraryName": "pandas"}
+    )
+    usable = response and response.get("success") and response.get("text")
+    if not usable:
+        print("Failed to get response:", response)
+        return
+    raw_text = response["text"]
+    print("=== RAW RESPONSE ===")
+    print(raw_text)
+    print("=== END RESPONSE ===")
+    # Second pass: same text, one numbered line at a time
+    print("\n=== LINES WITH INDICES ===")
+    for index, content in enumerate(raw_text.split("\n")):
+        print(f"{index:3}: '{content}'")
+if __name__ == "__main__":
+    asyncio.run(test_debug())

test_pandas_resolution.py ADDED Viewed

	@@ -0,0 +1,14 @@

+"""Test pandas library resolution specifically"""
+import asyncio
+from ankigen_core.context7 import Context7Client
+async def test_pandas():
+    """Resolve "pandas" through Context7Client and print the resulting ID."""
+    resolved = await Context7Client().resolve_library_id("pandas")
+    print(f"Result: {resolved}")
+if __name__ == "__main__":
+    asyncio.run(test_pandas())