Upload folder using huggingface_hub
Browse files- ankigen_core/agents/integration.py +23 -1
- ankigen_core/agents/schemas.py +47 -0
- ankigen_core/auto_config.py +171 -0
- ankigen_core/context7.py +104 -22
- app.py +86 -2
- test_context7_debug.py +30 -0
- test_pandas_resolution.py +14 -0
ankigen_core/agents/integration.py
CHANGED
|
@@ -72,8 +72,30 @@ class AgentOrchestrator:
|
|
| 72 |
logger.info(f"Fetching library documentation for: {library_name}")
|
| 73 |
try:
|
| 74 |
context7_client = Context7Client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
library_docs = await context7_client.fetch_library_documentation(
|
| 76 |
-
library_name, topic=library_topic, tokens=
|
| 77 |
)
|
| 78 |
|
| 79 |
if library_docs:
|
|
|
|
| 72 |
logger.info(f"Fetching library documentation for: {library_name}")
|
| 73 |
try:
|
| 74 |
context7_client = Context7Client()
|
| 75 |
+
|
| 76 |
+
# Dynamic token allocation based on card generation needs
|
| 77 |
+
# More cards need more comprehensive documentation
|
| 78 |
+
base_tokens = 8000 # Increased base from 5000
|
| 79 |
+
if num_cards > 40:
|
| 80 |
+
token_limit = 12000 # Large card sets need more context
|
| 81 |
+
elif num_cards > 20:
|
| 82 |
+
token_limit = 10000 # Medium sets
|
| 83 |
+
else:
|
| 84 |
+
token_limit = base_tokens # Small sets
|
| 85 |
+
|
| 86 |
+
# If topic is specified, we can be more focused and use fewer tokens
|
| 87 |
+
if library_topic:
|
| 88 |
+
token_limit = int(
|
| 89 |
+
token_limit * 0.8
|
| 90 |
+
) # Can be more efficient with focused retrieval
|
| 91 |
+
|
| 92 |
+
logger.info(
|
| 93 |
+
f"Fetching {token_limit} tokens of documentation"
|
| 94 |
+
+ (f" for topic: {library_topic}" if library_topic else "")
|
| 95 |
+
)
|
| 96 |
+
|
| 97 |
library_docs = await context7_client.fetch_library_documentation(
|
| 98 |
+
library_name, topic=library_topic, tokens=token_limit
|
| 99 |
)
|
| 100 |
|
| 101 |
if library_docs:
|
ankigen_core/agents/schemas.py
CHANGED
|
@@ -134,3 +134,50 @@ class TokenUsageSchema(BaseModel):
|
|
| 134 |
total_tokens: int = Field(..., ge=0, description="Total tokens used")
|
| 135 |
estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
|
| 136 |
model: str = Field(..., description="Model used for the request")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
total_tokens: int = Field(..., ge=0, description="Total tokens used")
|
| 135 |
estimated_cost: float = Field(..., ge=0.0, description="Estimated cost in USD")
|
| 136 |
model: str = Field(..., description="Model used for the request")
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
class AutoConfigSchema(BaseModel):
    """Schema for auto-configuration based on subject analysis"""

    # NOTE: field order, field names and every description string are part of
    # the JSON schema handed to the model as a structured-output format, so
    # they are kept exactly as declared.

    # --- Context7 lookup -------------------------------------------------
    # Search term used to resolve a library in Context7.
    library_search_term: str = Field(
        default=...,
        description="Library name to search for in Context7 (e.g., 'pandas', 'react', 'tensorflow')",
    )

    # Optional narrowing of the documentation fetched for that library.
    documentation_focus: Optional[str] = Field(
        default=None,
        description="Specific topic/area within the library documentation to focus on",
    )

    # --- Suggested generation settings -----------------------------------
    # Bounded to 2..20 topics by the ge/le validators.
    topic_number: int = Field(
        default=..., ge=2, le=20, description="Number of topics to generate (2-20)"
    )
    # Bounded to 2..30 cards per topic by the ge/le validators.
    cards_per_topic: int = Field(
        default=..., ge=2, le=30, description="Number of cards per topic (2-30)"
    )
    # Free-text guidance forwarded to card generation.
    learning_preferences: str = Field(
        default=...,
        description="Learning preferences and focus areas for card generation",
    )
    # True when cloze deletions suit the material.
    generate_cloze: bool = Field(
        default=...,
        description="Whether to generate cloze cards (true for syntax/code, false for concepts)",
    )
    # Plain string: the value is not validated against a fixed set here.
    model_choice: str = Field(
        default=...,
        description="Recommended model: 'gpt-4.1' for complex topics, 'gpt-4.1-nano' for simpler topics",
    )

    # --- Analysis metadata -----------------------------------------------
    # Plain strings: the categories named in the descriptions are advisory.
    subject_type: str = Field(
        default=...,
        description="Type of subject: 'concepts', 'syntax', 'api', 'theory', 'practical'",
    )
    scope: str = Field(
        default=..., description="Scope of the subject: 'narrow', 'medium', 'broad'"
    )
    rationale: str = Field(
        default=...,
        description="Brief explanation of why these settings were chosen",
    )
|
ankigen_core/auto_config.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Auto-configuration service for intelligent settings population"""
|
| 2 |
+
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
|
| 6 |
+
from ankigen_core.logging import logger
|
| 7 |
+
from ankigen_core.context7 import Context7Client
|
| 8 |
+
from ankigen_core.agents.schemas import AutoConfigSchema
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class AutoConfigService:
    """Service for analyzing subjects and auto-configuring flashcard generation settings.

    The pipeline is: ask an OpenAI model to turn a free-text subject into an
    ``AutoConfigSchema``, try to resolve the detected library in Context7, and
    flatten the result into a plain dict keyed by UI component name.
    """

    # Model used for the analysis request itself (not for card generation).
    _ANALYSIS_MODEL = "gpt-4.1-nano"

    # The system prompt never varies between calls, so it is built once.
    _SYSTEM_PROMPT = """You are an expert educational content analyzer specializing in spaced repetition learning. Analyze the given subject and determine optimal flashcard generation settings that focus on ESSENTIAL, HIGH-VALUE concepts.

CRITICAL PRINCIPLE: Quality over quantity. Focus on fundamental concepts that unlock understanding, not trivial facts.

Consider:
1. Extract any library/framework names for Context7 search (e.g., "pandas", "react", "tensorflow")
2. IMPORTANT: Extract the specific documentation focus from the subject
- "Basic Pandas Dataframe" → documentation_focus: "dataframe basics, creation, indexing"
- "React hooks tutorial" → documentation_focus: "hooks, useState, useEffect"
- "Docker networking" → documentation_focus: "networking, network drivers, container communication"
3. Identify the scope: narrow (specific feature), medium (several related topics), broad (comprehensive overview)
4. Determine content type: concepts (theory/understanding), syntax (code/commands), api (library usage), practical (hands-on skills)
5. Suggest optimal number of topics and cards - aim for comprehensive learning (30-60 total cards minimum)
6. Recommend cloze cards for syntax/code, basic cards for concepts
7. Choose model based on complexity: gpt-4.1 for complex/advanced, gpt-4.1-nano for basic/simple

IMPORTANT - Focus on HIGH-VALUE topics:
- GOOD topics: Core concepts, fundamental principles, mental models, design patterns, key abstractions
- AVOID topics: Trivial commands (like "docker ps"), basic syntax that's easily googled, minor API details
- Example: For Docker, focus on "container lifecycle", "image layers", "networking models" NOT "list of docker commands"

Guidelines for settings (MINIMUM 30 cards total):
- Narrow/specific scope: 4-5 essential topics with 8-10 cards each (32-50 cards)
- Medium scope: 5-7 core topics with 7-9 cards each (35-63 cards)
- Broad scope: 6-8 fundamental topics with 6-8 cards each (36-64 cards)
- "Basic"/"Introduction" keywords: Start with fundamentals, 40-50 cards total
- "Advanced"/"Complex" keywords: Deep dive into critical concepts, 45-60 cards

Learning preference suggestions:
- For basics: "Focus on fundamental concepts and mental models that form the foundation"
- For practical: "Emphasize core patterns and principles with real-world applications"
- For theory: "Build deep conceptual understanding with progressive complexity"

Documentation focus examples (be specific and comprehensive):
- "Basic Pandas Dataframe" → "dataframe creation, indexing, selection, basic operations, data types"
- "React hooks" → "useState, useEffect, custom hooks, hook rules, common patterns"
- "Docker basics" → "containers, images, Dockerfile, volumes, basic networking"
- "Advanced TypeScript" → "generics, conditional types, mapped types, utility types, type inference"

Return a JSON object matching the AutoConfigSchema."""

    def __init__(self):
        """Create the service with its own Context7 client."""
        self.context7_client = Context7Client()

    @staticmethod
    def _default_config() -> AutoConfigSchema:
        """Return the fallback settings used when subject analysis fails.

        No library is suggested; 6 topics x 8 cards keeps the card count at 48.
        """
        return AutoConfigSchema(
            library_search_term="",
            documentation_focus=None,
            topic_number=6,
            cards_per_topic=8,
            learning_preferences="Focus on fundamental concepts and core principles with practical examples",
            generate_cloze=False,
            model_choice="gpt-4.1-nano",
            subject_type="concepts",
            scope="medium",
            rationale="Using default settings due to analysis error",
        )

    async def analyze_subject(
        self, subject: str, openai_client: AsyncOpenAI
    ) -> AutoConfigSchema:
        """Analyze a subject string and return optimal configuration settings.

        Args:
            subject: Free-text subject entered by the user.
            openai_client: Client used for the structured-output request.

        Returns:
            The configuration parsed from the model response, or the fallback
            from ``_default_config`` if the request fails or yields no parsed
            result. This method does not propagate exceptions.
        """
        user_prompt = f"""Analyze this subject for flashcard generation: "{subject}"

Extract:
1. The library name if mentioned
2. The specific documentation focus (what aspects of the library to focus on)
3. Optimal settings for effective learning

Provide a brief rationale for your choices."""

        try:
            response = await openai_client.beta.chat.completions.parse(
                model=self._ANALYSIS_MODEL,  # Use nano for this analysis task
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                response_format=AutoConfigSchema,
                temperature=0.3,  # Lower temperature for more consistent analysis
            )

            if not response.choices or not response.choices[0].message.parsed:
                raise ValueError("Failed to get valid response from OpenAI")

            config = response.choices[0].message.parsed
            logger.info(
                f"Subject analysis complete: library='{config.library_search_term}', "
                f"topics={config.topic_number}, cards/topic={config.cards_per_topic}"
            )
            return config

        except Exception as e:
            # Deliberate best-effort boundary: auto-fill should still produce
            # usable settings when the API call fails. Log with the traceback
            # so the underlying failure stays diagnosable.
            logger.exception(f"Failed to analyze subject: {e}")
            return self._default_config()

    async def auto_configure(
        self, subject: str, openai_client: AsyncOpenAI
    ) -> Dict[str, Any]:
        """
        Complete auto-configuration pipeline:
        1. Analyze subject with AI
        2. Search Context7 for library if detected
        3. Return complete configuration for UI

        Args:
            subject: Free-text subject entered by the user.
            openai_client: Client forwarded to ``analyze_subject``.

        Returns:
            An empty dict when ``subject`` is empty or whitespace-only.
            Otherwise a dict with the keys ``library_name``, ``library_topic``,
            ``topic_number``, ``cards_per_topic``, ``preference_prompt``,
            ``generate_cloze_checkbox``, ``model_choice`` and
            ``analysis_metadata``. ``library_name`` is blank unless Context7
            resolved the library.
        """
        if not subject or not subject.strip():
            logger.warning("Empty subject provided to auto_configure")
            return {}

        # Use the same normalised text for logging and for the prompt.
        subject = subject.strip()
        logger.info(f"Starting auto-configuration for subject: '{subject}'")

        # Step 1: Analyze the subject
        config = await self.analyze_subject(subject, openai_client)

        # Step 2: Search Context7 for library if one was detected.
        # The term comes from a model, so normalise it: a whitespace-only
        # value must count as "no library" rather than trigger a lookup.
        search_term = (config.library_search_term or "").strip()
        library_id = None
        if search_term:
            logger.info(f"Searching Context7 for library: '{search_term}'")
            try:
                library_id = await self.context7_client.resolve_library_id(
                    search_term
                )
            except Exception as e:
                # A failed lookup only means no library is pre-filled.
                logger.exception(f"Context7 search failed: {e}")
            else:
                if library_id:
                    logger.info(f"Resolved library to Context7 ID: {library_id}")
                else:
                    logger.warning(
                        f"Could not find library '{search_term}' in Context7"
                    )

        # Step 3: Build complete configuration dict for UI
        ui_config = {
            "library_name": search_term if library_id else "",
            "library_topic": config.documentation_focus or "",
            "topic_number": config.topic_number,
            "cards_per_topic": config.cards_per_topic,
            "preference_prompt": config.learning_preferences,
            "generate_cloze_checkbox": config.generate_cloze,
            "model_choice": config.model_choice,
            # Metadata for display
            "analysis_metadata": {
                "subject_type": config.subject_type,
                "scope": config.scope,
                "rationale": config.rationale,
                "library_found": library_id is not None,
                "context7_id": library_id,
            },
        }

        logger.info(
            f"Auto-configuration complete: library={'found' if library_id else 'not found'}, "
            f"topics={config.topic_number}, model={config.model_choice}"
        )

        return ui_config
|
ankigen_core/context7.py
CHANGED
|
@@ -88,32 +88,114 @@ class Context7Client:
|
|
| 88 |
)
|
| 89 |
|
| 90 |
if result and result.get("success") and result.get("text"):
|
| 91 |
-
# Parse the text to extract library ID
|
| 92 |
text = result["text"]
|
| 93 |
-
import re
|
| 94 |
|
| 95 |
-
#
|
|
|
|
| 96 |
lines = text.split("\n")
|
|
|
|
|
|
|
| 97 |
for line in lines:
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
logger.warning(f"Could not resolve library ID for '{library_name}'")
|
| 119 |
return None
|
|
|
|
| 88 |
)
|
| 89 |
|
| 90 |
if result and result.get("success") and result.get("text"):
|
|
|
|
| 91 |
text = result["text"]
|
|
|
|
| 92 |
|
| 93 |
+
# Parse the structured response format
|
| 94 |
+
libraries = []
|
| 95 |
lines = text.split("\n")
|
| 96 |
+
|
| 97 |
+
current_lib = {}
|
| 98 |
for line in lines:
|
| 99 |
+
line = line.strip()
|
| 100 |
+
|
| 101 |
+
# Parse title
|
| 102 |
+
if line.startswith("- Title:"):
|
| 103 |
+
if current_lib and current_lib.get("id"):
|
| 104 |
+
libraries.append(current_lib)
|
| 105 |
+
current_lib = {
|
| 106 |
+
"title": line.replace("- Title:", "").strip().lower()
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
# Parse library ID
|
| 110 |
+
elif line.startswith("- Context7-compatible library ID:"):
|
| 111 |
+
lib_id = line.replace(
|
| 112 |
+
"- Context7-compatible library ID:", ""
|
| 113 |
+
).strip()
|
| 114 |
+
if current_lib is not None:
|
| 115 |
+
current_lib["id"] = lib_id
|
| 116 |
+
|
| 117 |
+
# Parse code snippets count
|
| 118 |
+
elif line.startswith("- Code Snippets:"):
|
| 119 |
+
snippets_str = line.replace("- Code Snippets:", "").strip()
|
| 120 |
+
try:
|
| 121 |
+
snippets = int(snippets_str)
|
| 122 |
+
if current_lib is not None:
|
| 123 |
+
current_lib["snippets"] = snippets
|
| 124 |
+
except ValueError:
|
| 125 |
+
pass
|
| 126 |
+
|
| 127 |
+
# Parse trust score
|
| 128 |
+
elif line.startswith("- Trust Score:"):
|
| 129 |
+
score_str = line.replace("- Trust Score:", "").strip()
|
| 130 |
+
try:
|
| 131 |
+
trust = float(score_str)
|
| 132 |
+
if current_lib is not None:
|
| 133 |
+
current_lib["trust"] = trust
|
| 134 |
+
except ValueError:
|
| 135 |
+
pass
|
| 136 |
+
|
| 137 |
+
# Add the last library if exists
|
| 138 |
+
if current_lib and current_lib.get("id"):
|
| 139 |
+
libraries.append(current_lib)
|
| 140 |
+
|
| 141 |
+
# If we found libraries, pick the best match
|
| 142 |
+
if libraries:
|
| 143 |
+
search_term = library_name.lower()
|
| 144 |
+
|
| 145 |
+
# Score each library
|
| 146 |
+
best_lib = None
|
| 147 |
+
best_score = -1
|
| 148 |
+
|
| 149 |
+
for lib in libraries:
|
| 150 |
+
score = 0
|
| 151 |
+
lib_title = lib.get("title", "")
|
| 152 |
+
lib_id = lib["id"].lower()
|
| 153 |
+
|
| 154 |
+
# Exact title match gets highest priority
|
| 155 |
+
if lib_title == search_term:
|
| 156 |
+
score += 10000
|
| 157 |
+
# Check if it's exactly "pandas" in the path (not geopandas, etc)
|
| 158 |
+
elif lib_id == f"/{search_term}-dev/{search_term}":
|
| 159 |
+
score += 5000
|
| 160 |
+
elif f"/{search_term}/" in lib_id or lib_id.endswith(
|
| 161 |
+
f"/{search_term}"
|
| 162 |
+
):
|
| 163 |
+
score += 2000
|
| 164 |
+
# Partial title match (but penalize if it's a compound like "geopandas")
|
| 165 |
+
elif search_term in lib_title:
|
| 166 |
+
if lib_title == search_term:
|
| 167 |
+
score += 1000
|
| 168 |
+
elif lib_title.startswith(search_term):
|
| 169 |
+
score += 200
|
| 170 |
+
else:
|
| 171 |
+
score += 50
|
| 172 |
+
|
| 173 |
+
# Strong bonus for code snippets (indicates main library)
|
| 174 |
+
snippets = lib.get("snippets", 0)
|
| 175 |
+
score += snippets / 10 # Pandas has 7386 snippets
|
| 176 |
+
|
| 177 |
+
# Significant bonus for trust score (high trust = official/authoritative)
|
| 178 |
+
trust = lib.get("trust", 0)
|
| 179 |
+
score += trust * 100 # Trust 9.2 = 920 points, Trust 7 = 700 points
|
| 180 |
+
|
| 181 |
+
# Debug logging
|
| 182 |
+
if search_term in lib_title or search_term in lib_id:
|
| 183 |
+
logger.debug(
|
| 184 |
+
f"Scoring {lib['id']}: title='{lib_title}', snippets={snippets}, "
|
| 185 |
+
f"trust={trust}, score={score:.2f}"
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
+
if score > best_score:
|
| 189 |
+
best_score = score
|
| 190 |
+
best_lib = lib
|
| 191 |
+
|
| 192 |
+
if best_lib:
|
| 193 |
+
logger.info(
|
| 194 |
+
f"Resolved '{library_name}' to ID: {best_lib['id']} "
|
| 195 |
+
f"(title: {best_lib.get('title', 'unknown')}, snippets: {best_lib.get('snippets', 0)}, "
|
| 196 |
+
f"trust: {best_lib.get('trust', 0)}, score: {best_score:.2f})"
|
| 197 |
+
)
|
| 198 |
+
return best_lib["id"]
|
| 199 |
|
| 200 |
logger.warning(f"Could not resolve library ID for '{library_name}'")
|
| 201 |
return None
|
app.py
CHANGED
|
@@ -29,6 +29,7 @@ from ankigen_core.utils import (
|
|
| 29 |
ResponseCache,
|
| 30 |
get_logger,
|
| 31 |
) # fetch_webpage_text is used by card_generator
|
|
|
|
| 32 |
|
| 33 |
# --- Initialization ---
|
| 34 |
logger = get_logger()
|
|
@@ -203,6 +204,10 @@ def create_ankigen_interface():
|
|
| 203 |
label="Subject",
|
| 204 |
placeholder="e.g., 'Basic SQL Concepts'",
|
| 205 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
with gr.Group(visible=False) as path_mode:
|
| 207 |
description = gr.Textbox(
|
| 208 |
label="Learning Goal",
|
|
@@ -258,9 +263,10 @@ def create_ankigen_interface():
|
|
| 258 |
)
|
| 259 |
|
| 260 |
# Context7 Library Documentation
|
| 261 |
-
|
| 262 |
"Library Documentation (optional)", open=False
|
| 263 |
-
)
|
|
|
|
| 264 |
library_name_input = gr.Textbox(
|
| 265 |
label="Library Name",
|
| 266 |
placeholder="e.g., 'react', 'tensorflow', 'pandas'",
|
|
@@ -681,6 +687,84 @@ def create_ankigen_interface():
|
|
| 681 |
api_name="export_main_to_apkg",
|
| 682 |
)
|
| 683 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
async def handle_web_crawl_click(
|
| 685 |
api_key_val: str,
|
| 686 |
url: str,
|
|
|
|
| 29 |
ResponseCache,
|
| 30 |
get_logger,
|
| 31 |
) # fetch_webpage_text is used by card_generator
|
| 32 |
+
from ankigen_core.auto_config import AutoConfigService
|
| 33 |
|
| 34 |
# --- Initialization ---
|
| 35 |
logger = get_logger()
|
|
|
|
| 204 |
label="Subject",
|
| 205 |
placeholder="e.g., 'Basic SQL Concepts'",
|
| 206 |
)
|
| 207 |
+
auto_fill_btn = gr.Button(
|
| 208 |
+
"Auto-fill",
|
| 209 |
+
variant="secondary",
|
| 210 |
+
)
|
| 211 |
with gr.Group(visible=False) as path_mode:
|
| 212 |
description = gr.Textbox(
|
| 213 |
label="Learning Goal",
|
|
|
|
| 263 |
)
|
| 264 |
|
| 265 |
# Context7 Library Documentation
|
| 266 |
+
library_accordion = gr.Accordion(
|
| 267 |
"Library Documentation (optional)", open=False
|
| 268 |
+
)
|
| 269 |
+
with library_accordion:
|
| 270 |
library_name_input = gr.Textbox(
|
| 271 |
label="Library Name",
|
| 272 |
placeholder="e.g., 'react', 'tensorflow', 'pandas'",
|
|
|
|
| 687 |
api_name="export_main_to_apkg",
|
| 688 |
)
|
| 689 |
|
| 690 |
+
# Auto-fill handler
# Components populated by auto-fill, in the exact order the handler returns
# their updates. Declared once so the handler's "no change" result and the
# click wiring cannot drift apart.
auto_fill_outputs = [
    library_name_input,
    library_topic_input,
    topic_number,
    cards_per_topic,
    preference_prompt,
    generate_cloze_checkbox,
    model_choice,
    library_accordion,  # Reference to the accordion component
]


async def handle_auto_fill_click(
    subject_text: str,
    api_key: str,
    progress=gr.Progress(track_tqdm=True),
):
    """Handle auto-fill button click to populate all settings.

    Args:
        subject_text: Contents of the subject textbox.
        api_key: OpenAI API key entered in the UI.
        progress: Gradio progress tracker.

    Returns:
        One update per entry of ``auto_fill_outputs``, in that order. When the
        subject or API key is missing, or no configuration could be produced,
        a warning is shown and every update is a no-op.

    Raises:
        gr.Error: If auto-configuration fails unexpectedly.
    """

    def no_updates():
        # One empty update per wired output component.
        return [gr.update() for _ in auto_fill_outputs]

    if not subject_text or not subject_text.strip():
        gr.Warning("Please enter a subject first")
        return no_updates()

    if not api_key:
        gr.Warning("OpenAI API key is required for auto-configuration")
        return no_updates()

    try:
        progress(0, desc="Analyzing subject...")

        # Initialize OpenAI client
        await client_manager.initialize_client(api_key)
        openai_client = client_manager.get_client()

        # Get auto-configuration
        auto_config_service = AutoConfigService()
        config = await auto_config_service.auto_configure(
            subject_text, openai_client
        )

        if not config:
            gr.Warning("Could not generate configuration")
            return no_updates()

        # Return updates for all relevant UI components
        return [
            gr.update(value=config.get("library_name", "")),  # library_name_input
            gr.update(value=config.get("library_topic", "")),  # library_topic_input
            gr.update(value=config.get("topic_number", 3)),  # topic_number
            gr.update(value=config.get("cards_per_topic", 5)),  # cards_per_topic
            gr.update(value=config.get("preference_prompt", "")),  # preference_prompt
            gr.update(
                value=config.get("generate_cloze_checkbox", False)
            ),  # generate_cloze_checkbox
            gr.update(value=config.get("model_choice", "gpt-4.1-nano")),  # model_choice
            gr.update(open=True),  # Open the Library Documentation accordion
        ]

    except Exception as e:
        logger.error(f"Auto-configuration failed: {e}", exc_info=True)
        # gr.Error is an exception type: it only reaches the user when raised.
        # Merely constructing it, as before, displayed nothing.
        raise gr.Error(f"Auto-configuration failed: {str(e)}") from e


auto_fill_btn.click(
    fn=handle_auto_fill_click,
    inputs=[subject, api_key_input],
    outputs=auto_fill_outputs,
)
|
| 767 |
+
|
| 768 |
async def handle_web_crawl_click(
|
| 769 |
api_key_val: str,
|
| 770 |
url: str,
|
test_context7_debug.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Debug Context7 response to understand format"""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
from ankigen_core.context7 import Context7Client
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
async def test_debug():
    """Print the raw Context7 ``resolve-library-id`` response for "pandas".

    The text is printed once verbatim and once split on newlines with a
    right-aligned index per line, to make the response layout easy to inspect.
    """
    context7 = Context7Client()

    # Get raw response
    response = await context7.call_context7_tool(
        "resolve-library-id", {"libraryName": "pandas"}
    )

    usable = response and response.get("success") and response.get("text")
    if not usable:
        print("Failed to get response:", response)
        return

    raw_text = response["text"]
    print("=== RAW RESPONSE ===")
    print(raw_text)
    print("=== END RESPONSE ===")

    # Also show line by line with indices
    print("\n=== LINES WITH INDICES ===")
    for index, content in enumerate(raw_text.split("\n")):
        print(f"{index:3}: '{content}'")


if __name__ == "__main__":
    asyncio.run(test_debug())
|
test_pandas_resolution.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test pandas library resolution specifically"""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
from ankigen_core.context7 import Context7Client
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
async def test_pandas():
    """Resolve "pandas" through Context7 and print the resulting library ID."""
    resolved_id = await Context7Client().resolve_library_id("pandas")
    print(f"Result: {resolved_id}")


if __name__ == "__main__":
    asyncio.run(test_pandas())
|