import json
import logging
import os
from typing import Dict, Any, List, Optional

from google import genai
from google.genai import types
from groq import Groq

from src.config import Config

logger = logging.getLogger("KnowledgeGraph")


def _parse_graph_json(raw: Optional[str]) -> Dict[str, Any]:
    """Turn a raw LLM reply into a knowledge-graph dictionary.

    Strips surrounding whitespace and an optional Markdown code fence
    (three backticks, with or without a ``json`` tag) before decoding.

    Raises:
        ValueError: if the reply is empty, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or
            decodes to something other than a JSON object.
    """
    if not raw or not raw.strip():
        raise ValueError("empty response from model")

    content = raw.strip()
    # Models frequently wrap JSON in a fenced block despite the prompt.
    if content.startswith("```json"):
        content = content[7:]
    elif content.startswith("```"):
        content = content[3:]
    if content.endswith("```"):
        content = content[:-3]

    graph = json.loads(content.strip())
    if not isinstance(graph, dict):
        # Callers are promised a dict; a bare list/string is a bad reply.
        raise ValueError(
            f"expected a JSON object, got {type(graph).__name__}"
        )
    return graph


def extract_knowledge_graph(text: str) -> Optional[Dict[str, Any]]:
    """
    Uses Groq (primary) or Gemini (backup) to extract a Knowledge Graph
    (Concepts & Relationships) from text.

    API keys are read from the ``GROQ_API_KEY`` / ``GEMINI_API_KEY``
    environment variables, falling back to the matching ``Config``
    attributes. Only the first 10000 characters of ``text`` are sent.

    Args:
        text: Source text to analyze.

    Returns:
        The decoded JSON object returned by the model (the prompt asks for
        'concepts' and 'relationships' keys, but the keys are not
        validated here), or None when no API key is configured or every
        configured provider fails or returns an unusable reply.
    """
    groq_key = os.environ.get("GROQ_API_KEY") or Config.GROQ_API_KEY
    gemini_key = os.environ.get("GEMINI_API_KEY") or Config.GEMINI_API_KEY

    if not groq_key and not gemini_key:
        logger.warning("No LLM API key set (GROQ_API_KEY or GEMINI_API_KEY). Skipping Knowledge Graph extraction.")
        return None

    prompt = f"""
    Analyze the following text and extract a Knowledge Graph.
    Identify key "Concepts" (entities, technologies, ideas) and "Relationships" between them.

    Return ONLY a valid JSON object with this structure:
    {{
        "concepts": [
            {{"id": "concept_name", "type": "technology/person/idea", "description": "short definition"}}
        ],
        "relationships": [
            {{"source": "concept_name", "target": "concept_name", "relation": "uses/created/is_a"}}
        ]
    }}

    Text:
    {text[:10000]}
    """

    # Try Groq first (PRIMARY - fastest)
    if groq_key:
        try:
            client = Groq(api_key=groq_key)
            response = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=2000,
                temperature=0.3
            )
            return _parse_graph_json(response.choices[0].message.content)
        except Exception as e:
            # Broad catch is deliberate: any SDK, network or parsing
            # failure should fall through to the backup provider.
            logger.warning("Groq failed for Knowledge Graph: %s, trying Gemini...", e)

    # Fallback to Gemini (BACKUP) - Using new google.genai SDK
    if gemini_key:
        try:
            client = genai.Client(api_key=gemini_key)
            response = client.models.generate_content(
                model='gemini-2.0-flash-exp',
                contents=prompt
            )
            return _parse_graph_json(response.text)
        except Exception as e:
            # Best-effort extraction: log and report "no graph" rather
            # than propagate provider errors to the caller.
            logger.error("Gemini also failed for Knowledge Graph: %s", e)

    return None