File size: 3,077 Bytes
ae588db
545358b
 
40cdc42
ae588db
 
 
 
 
 
 
 
 
 
40cdc42
ae588db
 
40cdc42
 
 
 
 
ae588db
 
40cdc42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae588db
40cdc42
 
 
 
 
 
 
 
 
 
 
 
545358b
40cdc42
 
545358b
40cdc42
545358b
 
 
 
40cdc42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
from google import genai
from google.genai import types
from groq import Groq
from typing import Dict, Any, List, Optional
import json

from src.config import Config
import logging

logger = logging.getLogger("KnowledgeGraph")

def _parse_llm_json(raw: Optional[str]) -> Dict[str, Any]:
    """Strip an optional Markdown code fence from an LLM reply and parse it as a JSON object.

    Args:
        raw: The raw text returned by the model; may be None or empty.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the reply is empty, is not valid JSON
            (json.JSONDecodeError is a ValueError subclass), or decodes to
            something other than a JSON object.
    """
    if not raw or not raw.strip():
        raise ValueError("LLM returned an empty response")

    content = raw.strip()
    # Models often wrap JSON in ```json ... ``` or plain ``` ... ``` fences
    # even when asked for raw JSON; drop the fence markers if present.
    if content.startswith("```json"):
        content = content[len("```json"):]
    elif content.startswith("```"):
        content = content[len("```"):]
    if content.endswith("```"):
        content = content[:-len("```")]

    parsed = json.loads(content.strip())
    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object, got {type(parsed).__name__}"
        )
    return parsed


def extract_knowledge_graph(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract a Knowledge Graph (concepts and relationships) from text via an LLM.

    Groq is tried first when a Groq API key is available; Gemini is used as a
    backup when a Gemini API key is available and Groq was skipped or failed.
    Keys are read from the GROQ_API_KEY / GEMINI_API_KEY environment variables,
    falling back to the matching attributes on Config.

    Args:
        text: Source text to analyze. Only the first 10000 characters are
            sent to the model.

    Returns:
        The JSON object decoded from the model's reply (the prompt asks for
        'concepts' and 'relationships' keys; the keys are not validated here),
        or None when no API key is configured, the text is empty, or every
        available provider failed.
    """
    groq_key = os.environ.get("GROQ_API_KEY") or Config.GROQ_API_KEY
    gemini_key = os.environ.get("GEMINI_API_KEY") or Config.GEMINI_API_KEY

    if not groq_key and not gemini_key:
        logger.warning("No LLM API key set (GROQ_API_KEY or GEMINI_API_KEY). Skipping Knowledge Graph extraction.")
        return None

    # Nothing to analyze: avoid spending an LLM call on empty input.
    if not text or not text.strip():
        logger.warning("Empty text supplied. Skipping Knowledge Graph extraction.")
        return None

    prompt = f"""
Analyze the following text and extract a Knowledge Graph.
Identify key "Concepts" (entities, technologies, ideas) and "Relationships" between them.

Return ONLY a valid JSON object with this structure:
{{
    "concepts": [
        {{"id": "concept_name", "type": "technology/person/idea", "description": "short definition"}}
    ],
    "relationships": [
        {{"source": "concept_name", "target": "concept_name", "relation": "uses/created/is_a"}}
    ]
}}

Text:
{text[:10000]}
"""

    # Try Groq first (PRIMARY - fastest)
    if groq_key:
        try:
            client = Groq(api_key=groq_key)
            response = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=2000,
                temperature=0.3
            )
            return _parse_llm_json(response.choices[0].message.content)
        except Exception as e:
            # Deliberately broad: any Groq failure (network, auth, bad JSON)
            # should fall through to the Gemini backup rather than abort.
            logger.warning("Groq failed for Knowledge Graph: %s, trying Gemini...", e)

    # Fallback to Gemini (BACKUP) - Using new google.genai SDK
    if gemini_key:
        try:
            client = genai.Client(api_key=gemini_key)
            response = client.models.generate_content(
                model='gemini-2.0-flash-exp',
                contents=prompt
            )
            return _parse_llm_json(response.text)
        except Exception as e:
            # Last provider: extraction is best-effort, so log and give up.
            logger.error("Gemini also failed for Knowledge Graph: %s", e)
            return None

    return None