File size: 5,071 Bytes
f93e884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import json
from dotenv import load_dotenv
from google import genai
from google.genai import types
from typing import List, Dict, Any, Optional

# Load environment variables
load_dotenv()

class GeminiService:
    """Wrapper around the google-genai client for professor search, detail lookup and chat.

    The client is created lazily-safe: when ``GEMINI_API_KEY`` is missing the
    instance is still constructed (``self.client`` is ``None``) and every public
    method raises ``ValueError`` via ``_check_client`` so the UI layer can
    surface the problem instead of crashing at import/deploy time.
    """

    def __init__(self):
        # Read the key from the environment; works with a local .env as well
        # as Hugging Face Secrets.
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            # Only warn here so deployment does not fail; the UI layer handles it.
            print("警告:找不到 GEMINI_API_KEY")

        self.client = genai.Client(api_key=api_key) if api_key else None
        self.model_id = os.getenv("GEMINI_MODEL_ID", "gemini-2.0-flash")

    def _check_client(self):
        """Raise ``ValueError`` when no API client could be created."""
        if not self.client:
            raise ValueError("API Key 未設定,請檢查 .env 或 Hugging Face Secrets")

    @staticmethod
    def _search_config():
        """Build the generation config that enables the Google Search tool."""
        return types.GenerateContentConfig(
            tools=[types.Tool(google_search=types.GoogleSearch())]
        )

    @staticmethod
    def _parse_professor_list(payload: Optional[str]) -> List[Dict]:
        """Parse the model's JSON answer into a list of professor dicts.

        Args:
            payload: Raw JSON text returned by the model (may be ``None``/empty).

        Returns:
            The decoded list restricted to dict entries; ``[]`` when the text
            is empty, is not valid JSON, or does not decode to a list.
        """
        if not payload:
            return []
        try:
            data = json.loads(payload)
        except (TypeError, ValueError) as e:  # JSONDecodeError is a ValueError
            print(f"JSON Parse Error: {e}")
            return []
        if not isinstance(data, list):
            print(f"JSON Parse Error: expected a JSON array, got {type(data).__name__}")
            return []
        # Downstream code calls .get() on every entry, so drop anything that
        # is not an object.
        return [item for item in data if isinstance(item, dict)]

    @staticmethod
    def _extract_sources(response) -> List[Dict]:
        """Collect de-duplicated ``{"title", "uri"}`` dicts from grounding metadata.

        Only the first candidate is inspected. Missing candidates, metadata or
        chunks yield an empty list instead of raising.
        """
        candidates = getattr(response, "candidates", None) or []
        if not candidates:
            return []
        metadata = candidates[0].grounding_metadata
        chunks = (metadata.grounding_chunks if metadata else None) or []

        # Keyed by URI: position follows the first occurrence, value follows
        # the last one (same semantics as a dict comprehension over the list).
        unique: Dict[str, Dict] = {}
        for chunk in chunks:
            web = chunk.web
            if web and web.uri and web.title:
                unique[web.uri] = {"title": web.title, "uri": web.uri}
        return list(unique.values())

    def search_professors(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Find professors in Taiwan matching ``query`` using a two-phase prompt.

        Phase 1 asks the model (with Google Search enabled) for a plain-text
        list; phase 2 asks it to convert that text into a JSON array.

        Args:
            query: Research field to search for.
            exclude_names: Names the model is told not to include. ``None``
                (the default) or an empty list disables the exclusion clause.

        Returns:
            A list of dicts as produced by the model (requested keys: ``name``,
            ``university``, ``department``, ``relevanceScore``); ``[]`` when
            the search yields no text or the JSON cannot be parsed.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        exclusion_prompt = ""
        if exclude_names:
            exclusion_prompt = f"IMPORTANT: Do not include: {', '.join(exclude_names)}."

        # Phase 1: Search (Pure Text)
        search_prompt = f"""
        Using Google Search, find 10 prominent professors in universities across Taiwan who are experts in the field of "{query}".
        
        CRITICAL:
        1. FACT CHECK: Verify they are currently faculty.
        2. RELEVANCE: Their PRIMARY research focus must be "{query}".
        {exclusion_prompt}
        
        List them (Name - University - Department) in Traditional Chinese.
        """

        search_response = self.client.models.generate_content(
            model=self.model_id,
            contents=search_prompt,
            config=self._search_config(),
        )
        raw_text = search_response.text
        if not raw_text:
            # Nothing to extract from; skip the second request rather than
            # feeding the literal text "None" into the extraction prompt.
            print("JSON Parse Error: search phase returned no text")
            return []

        # Phase 2: Extract JSON
        extract_prompt = f"""
        From the text below, extract professor names, universities, and departments.
        Calculate a Relevance Score (0-100) based on query: "{query}".
        
        Return ONLY a JSON array: [{{"name": "...", "university": "...", "department": "...", "relevanceScore": 85}}]
        
        Text:
        ---
        {raw_text}
        ---
        """

        extract_response = self.client.models.generate_content(
            model=self.model_id,
            contents=extract_prompt,
            config=types.GenerateContentConfig(
                response_mime_type='application/json'
            )
        )

        return self._parse_professor_list(extract_response.text)

    def get_professor_details(self, professor: Dict) -> Dict:
        """Research one professor with Google Search grounding.

        Args:
            professor: Dict whose ``name``, ``university`` and ``department``
                values are interpolated into the prompt.

        Returns:
            ``{"text": <markdown answer>, "sources": [{"title", "uri"}, ...]}``.
            ``text`` is an empty string when the model returns no text.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        name = professor.get('name')
        uni = professor.get('university')
        dept = professor.get('department')

        prompt = f"""
        Act as an academic consultant. Investigate Professor {name} from {dept} at {uni}.
        
        Find their "Combat Experience" (實戰經驗). Search for:
        1. **Recent Key Publications (Last 5 Years)**: Find 2-3 top papers. **MUST try to find Citation Counts**.
        2. **Alumni Directions**: Where do their graduates work? (e.g., TSMC, Google).
        3. **Industry Collaboration**: Any industry projects?

        Format output in Markdown (Traditional Chinese).
        """

        response = self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=self._search_config(),
        )

        return {
            "text": response.text or "",
            "sources": self._extract_sources(response),
        }

    def chat_with_ai(self, history: List[Dict], new_message: str, context: str) -> str:
        """Continue a chat grounded on ``context``.

        Args:
            history: Prior turns as dicts with ``role`` and ``content`` keys.
                Any role other than ``"user"`` is sent as ``"model"``. Entries
                without content are skipped.
            new_message: The user's new message.
            context: Text injected into the system instruction as the source
                of truth.

        Returns:
            The model's reply text, or an empty string when there is none.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        system_instruction = f"Source of truth:\n{context}"

        chat_history = []
        for h in history or []:
            content = h.get("content")
            if not content:
                # An empty part carries no information for the model; skip it
                # instead of failing on a missing key.
                continue
            role = "user" if h.get("role") == "user" else "model"
            chat_history.append(types.Content(role=role, parts=[types.Part(text=content)]))

        chat = self.client.chats.create(
            model=self.model_id,
            history=chat_history,
            config=types.GenerateContentConfig(
                system_instruction=system_instruction
            )
        )

        response = chat.send_message(new_message)
        return response.text or ""