File size: 7,191 Bytes
f93e884
 
 
0b31a89
f93e884
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b3bc74
f93e884
5b3bc74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c1e48e
5b3bc74
 
 
 
 
 
 
 
 
 
 
7c1e48e
5b3bc74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f93e884
 
 
 
 
5b3bc74
f93e884
5b3bc74
 
 
 
f93e884
9297b21
f93e884
 
5b3bc74
 
f93e884
 
5b3bc74
f93e884
5b3bc74
 
 
 
f93e884
 
5b3bc74
 
f93e884
5b3bc74
 
f93e884
7c1e48e
f93e884
7c1e48e
 
5b3bc74
 
 
 
 
 
f93e884
 
5b3bc74
 
f93e884
5b3bc74
 
 
 
 
 
f93e884
 
 
 
 
 
5b3bc74
f93e884
5b3bc74
f93e884
5b3bc74
f93e884
 
 
 
 
 
 
5b3bc74
 
f93e884
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
import json
from dotenv import load_dotenv
from google import genai
from google.genai import types
from typing import List, Dict, Any, Optional

# Load environment variables via python-dotenv before the service reads them with os.getenv.
load_dotenv()

class GeminiService:
    """Wrapper around the Google Gen AI client for professor/company research.

    The service offers two-phase searches (a Google-Search-grounded query
    followed by a JSON extraction pass), grounded detail reports returned
    together with their web sources, and a context-seeded chat.

    Attributes set in ``__init__``:
        client: ``genai.Client`` built from ``GEMINI_API_KEY``, or ``None``
            when the key is missing or empty.
        model_id: Model name taken from ``GEMINI_MODEL_ID`` (default
            ``"gemini-2.0-flash"``).
    """

    def __init__(self):
        """Read configuration from the environment and build the client.

        A missing or empty ``GEMINI_API_KEY`` only prints a warning here; the
        error is raised later by ``_check_client`` when an API method is used.
        """
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            print("警告:找不到 GEMINI_API_KEY")

        self.client = genai.Client(api_key=api_key) if api_key else None
        self.model_id = os.getenv("GEMINI_MODEL_ID", "gemini-2.0-flash")

    def _check_client(self):
        """Raise ``ValueError`` if no client could be created in ``__init__``."""
        if not self.client:
            raise ValueError("API Key 未設定,請檢查 .env 或 Hugging Face Secrets")

    # ==========================
    # Internal request helpers
    # ==========================
    @staticmethod
    def _build_exclusion_prompt(exclude_names: Optional[List[str]]) -> str:
        """Return the "do not include" instruction, or "" when nothing is excluded."""
        if not exclude_names:
            return ""
        return f"IMPORTANT: Do not include: {', '.join(exclude_names)}."

    @staticmethod
    def _parse_json_list(raw) -> List[Dict]:
        """Parse model output that is expected to be a JSON array of objects.

        Args:
            raw: The response text; may be ``None`` or malformed.

        Returns:
            The decoded dict items. Anything that is not valid JSON, is not a
            JSON array, or is a non-object array element is dropped, so the
            result is always a (possibly empty) list of dicts.
        """
        try:
            parsed = json.loads(raw)
        except (TypeError, ValueError):
            # TypeError: raw is None / not a string.
            # ValueError: malformed JSON (json.JSONDecodeError subclasses it).
            return []
        if not isinstance(parsed, list):
            return []
        return [item for item in parsed if isinstance(item, dict)]

    def _generate_with_search(self, prompt: str):
        """Run ``prompt`` with the Google Search tool enabled and return the response."""
        return self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())]
            ),
        )

    def _extract_records(self, prompt: str) -> List[Dict]:
        """Run ``prompt`` requesting a JSON response and return the parsed records."""
        response = self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=types.GenerateContentConfig(response_mime_type='application/json'),
        )
        return self._parse_json_list(response.text)

    # ==========================
    # Professor search
    # ==========================
    def search_professors(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Search for professors in Taiwan who work in the field ``query``.

        Args:
            query: Research field to search for.
            exclude_names: Names the model is told not to include. ``None``
                or an empty list means no exclusions.

        Returns:
            A list of dicts as produced by the extraction prompt (requested
            keys: ``name``, ``university``, ``department``,
            ``relevanceScore``). Empty when the search yields no text or the
            extraction output is not a JSON array.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        exclusion_prompt = self._build_exclusion_prompt(exclude_names)

        # Phase 1: Search
        search_prompt = f"""
        Using Google Search, find 10 prominent professors in universities across Taiwan who are experts in the field of "{query}".
        CRITICAL: FACT CHECK they are current faculty. RELEVANCE must be high.
        {exclusion_prompt}
        List them (Name - University - Department) in Traditional Chinese.
        """
        search_text = self._generate_with_search(search_prompt).text
        if not search_text:
            # Nothing to extract from; avoid feeding the literal "None" to phase 2.
            return []

        # Phase 2: Extract JSON
        extract_prompt = f"""
        From the text below, extract professor names, universities, and departments.
        Calculate a Relevance Score (0-100) based on query: "{query}".
        Return ONLY a JSON array: [{{"name": "...", "university": "...", "department": "...", "relevanceScore": 85}}]
        Text: --- {search_text} ---
        """
        return self._extract_records(extract_prompt)

    def get_professor_details(self, professor: Dict) -> Dict:
        """Produce a grounded Markdown report about one professor.

        Args:
            professor: Dict whose ``name``, ``university`` and ``department``
                entries are interpolated into the prompt (missing keys are
                rendered as ``None``).

        Returns:
            ``{"text": ..., "sources": [...]}`` as built by
            ``_format_response_with_sources``.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        name, uni, dept = professor.get('name'), professor.get('university'), professor.get('department')
        prompt = f"""
        Act as an academic consultant. Investigate Professor {name} from {dept} at {uni}.
        Find "Combat Experience":
        1. **Key Publications (Last 5 Years)**: Find 2-3 top papers with Citation Counts.
        2. **Alumni Directions**: Where do their graduates work?
        3. **Industry Collaboration**: Any industry projects?
        Format output in Markdown (Traditional Chinese).
        """
        response = self._generate_with_search(prompt)
        return self._format_response_with_sources(response)

    # ==========================
    # Company search
    # ==========================
    def search_companies(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Search for Taiwanese companies related to ``query``.

        Args:
            query: Industry keyword or company name.
            exclude_names: Names the model is told not to include. ``None``
                or an empty list means no exclusions.

        Returns:
            A list of dicts as produced by the extraction prompt (requested
            keys: ``name``, ``industry``, ``relevanceScore``). Empty when the
            search yields no text or the extraction output is not a JSON
            array.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        exclusion_prompt = self._build_exclusion_prompt(exclude_names)

        # Phase 1: Search
        search_prompt = f"""
        Using Google Search, find 5 to 10 prominent companies in Taiwan related to: "{query}".
        Instructions:
        1. If "{query}" is an industry (e.g. AI), list representative Taiwanese companies.
        2. If "{query}" is a name, list the company and competitors.
        {exclusion_prompt}
        List them (Full Name - Industry/Main Product) in Traditional Chinese.
        """
        search_text = self._generate_with_search(search_prompt).text
        if not search_text:
            # Nothing to extract from; avoid feeding the literal "None" to phase 2.
            return []

        # Phase 2: Extract JSON
        extract_prompt = f"""
        From text, extract company names and industry.
        Calculate Relevance Score (0-100) for query: "{query}".
        Return ONLY JSON array: [{{"name": "...", "industry": "...", "relevanceScore": 85}}]
        Text: --- {search_text} ---
        """
        return self._extract_records(extract_prompt)

    def get_company_details(self, company: Dict) -> Dict:
        """Produce a grounded Markdown report about one company.

        Args:
            company: Dict whose ``name`` entry is interpolated into the
                prompt (a missing key is rendered as ``None``).

        Returns:
            ``{"text": ..., "sources": [...]}`` as built by
            ``_format_response_with_sources``.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        name = company.get('name')
        prompt = f"""
        Act as a "Business Analyst". Investigate Taiwanese company: "{name}".
        Targets:
        1. **Overview**: Tax ID (統編), Capital (資本額), Representative.
        2. **Workforce & Culture**: Employee count, Reviews from PTT(Tech_Job)/Dcard/Qollie (Pros & Cons).
        3. **Legal & Risks**: Search for "{name} 勞資糾紛", "{name} 判決", "{name} 違反勞基法".
        Format in Markdown (Traditional Chinese). Be objective.
        """
        response = self._generate_with_search(prompt)
        return self._format_response_with_sources(response)

    # ==========================
    # Shared helpers
    # ==========================
    def _format_response_with_sources(self, response) -> Dict[str, Any]:
        """Bundle the response text with its de-duplicated web sources.

        Args:
            response: Object exposing ``text`` and ``candidates``; only the
                first candidate's ``grounding_metadata.grounding_chunks`` are
                inspected.

        Returns:
            ``{"text": response.text, "sources": [{"title", "uri"}, ...]}``.
            Sources are unique per URI, ordered by first appearance; when a
            URI repeats, the last title seen is kept. ``sources`` is empty
            when there are no candidates or no grounding metadata.
        """
        # A response may carry no candidates at all; treat that as "no sources"
        # instead of failing on candidates[0].
        candidates = getattr(response, "candidates", None) or []
        metadata = getattr(candidates[0], "grounding_metadata", None) if candidates else None
        chunks = getattr(metadata, "grounding_chunks", None) or []

        unique_sources = {}
        for chunk in chunks:
            web = getattr(chunk, "web", None)
            if web and web.uri and web.title:
                # Re-assigning an existing key keeps its position, updates its value.
                unique_sources[web.uri] = {"title": web.title, "uri": web.uri}
        return {"text": response.text, "sources": list(unique_sources.values())}

    def chat_with_ai(self, history: List[Dict], new_message: str, context: str, role_instruction: str = "Source of truth") -> str:
        """Continue a chat whose system instruction is seeded with ``context``.

        Args:
            history: Prior turns as dicts with ``role`` and ``content`` keys.
                A role of ``"user"`` is kept; any other role is sent as
                ``"model"``. ``None`` is treated as an empty history.
            new_message: The message to send now.
            context: Text appended to the system instruction.
            role_instruction: Prefix placed before ``context`` in the system
                instruction.

        Returns:
            The text of the model's reply.

        Raises:
            ValueError: If the API key is not configured.
            KeyError: If a history entry lacks ``role`` or ``content``.
        """
        self._check_client()
        system_instruction = f"{role_instruction}:\n{context}"

        chat_history = [
            types.Content(
                role="user" if turn["role"] == "user" else "model",
                parts=[types.Part(text=turn["content"])],
            )
            for turn in (history or [])
        ]

        chat = self.client.chats.create(
            model=self.model_id,
            history=chat_history,
            config=types.GenerateContentConfig(system_instruction=system_instruction),
        )
        response = chat.send_message(new_message)
        return response.text