Spaces:
Sleeping
Sleeping
File size: 7,191 Bytes
f93e884 0b31a89 f93e884 5b3bc74 f93e884 5b3bc74 7c1e48e 5b3bc74 7c1e48e 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 9297b21 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 7c1e48e f93e884 7c1e48e 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 5b3bc74 f93e884 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import os
import json
from dotenv import load_dotenv
from google import genai
from google.genai import types
from typing import List, Dict, Any, Optional
# Load environment variables
load_dotenv()
class GeminiService:
    """Wrapper around the Google GenAI client for professor/company research.

    The API key is read from the ``GEMINI_API_KEY`` environment variable and
    the model from ``GEMINI_MODEL_ID`` (default ``"gemini-2.0-flash"``).
    When no key is configured the instance is still created, but every
    public method raises ``ValueError`` when called.

    NOTE: multi-line prompt text is deliberately kept flush-left inside the
    triple-quoted strings so that no source indentation leaks into the text
    sent to the model.
    """

    def __init__(self):
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            print("警告:找不到 GEMINI_API_KEY")
        # Without a key there is no client; _check_client() reports this on use.
        self.client = genai.Client(api_key=api_key) if api_key else None
        self.model_id = os.getenv("GEMINI_MODEL_ID", "gemini-2.0-flash")

    def _check_client(self):
        """Raise ``ValueError`` if no API client was configured."""
        if not self.client:
            raise ValueError("API Key 未設定,請檢查 .env 或 Hugging Face Secrets")

    # ==========================
    # Private request helpers
    # ==========================
    @staticmethod
    def _exclusion_clause(exclude_names: Optional[List[str]]) -> str:
        """Return the prompt sentence that excludes *exclude_names* ('' if none)."""
        if not exclude_names:
            return ""
        return f"IMPORTANT: Do not include: {', '.join(exclude_names)}."

    def _grounded_generate(self, prompt: str):
        """Send *prompt* to the model with the Google Search tool enabled."""
        return self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())]
            ),
        )

    def _generate_json_list(self, prompt: str) -> List[Dict]:
        """Send *prompt* requesting a JSON response and parse it as a list."""
        response = self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=types.GenerateContentConfig(response_mime_type='application/json'),
        )
        return self._parse_json_list(response)

    @staticmethod
    def _parse_json_list(response) -> List[Dict]:
        """Parse ``response.text`` as a JSON array; return ``[]`` on failure.

        Best-effort by design: missing text (``None``), malformed JSON, or a
        JSON value that is not an array all yield an empty list.
        """
        try:
            parsed = json.loads(response.text)
        except (TypeError, ValueError):
            # TypeError: text is None; ValueError covers json.JSONDecodeError.
            return []
        return parsed if isinstance(parsed, list) else []

    # ==========================
    # Professor search features
    # ==========================
    def search_professors(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Search for professors in Taiwan related to *query*.

        Runs two model calls: a search-grounded call that lists candidates,
        then a JSON-mode call that extracts structured records from that text.

        Args:
            query: Research field to search for.
            exclude_names: Names the model is told not to include.

        Returns:
            A list of dicts. The prompt requests the keys ``name``,
            ``university``, ``department`` and ``relevanceScore``; the actual
            content depends on the model output. Returns ``[]`` when the
            search yields no text or the extraction is not a valid JSON array.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        exclusion_prompt = self._exclusion_clause(exclude_names)

        # Phase 1: Search
        search_prompt = f"""
Using Google Search, find 10 prominent professors in universities across Taiwan who are experts in the field of "{query}".
CRITICAL: FACT CHECK they are current faculty. RELEVANCE must be high.
{exclusion_prompt}
List them (Name - University - Department) in Traditional Chinese.
"""
        search_text = self._grounded_generate(search_prompt).text
        if not search_text:
            # Nothing to extract from; skip the second model call.
            return []

        # Phase 2: Extract JSON
        extract_prompt = f"""
From the text below, extract professor names, universities, and departments.
Calculate a Relevance Score (0-100) based on query: "{query}".
Return ONLY a JSON array: [{{"name": "...", "university": "...", "department": "...", "relevanceScore": 85}}]
Text: --- {search_text} ---
"""
        return self._generate_json_list(extract_prompt)

    def get_professor_details(self, professor: Dict) -> Dict:
        """Fetch a Markdown report on one professor, with web sources.

        Args:
            professor: Dict read via the keys ``name``, ``university`` and
                ``department`` (missing keys are rendered as ``None``).

        Returns:
            ``{"text": <model text>, "sources": [{"title", "uri"}, ...]}``.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        name = professor.get('name')
        uni = professor.get('university')
        dept = professor.get('department')
        prompt = f"""
Act as an academic consultant. Investigate Professor {name} from {dept} at {uni}.
Find "Combat Experience":
1. **Key Publications (Last 5 Years)**: Find 2-3 top papers with Citation Counts.
2. **Alumni Directions**: Where do their graduates work?
3. **Industry Collaboration**: Any industry projects?
Format output in Markdown (Traditional Chinese).
"""
        return self._format_response_with_sources(self._grounded_generate(prompt))

    # ==========================
    # Company search features
    # ==========================
    def search_companies(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Search for companies in Taiwan related to *query*.

        Runs two model calls: a search-grounded call that lists candidates,
        then a JSON-mode call that extracts structured records from that text.

        Args:
            query: Industry or company name to search for.
            exclude_names: Names the model is told not to include.

        Returns:
            A list of dicts. The prompt requests the keys ``name``,
            ``industry`` and ``relevanceScore``; the actual content depends
            on the model output. Returns ``[]`` when the search yields no
            text or the extraction is not a valid JSON array.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        exclusion_prompt = self._exclusion_clause(exclude_names)

        # Phase 1: Search
        search_prompt = f"""
Using Google Search, find 5 to 10 prominent companies in Taiwan related to: "{query}".
Instructions:
1. If "{query}" is an industry (e.g. AI), list representative Taiwanese companies.
2. If "{query}" is a name, list the company and competitors.
{exclusion_prompt}
List them (Full Name - Industry/Main Product) in Traditional Chinese.
"""
        search_text = self._grounded_generate(search_prompt).text
        if not search_text:
            # Nothing to extract from; skip the second model call.
            return []

        # Phase 2: Extract JSON
        extract_prompt = f"""
From text, extract company names and industry.
Calculate Relevance Score (0-100) for query: "{query}".
Return ONLY JSON array: [{{"name": "...", "industry": "...", "relevanceScore": 85}}]
Text: --- {search_text} ---
"""
        return self._generate_json_list(extract_prompt)

    def get_company_details(self, company: Dict) -> Dict:
        """Fetch a Markdown report on one company, with web sources.

        Args:
            company: Dict read via the key ``name`` (rendered as ``None``
                if missing).

        Returns:
            ``{"text": <model text>, "sources": [{"title", "uri"}, ...]}``.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        name = company.get('name')
        prompt = f"""
Act as a "Business Analyst". Investigate Taiwanese company: "{name}".
Targets:
1. **Overview**: Tax ID (統編), Capital (資本額), Representative.
2. **Workforce & Culture**: Employee count, Reviews from PTT(Tech_Job)/Dcard/Qollie (Pros & Cons).
3. **Legal & Risks**: Search for "{name} 勞資糾紛", "{name} 判決", "{name} 違反勞基法".
Format in Markdown (Traditional Chinese). Be objective.
"""
        return self._format_response_with_sources(self._grounded_generate(prompt))

    # ==========================
    # Shared features
    # ==========================
    def _format_response_with_sources(self, response):
        """Bundle the response text with its de-duplicated web sources.

        Sources come from the first candidate's grounding chunks. Chunks
        lacking a web entry, URI or title are skipped. Entries are unique by
        URI: first-seen order is kept and the last title seen for a URI wins.
        A response with no candidates or no grounding metadata yields an
        empty source list instead of raising.
        """
        candidates = response.candidates or []
        metadata = candidates[0].grounding_metadata if candidates else None
        chunks = (metadata.grounding_chunks if metadata else None) or []

        by_uri = {}
        for chunk in chunks:
            web = chunk.web
            if web and web.uri and web.title:
                by_uri[web.uri] = {"title": web.title, "uri": web.uri}
        return {"text": response.text, "sources": list(by_uri.values())}

    def chat_with_ai(self, history: List[Dict], new_message: str, context: str, role_instruction: str = "Source of truth") -> str:
        """Continue a chat grounded on *context* and return the model's reply.

        Args:
            history: Prior turns, each a dict with ``"role"`` and
                ``"content"``. Any role other than ``"user"`` is sent as
                ``"model"``.
            new_message: The new user message to send.
            context: Text placed in the system instruction after
                *role_instruction*.
            role_instruction: Heading that prefixes *context* in the system
                instruction.

        Raises:
            ValueError: If the API key is not configured.
        """
        self._check_client()
        system_instruction = f"{role_instruction}:\n{context}"
        chat_history = [
            types.Content(
                role="user" if turn["role"] == "user" else "model",
                parts=[types.Part(text=turn["content"])],
            )
            for turn in history
        ]
        chat = self.client.chats.create(
            model=self.model_id,
            history=chat_history,
            config=types.GenerateContentConfig(system_instruction=system_instruction),
        )
        response = chat.send_message(new_message)
        return response.text