# services.py (Prof.404.Com)
# NOTE: the following Hugging Face page residue was preserved as a comment:
#   DeepLearning101's picture / Update services.py / 7c1e48e verified / raw / history / blame / 6.53 kB
import os
import json
from dotenv import load_dotenv
from google import genai
from google.genai import types
from typing import List, Dict, Any, Optional
# Load environment variables from a local .env file (GEMINI_API_KEY, GEMINI_MODEL_ID).
load_dotenv()
class GeminiService:
    """Wrapper around the Google Gemini API for Taiwanese-company due diligence.

    Provides three operations: grounded company search, a deep-dive
    due-diligence report with cited sources, and a report-scoped chat.
    The client is created lazily-optional: if GEMINI_API_KEY is missing the
    instance is still constructed, and each API method raises at call time.
    """

    def __init__(self):
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            print("警告:找不到 GEMINI_API_KEY")
        # Defer failure to call time: client stays None when no key is configured.
        self.client = genai.Client(api_key=api_key) if api_key else None
        # The latest model is recommended for the best analysis capability.
        self.model_id = os.getenv("GEMINI_MODEL_ID", "gemini-2.0-flash")

    def _check_client(self):
        """Raise ValueError if no API client was configured (missing API key)."""
        if not self.client:
            raise ValueError("API Key 未設定")

    def search_companies(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Step 1: search for Taiwanese companies relevant to *query*.

        Two-phase pipeline:
          Phase 1 — grounded generation with the Google Search tool collects
          candidate companies as free text.
          Phase 2 — a second, JSON-mode call extracts a structured array of
          ``{"name", "industry", "relevanceScore"}`` objects from that text.

        Args:
            query: Free-text topic/sector to search for.
            exclude_names: Company names to exclude from the results.
                Defaults to no exclusions.

        Returns:
            A list of dicts parsed from the model's JSON output, or an empty
            list if the output cannot be parsed.

        Raises:
            ValueError: If the API client is not configured.
        """
        self._check_client()
        # BUGFIX: the original signature used a mutable default (=[]), which is
        # shared across calls; normalize None -> [] here instead.
        exclude_names = exclude_names or []
        exclusion_prompt = ""
        if exclude_names:
            exclusion_prompt = f"IMPORTANT: Do not include: {', '.join(exclude_names)}."
        # Phase 1: broad grounded search via the Google Search tool.
        search_prompt = f"""
Using Google Search, find 5 to 10 prominent companies in Taiwan related to the query: "{query}".
**CRITICAL INSTRUCTIONS:**
1. **TARGET:** Focus on Taiwanese companies (or global companies with a major branch in Taiwan).
2. **IDENTIFIERS:** Try to find their distinct "Company Name" (e.g., 台積電 / 台灣積體電路製造股份有限公司).
{exclusion_prompt}
List them (Full Name - Industry/Sector) in Traditional Chinese.
"""
        search_response = self.client.models.generate_content(
            model=self.model_id,
            contents=search_prompt,
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())]
            )
        )
        raw_text = search_response.text
        # Phase 2: structured extraction (JSON mode, no tools).
        extract_prompt = f"""
From the text below, extract company names and their industry.
Calculate a Relevance Score (0-100) based on query: "{query}".
Return ONLY a JSON array: [{{"name": "...", "industry": "...", "relevanceScore": 85}}]
Text:
---
{raw_text}
---
"""
        extract_response = self.client.models.generate_content(
            model=self.model_id,
            contents=extract_prompt,
            config=types.GenerateContentConfig(
                response_mime_type='application/json'
            )
        )
        try:
            return json.loads(extract_response.text)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers extract_response.text being None;
            # narrowed from the original bare `except Exception`.
            print(f"JSON Parse Error: {e}")
            return []

    def get_company_details(self, company: Dict) -> Dict:
        """Step 2: run a due-diligence deep dive on a single company.

        Issues one grounded generation request that asks the model to act as a
        business due-diligence analyst, then collects the grounding sources
        (title + URI) attached to the response.

        Args:
            company: Dict with at least a ``"name"`` key (as returned by
                :meth:`search_companies`).

        Returns:
            ``{"text": <markdown report>, "sources": [{"title", "uri"}, ...]}``
            with sources deduplicated by URI.

        Raises:
            ValueError: If the API client is not configured.
        """
        self._check_client()
        name = company.get('name')
        prompt = f"""
Act as a professional "Business Due Diligence Analyst" (商業徵信分析師).
Conduct a comprehensive investigation on the Taiwanese company: "{name}".
**Investigation Targets (Must search for these specifically):**
1. **Corporate Identity (基本資料)**:
- Find the **Tax ID (統一編號)**.
- **Registered Capital (資本額)**.
- **Representative (代表人)**.
- **Establishment Date (設立日期)**.
- *Source Hint: Ministry of Economic Affairs (經濟部商業司), Datagovtw.*
2. **Scale & Business (規模與業務)**:
- **Employee Count**: Estimated number of employees.
- **Core Products/Services**: What do they actually sell or do?
- *Source Hint: 104 Job Bank, Company Website, LinkedIn.*
3. **Market Reputation & Culture (評價與文化 - KEY PART)**:
- Search for employee reviews on **PTT (Tech_Job, Soft_Job, Salary)**, **Dcard (Work board)**, **Qollie (求職天眼通)**, or **Google Maps**.
- Summarize the **Pros** (e.g., high pay, free snacks) and **Cons** (e.g., toxic management, forced overtime, family business style).
- *Tone:* Be objective but highlight recurring complaints.
4. **Legal & Risk Assessment (法律與風險 - CRITICAL)**:
- Search for keywords: "{name} 判決", "{name} 勞資糾紛", "{name} 違反勞基法", "{name} 詐騙", "{name} 吸金", "{name} 罰款".
- List any major lawsuits, fines, or controversies found in news or government records.
- If clean, state "No major public legal disputes found."
**Format Requirements**:
- Structure the output as a clean, readable report using Markdown.
- Use clear headings.
- **Language**: Traditional Chinese (繁體中文).
"""
        response = self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())]
            )
        )
        # Extract grounding sources.
        # BUGFIX: the original indexed response.candidates[0] unconditionally,
        # which raises when the response has no candidates; guard each level.
        sources = []
        candidates = response.candidates or []
        if candidates:
            metadata = candidates[0].grounding_metadata
            if metadata and metadata.grounding_chunks:
                for chunk in metadata.grounding_chunks:
                    if chunk.web and chunk.web.uri and chunk.web.title:
                        sources.append({"title": chunk.web.title, "uri": chunk.web.uri})
        # Deduplicate by URI (later entries win, order otherwise preserved).
        unique_sources = {v['uri']: v for v in sources}.values()
        return {
            "text": response.text,
            "sources": list(unique_sources)
        }

    def chat_with_ai(self, history: List[Dict], new_message: str, context: str) -> str:
        """Answer a follow-up question grounded in a due-diligence report.

        Args:
            history: Prior turns as ``[{"role": "user"|..., "content": str}]``;
                any role other than "user" is mapped to "model".
            new_message: The user's new question.
            context: The due-diligence report text injected as the system
                instruction so answers stay grounded in it.

        Returns:
            The model's reply text.

        Raises:
            ValueError: If the API client is not configured.
        """
        self._check_client()
        system_instruction = f"You are a sharp Business Analyst. Answer based on this due diligence report:\n{context}"
        # Convert the app-level history into SDK Content objects.
        chat_history = []
        for h in history:
            role = "user" if h["role"] == "user" else "model"
            chat_history.append(types.Content(role=role, parts=[types.Part(text=h["content"])]))
        chat = self.client.chats.create(
            model=self.model_id,
            history=chat_history,
            config=types.GenerateContentConfig(
                system_instruction=system_instruction
            )
        )
        response = chat.send_message(new_message)
        return response.text