# services.py (Prof.404.Com)
# NOTE: the following Hugging Face page residue was preserved as a comment:
#   DeepLearning101's picture / Update services.py / 7c1e48e verified / raw / history / blame / 6.53 kB
import os
import json
from dotenv import load_dotenv
from google import genai
from google.genai import types
from typing import List, Dict, Any, Optional
# Load environment variables from a local .env file (GEMINI_API_KEY, GEMINI_MODEL_ID).
load_dotenv()
class GeminiService:
    """Wrapper around the Google Gemini API for Taiwanese-company due diligence.

    Provides three operations: grounded company search, a deep-dive
    due-diligence report with cited sources, and a report-scoped chat.
    The client is created lazily-optional: if GEMINI_API_KEY is missing the
    instance is still constructed, and each API method raises at call time.
    """

    def __init__(self):
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            print("警告:找不到 GEMINI_API_KEY")
        # Defer failure to call time: client stays None when no key is configured.
        self.client = genai.Client(api_key=api_key) if api_key else None
        # The latest model is recommended for the best analysis capability.
        self.model_id = os.getenv("GEMINI_MODEL_ID", "gemini-2.0-flash")

    def _check_client(self):
        """Raise ValueError if no API client was configured (missing API key)."""
        if not self.client:
            raise ValueError("API Key 未設定")

    def search_companies(self, query: str, exclude_names: Optional[List[str]] = None) -> List[Dict]:
        """Step 1: search for Taiwanese companies relevant to *query*.

        Two-phase pipeline:
          Phase 1 — grounded generation with the Google Search tool collects
          candidate companies as free text.
          Phase 2 — a second, JSON-mode call extracts a structured array of
          ``{"name", "industry", "relevanceScore"}`` objects from that text.

        Args:
            query: Free-text topic/sector to search for.
            exclude_names: Company names to exclude from the results.
                Defaults to no exclusions.

        Returns:
            A list of dicts parsed from the model's JSON output, or an empty
            list if the output cannot be parsed.

        Raises:
            ValueError: If the API client is not configured.
        """
        self._check_client()
        # BUGFIX: the original signature used a mutable default (=[]), which is
        # shared across calls; normalize None -> [] here instead.
        exclude_names = exclude_names or []
        exclusion_prompt = ""
        if exclude_names:
            exclusion_prompt = f"IMPORTANT: Do not include: {', '.join(exclude_names)}."
        # Phase 1: broad grounded search via the Google Search tool.
        search_prompt = f"""
Using Google Search, find 5 to 10 prominent companies in Taiwan related to the query: "{query}".
**CRITICAL INSTRUCTIONS:**
1. **TARGET:** Focus on Taiwanese companies (or global companies with a major branch in Taiwan).
2. **IDENTIFIERS:** Try to find their distinct "Company Name" (e.g., 台積電 / 台灣積體電路製造股份有限公司).
{exclusion_prompt}
List them (Full Name - Industry/Sector) in Traditional Chinese.
"""
        search_response = self.client.models.generate_content(
            model=self.model_id,
            contents=search_prompt,
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())]
            )
        )
        raw_text = search_response.text
        # Phase 2: structured extraction (JSON mode, no tools).
        extract_prompt = f"""
From the text below, extract company names and their industry.
Calculate a Relevance Score (0-100) based on query: "{query}".
Return ONLY a JSON array: [{{"name": "...", "industry": "...", "relevanceScore": 85}}]
Text:
---
{raw_text}
---
"""
        extract_response = self.client.models.generate_content(
            model=self.model_id,
            contents=extract_prompt,
            config=types.GenerateContentConfig(
                response_mime_type='application/json'
            )
        )
        try:
            return json.loads(extract_response.text)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers extract_response.text being None;
            # narrowed from the original bare `except Exception`.
            print(f"JSON Parse Error: {e}")
            return []

    def get_company_details(self, company: Dict) -> Dict:
        """Step 2: run a due-diligence deep dive on a single company.

        Issues one grounded generation request that asks the model to act as a
        business due-diligence analyst, then collects the grounding sources
        (title + URI) attached to the response.

        Args:
            company: Dict with at least a ``"name"`` key (as returned by
                :meth:`search_companies`).

        Returns:
            ``{"text": <markdown report>, "sources": [{"title", "uri"}, ...]}``
            with sources deduplicated by URI.

        Raises:
            ValueError: If the API client is not configured.
        """
        self._check_client()
        name = company.get('name')
        prompt = f"""
Act as a professional "Business Due Diligence Analyst" (商業徵信分析師).
Conduct a comprehensive investigation on the Taiwanese company: "{name}".
**Investigation Targets (Must search for these specifically):**
1. **Corporate Identity (基本資料)**:
- Find the **Tax ID (統一編號)**.
- **Registered Capital (資本額)**.
- **Representative (代表人)**.
- **Establishment Date (設立日期)**.
- *Source Hint: Ministry of Economic Affairs (經濟部商業司), Datagovtw.*
2. **Scale & Business (規模與業務)**:
- **Employee Count**: Estimated number of employees.
- **Core Products/Services**: What do they actually sell or do?
- *Source Hint: 104 Job Bank, Company Website, LinkedIn.*
3. **Market Reputation & Culture (評價與文化 - KEY PART)**:
- Search for employee reviews on **PTT (Tech_Job, Soft_Job, Salary)**, **Dcard (Work board)**, **Qollie (求職天眼通)**, or **Google Maps**.
- Summarize the **Pros** (e.g., high pay, free snacks) and **Cons** (e.g., toxic management, forced overtime, family business style).
- *Tone:* Be objective but highlight recurring complaints.
4. **Legal & Risk Assessment (法律與風險 - CRITICAL)**:
- Search for keywords: "{name} 判決", "{name} 勞資糾紛", "{name} 違反勞基法", "{name} 詐騙", "{name} 吸金", "{name} 罰款".
- List any major lawsuits, fines, or controversies found in news or government records.
- If clean, state "No major public legal disputes found."
**Format Requirements**:
- Structure the output as a clean, readable report using Markdown.
- Use clear headings.
- **Language**: Traditional Chinese (繁體中文).
"""
        response = self.client.models.generate_content(
            model=self.model_id,
            contents=prompt,
            config=types.GenerateContentConfig(
                tools=[types.Tool(google_search=types.GoogleSearch())]
            )
        )
        # Extract grounding sources.
        # BUGFIX: the original indexed response.candidates[0] unconditionally,
        # which raises when the response has no candidates; guard each level.
        sources = []
        candidates = response.candidates or []
        if candidates:
            metadata = candidates[0].grounding_metadata
            if metadata and metadata.grounding_chunks:
                for chunk in metadata.grounding_chunks:
                    if chunk.web and chunk.web.uri and chunk.web.title:
                        sources.append({"title": chunk.web.title, "uri": chunk.web.uri})
        # Deduplicate by URI (later entries win, order otherwise preserved).
        unique_sources = {v['uri']: v for v in sources}.values()
        return {
            "text": response.text,
            "sources": list(unique_sources)
        }

    def chat_with_ai(self, history: List[Dict], new_message: str, context: str) -> str:
        """Answer a follow-up question grounded in a due-diligence report.

        Args:
            history: Prior turns as ``[{"role": "user"|..., "content": str}]``;
                any role other than "user" is mapped to "model".
            new_message: The user's new question.
            context: The due-diligence report text injected as the system
                instruction so answers stay grounded in it.

        Returns:
            The model's reply text.

        Raises:
            ValueError: If the API client is not configured.
        """
        self._check_client()
        system_instruction = f"You are a sharp Business Analyst. Answer based on this due diligence report:\n{context}"
        # Convert the app-level history into SDK Content objects.
        chat_history = []
        for h in history:
            role = "user" if h["role"] == "user" else "model"
            chat_history.append(types.Content(role=role, parts=[types.Part(text=h["content"])]))
        chat = self.client.chats.create(
            model=self.model_id,
            history=chat_history,
            config=types.GenerateContentConfig(
                system_instruction=system_instruction
            )
        )
        response = chat.send_message(new_message)
        return response.text