lanny xu committed
Commit · 8da7c51
1 Parent(s): c848868

delete urls

Files changed: entity_extractor.py (+15 -7)
entity_extractor.py
CHANGED
@@ -21,11 +21,11 @@ from config import LOCAL_LLM
 class EntityExtractor:
     """Entity extractor - uses an LLM to extract entities from text (supports async batch processing)"""
 
-    def __init__(self, timeout: int =
+    def __init__(self, timeout: int = 180, max_retries: int = 3, enable_async: bool = True):
         """Initialize the entity extractor
 
         Args:
-            timeout: LLM
+            timeout: LLM call timeout in seconds - defaults to 180 s to allow for first-time model loading
             max_retries: number of retries on failure
             enable_async: whether to enable asynchronous processing (enabled by default)
         """
@@ -38,6 +38,7 @@ class EntityExtractor:
         self.max_retries = max_retries
         self.enable_async = enable_async
         self.ollama_url = "http://localhost:11434/api/generate"
+        self.timeout = timeout  # keep the timeout setting for async use
 
         # Entity extraction prompt template
         self.entity_prompt = PromptTemplate(
@@ -184,6 +185,12 @@
     async def _async_llm_call(self, prompt: str, session: aiohttp.ClientSession, attempt: int = 0) -> Dict:
         """Asynchronously call the Ollama API"""
         try:
+            timeout = aiohttp.ClientTimeout(
+                total=self.timeout,       # overall timeout
+                connect=30,               # connection timeout: 30 seconds
+                sock_read=self.timeout    # read timeout
+            )
+
             async with session.post(
                 self.ollama_url,
                 json={
@@ -193,21 +200,22 @@
                     "stream": False,
                     "options": {"temperature": 0}
                 },
-                timeout=
+                timeout=timeout
             ) as response:
                 if response.status == 200:
                     result = await response.json()
                     return json.loads(result.get('response', '{}'))
                 else:
                     raise Exception(f"API returned an error: {response.status}")
-        except asyncio.TimeoutError:
+        except (asyncio.TimeoutError, aiohttp.ClientError) as e:
             if attempt < self.max_retries - 1:
-
+                wait_time = (attempt + 1) * 3
+                await asyncio.sleep(wait_time)
                 return await self._async_llm_call(prompt, session, attempt + 1)
-            raise
+            raise Exception(f"Connection failed: {str(e)}")
         except Exception as e:
             if attempt < self.max_retries - 1:
-                await asyncio.sleep(
+                await asyncio.sleep(2)
                 return await self._async_llm_call(prompt, session, attempt + 1)
             raise
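For reference, a minimal, self-contained sketch of the pattern this commit introduces: a per-request aiohttp.ClientTimeout plus retry-with-backoff against a local Ollama /api/generate endpoint. This is not the Space's actual code; the endpoint URL and the json payload shape mirror the patch, but the model name and the simplified (non-recursive) retry loop are assumptions for illustration.

# Sketch only (not part of this commit): per-request ClientTimeout with linear
# backoff on timeout/connection errors, as added to EntityExtractor._async_llm_call.
import asyncio
import json
import aiohttp


async def generate(prompt: str, total_timeout: int = 180, max_retries: int = 3) -> dict:
    url = "http://localhost:11434/api/generate"  # same endpoint as the patch
    # Overall budget, 30 s to connect, and a read timeout large enough to
    # cover first-time model loading.
    timeout = aiohttp.ClientTimeout(total=total_timeout, connect=30, sock_read=total_timeout)
    async with aiohttp.ClientSession() as session:
        for attempt in range(max_retries):
            try:
                async with session.post(
                    url,
                    json={
                        "model": "llama3",  # placeholder model name, not from the patch
                        "prompt": prompt,
                        "stream": False,
                        "options": {"temperature": 0},
                    },
                    timeout=timeout,
                ) as response:
                    if response.status != 200:
                        raise RuntimeError(f"API returned an error: {response.status}")
                    result = await response.json()
                    return json.loads(result.get("response", "{}"))
            except (asyncio.TimeoutError, aiohttp.ClientError):
                if attempt == max_retries - 1:
                    raise
                await asyncio.sleep((attempt + 1) * 3)  # linear backoff: 3 s, 6 s, ...


if __name__ == "__main__":
    print(asyncio.run(generate("Extract the entities from: Alice met Bob in Paris.")))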