| import os |
| import json |
| import re |
| import torch |
| from typing import Dict, Optional |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
# On-disk JSON file used to persist question->answer results between runs.
CACHE_FILE = "gaia_answers_cache.json"
# Default seq2seq checkpoint loaded when the caller does not specify one.
DEFAULT_MODEL = "google/flan-t5-base"
|
|
class EnhancedGAIAAgent:
    """Agent for Hugging Face GAIA with improved question handling.

    Wraps a seq2seq LM (default: flan-t5-base) behind a callable interface.
    Each call classifies the question, generates a raw answer, normalizes it,
    and returns a JSON string of the form '{"final_answer": ...}'.
    Optionally caches the JSON responses on disk (see CACHE_FILE).
    """

    def __init__(self, model_name: str = DEFAULT_MODEL, use_cache: bool = False):
        """Load tokenizer/model and (optionally) the on-disk answer cache.

        Args:
            model_name: Hugging Face model id for a seq2seq LM.
            use_cache: when True, answers are read from / written to CACHE_FILE.
        """
        print(f"Initializing EnhancedGAIAAgent with model: {model_name}")
        self.model_name = model_name
        self.use_cache = use_cache
        self.cache = self._load_cache() if use_cache else {}
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    def _load_cache(self) -> Dict[str, str]:
        """Load the cache file; return an empty dict on any read/parse failure."""
        if os.path.exists(CACHE_FILE):
            try:
                with open(CACHE_FILE, 'r', encoding='utf-8') as f:
                    return json.load(f)
            # Narrowed from a bare `except:`: only file/JSON errors mean
            # "start with an empty cache" — anything else should surface.
            except (OSError, json.JSONDecodeError):
                return {}
        return {}

    def _save_cache(self) -> None:
        """Best-effort persist of the cache; caching failures are non-fatal."""
        try:
            with open(CACHE_FILE, 'w', encoding='utf-8') as f:
                json.dump(self.cache, f, ensure_ascii=False, indent=2)
        # Narrowed from a bare `except:`: ignore disk/serialization errors
        # only — the cache is an optimization, not required for correctness.
        except (OSError, TypeError, ValueError):
            pass

    def _classify_question(self, question: str) -> str:
        """Heuristically bucket a question by keyword.

        Returns one of: "calculation", "list", "date_time", "factual".
        """
        question_lower = question.lower()

        if any(word in question_lower for word in ["calculate", "sum", "how many"]):
            return "calculation"
        elif any(word in question_lower for word in ["list", "enumerate"]):
            return "list"
        elif any(word in question_lower for word in ["date", "time", "when"]):
            return "date_time"
        return "factual"

    def _format_answer(self, raw_answer: str, question_type: str) -> str:
        """Normalize a raw model answer for GAIA's exact-match scoring.

        Strips boilerplate prefixes, applies type-specific cleanup
        (first number for calculations, comma-joined items for lists),
        removes surrounding quotes and a trailing period, and collapses
        internal whitespace.
        """
        answer = raw_answer.strip()

        # Drop conversational lead-ins the model tends to emit.
        prefixes = ["Answer:", "The answer is:", "I think", "I believe"]
        for prefix in prefixes:
            if answer.lower().startswith(prefix.lower()):
                answer = answer[len(prefix):].strip()

        if question_type == "calculation":
            # GAIA expects a bare number; keep the first one found.
            numbers = re.findall(r'-?\d+\.?\d*', answer)
            if numbers:
                answer = numbers[0]
        elif question_type == "list":
            # If the model produced space-separated items, re-join with commas.
            if "," not in answer and " " in answer:
                items = [item.strip() for item in answer.split() if item.strip()]
                answer = ", ".join(items)

        answer = answer.strip('"\'')
        # Trailing period is stripped unless it looks like a decimal (e.g. "3.").
        if answer.endswith('.') and not re.match(r'.*\d\.$', answer):
            answer = answer[:-1]
        return re.sub(r'\s+', ' ', answer).strip()

    def __call__(self, question: str, task_id: Optional[str] = None) -> str:
        """Answer a GAIA question; returns a JSON string {"final_answer": ...}.

        Args:
            question: the natural-language question.
            task_id: optional GAIA task id, used as the cache key when given
                (falls back to the question text itself).
        """
        cache_key = task_id if task_id else question
        if self.use_cache and cache_key in self.cache:
            return self.cache[cache_key]

        question_type = self._classify_question(question)

        try:
            # truncation=True guards against inputs longer than the model's
            # max sequence length; no_grad avoids building autograd state
            # during pure inference.
            inputs = self.tokenizer(question, return_tensors="pt", truncation=True)
            with torch.no_grad():
                outputs = self.model.generate(**inputs, max_length=100)
            raw_answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            formatted_answer = self._format_answer(raw_answer, question_type)

            result = {"final_answer": formatted_answer}
            json_response = json.dumps(result)

            # Cache the serialized response so cache hits return the
            # same JSON string as a fresh call.
            if self.use_cache:
                self.cache[cache_key] = json_response
                self._save_cache()

            return json_response

        except Exception as e:
            # Top-level boundary: report the failure in-band so the GAIA
            # harness always receives a well-formed JSON answer.
            return json.dumps({"final_answer": f"AGENT ERROR: {e}"})