Spaces:

davanstrien
/

data-centril-ml_config_space

Sleeping

App Files Files Community

data-centril-ml_config_space / domain.py

ignacioct

recommiting all files

8773ff3 almost 2 years ago

raw

history blame contribute delete

2.78 kB

	import json
	from typing import Any, Dict, List

	from distilabel.steps.tasks.typing import ChatType
	from distilabel.steps.tasks.text_generation import TextGeneration
	from distilabel.steps import StepInput, StepOutput, Step

	from dotenv import load_dotenv

	from defaults import (
	DEFAULT_DOMAIN,
	DEFAULT_PERSPECTIVES,
	DEFAULT_TOPICS,
	DEFAULT_EXAMPLES,
	DEFAULT_SYSTEM_PROMPT,
	N_PERSPECTIVES,
	N_TOPICS,
	N_EXAMPLES,
	)

	# Load variables from a local .env file (python-dotenv) as a side effect of
	# importing this module.
	load_dotenv()

	# Application description used for SelfInstruct.
	# The f-string is interpolated once, at import time, with DEFAULT_DOMAIN, so
	# later changes to DEFAULT_DOMAIN are not reflected in this constant.
	# NOTE(review): the wording contains typos ("than generates", "Your should
	# not expect basic but profound questions"). They are left untouched because
	# this is runtime prompt text; confirm the intended wording before editing.
	APPLICATION_DESCRIPTION = f"""You are an AI assistant than generates queries around the domain of {DEFAULT_DOMAIN}.
	Your should not expect basic but profound questions from your users.
	The queries should reflect a diversity of vision and economic positions and political positions.
	The queries may know about different methods of {DEFAULT_DOMAIN}.
	The queries can be positioned politically, economically, socially, or practically.
	Also take into account the impact of diverse causes on diverse domains."""


	# Truncate each default list with a slice: the leading N_* entries are kept
	# (assuming the N_* values are non-negative integers -- TODO confirm in defaults).
	TOPICS = DEFAULT_TOPICS[:N_TOPICS]
	PERSPECTIVES = DEFAULT_PERSPECTIVES[:N_PERSPECTIVES]
	EXAMPLES = DEFAULT_EXAMPLES[:N_EXAMPLES]


	def create_examples_template(examples: List[Dict[str, str]]) -> str:
	    """Build the prompt template that embeds the few-shot examples.

	    The result starts with a literal ``{instruction}`` placeholder and the
	    fixed preamble, followed by every example question and then every
	    example answer.

	    Args:
	        examples: Mappings that each provide a ``"question"`` and an
	            ``"answer"`` entry. An empty iterable yields only the preamble
	            and the two section headers.

	    Returns:
	        The whole template as one string (the previous ``List[str]``
	        annotation did not match what the function returns).

	    Raises:
	        KeyError: If an example lacks ``"question"`` or ``"answer"``.
	    """
	    # NOTE(review): braces inside example text are not escaped. If the result
	    # is later fed to ``str.format`` (as ``DomainExpert.format_input`` does
	    # with its ``_template``), such braces would be read as placeholders --
	    # confirm the examples are brace-free.
	    preamble = """{instruction}\nThis is the the instruction.\n Examples: """
	    question_parts = [""" Examples of high quality questions:"""]
	    answer_parts = [""" Examples of high quality answers:"""]

	    # Single pass so that one-shot iterables behave as they did before;
	    # the pieces are joined once at the end instead of repeated ``+=``.
	    for example in examples:
	        question_parts.append(f"""\n- Question: {example["question"]}\n""")
	        answer_parts.append(f"""\n- Answer: {example["answer"]}\n""")

	    return "".join([preamble, *question_parts, *answer_parts])


	def create_topics(topics: List[str], positions: List[str]) -> List[str]:
	    """Combine every topic with every position into a prompt phrase.

	    Args:
	        topics: Subjects to ask about.
	        positions: Viewpoints each subject is paired with.

	    Returns:
	        One ``"<topic> from a <position> perspective"`` string per pair,
	        ordered topic-first: all positions for the first topic, then all
	        positions for the second, and so on.
	    """
	    combined: List[str] = []
	    for subject in topics:
	        for stance in positions:
	            combined.append(f"{subject} from a {stance} perspective")
	    return combined


	class DomainExpert(TextGeneration):
	    """Text-generation task that prompts the model as a domain expert.

	    Each request is a two-message chat: ``_system_prompt`` as the system
	    message, then ``_template`` filled in from the input row as the user
	    message.
	    """

	    # System message sent with every request; defaults to DEFAULT_SYSTEM_PROMPT.
	    _system_prompt: str = DEFAULT_SYSTEM_PROMPT
	    # User-message template; ``{instruction}`` is filled from the input row.
	    _template: str = """{instruction}\nThis is the the instruction.\n Examples: """

	    def format_input(self, input: Dict[str, Any]) -> "ChatType":
	        """Build the chat messages for one input row.

	        Args:
	            input: Row whose entries are passed as keyword arguments to
	                ``_template.format``. The default template needs an
	                ``"instruction"`` entry and raises ``KeyError`` without it.

	        Returns:
	            A list holding the system message followed by the user message.
	        """
	        system_message = {"role": "system", "content": self._system_prompt}
	        user_message = {
	            "role": "user",
	            "content": self._template.format(**input),
	        }
	        return [system_message, user_message]


	class CleanNumberedList(Step):
	    """Step that strips a leading list number from each question."""

	    def process(self, inputs: StepInput) -> StepOutput:
	        """Remove a leading ``<digits>.`` plus one whitespace character.

	        Each row's ``"question"`` value is rewritten in place; text that
	        does not start with such a prefix is left as it is.

	        Args:
	            inputs: Batch of rows, each providing a ``"question"`` string.

	        Yields:
	            The same batch object, after its rows have been cleaned.
	        """
	        # Kept as a function-local import: ``re`` is not imported at module level.
	        import re

	        numbering = re.compile(r"^\d+\.\s")

	        for row in inputs:
	            row["question"] = numbering.sub("", row["question"])
	        # NOTE(review): the source lost its indentation; the batch is yielded
	        # once, after the loop -- confirm against the original file.
	        yield inputs