{"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef update(self, gist, content):\n # abort if content is False\n if content is False:\n return False\n\n # request\n url = self._api_url(\"gists\", gist.get(\"id\"))\n data = {\"files\": {self.filename: {\"content\": content}}}\n self.output(\"Sending contents of {} to {}\".format(self.file_path, url))\n response = self.requests.patch(url, data=dumps(data))\n\n # error\n if response.status_code != 200:\n self.oops(\"Could not update \" + gist.get(\"description\"))\n self.oops(\"PATCH request returned \" + str(response.status_code))\n return False\n\n # success\n self.yeah(\"Done!\")\n self.hey(\"The URL to this Gist is: {}\".format(gist[\"url\"]))\n return True", "response": "Updates the contents of a gist at GitHub."} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef create(self, content, **kwargs):\n # abort if content is False\n if content is False:\n return False\n\n # set new gist\n public = bool(kwargs.get(\"public\", True))\n data = {\n \"description\": self.filename,\n \"public\": public,\n \"files\": {self.filename: {\"content\": content}},\n }\n\n # send request\n url = self._api_url(\"gists\")\n self.output(\"Sending contents of {} to {}\".format(self.file_path, url))\n response = self.requests.post(url, data=dumps(data))\n\n # error\n if response.status_code != 201:\n self.oops(\"Could not create \" + self.filename)\n self.oops(\"POST request returned \" + str(response.status_code))\n return False\n\n # parse created gist\n gist = self._parse_gist(response.json())\n\n # success\n self.yeah(\"Done!\")\n self.hey(\"The URL to this Gist is: {}\".format(gist[\"url\"]))\n return True", "response": "Create a new gist."} {"SOURCE": "codesearchnet", "instruction": "How would you implement a function in Python 3 that\nasks user which gist to use in case of more than one gist matching the instance filename.", "response": "def _ask_which_gist(self, matches):\n \"\"\"\n Asks user which gist to use in case of more than one gist matching the\n instance filename.\n :param matches: (list) of dictioaries generated within select_gists()\n :return: (dict) of the selected gist\n \"\"\"\n # ask user which gist to use\n self.hey(\"Use {} from which gist?\".format(self.filename))\n for count, gist in enumerate(matches, 1):\n self.hey(\"[{}] {}\".format(count, gist.get(\"description\")))\n\n # get the gist index\n selected = False\n while not selected:\n gist_index = prompt(\"Type the number: \", type=int) - 1\n try:\n selected = matches[gist_index]\n except IndexError:\n self.oops(\"Invalid number, please try again.\")\n\n self.output(\"Using `{}` Gist\".format(selected[\"description\"]))\n return selected"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function that can\nreceive a gist dict and parse it to GetGist", "response": "def _parse_gist(gist):\n \"\"\"Receive a gist (dict) and parse it to GetGist\"\"\"\n\n # parse files\n files = list()\n file_names = sorted(filename for filename in gist[\"files\"].keys())\n for name in file_names:\n files.append(\n dict(filename=name, raw_url=gist[\"files\"][name].get(\"raw_url\"))\n )\n\n # parse description\n description = gist[\"description\"]\n if not description:\n names = sorted(f.get(\"filename\") for f in files)\n description = names.pop(0)\n\n return dict(\n description=description,\n id=gist.get(\"id\"),\n files=files,\n url=gist.get(\"html_url\"),\n )"} {"SOURCE": "codesearchnet", 
"instruction": "Here you have a function in Python 3, explain what it does\ndef indent(self, message):\n indent = self.indent_char * self.indent_size\n return indent + message", "response": "Sets the indent for standardized output\n "} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef get(self, url, params=None, **kwargs):\n return requests.get(url, params=params, headers=self.add_headers(**kwargs))", "response": "Encapsulte requests. get to use this class instance header"} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef patch(self, url, data=None, **kwargs):\n return requests.patch(url, data=data, headers=self.add_headers(**kwargs))", "response": "Encapsulte requests. patch to use this class instance header"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef post(self, url, data=None, **kwargs):\n return requests.post(url, data=data, headers=self.add_headers(**kwargs))", "response": "Encapsulte requests. post to use this class instance header"} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef save(self, content):\n # backup existing file if needed\n if os.path.exists(self.file_path) and not self.assume_yes:\n message = \"Overwrite existing {}? (y/n) \"\n if not confirm(message.format(self.filename)):\n self.backup()\n\n # write file\n self.output(\"Saving \" + self.filename)\n with open(self.file_path, \"wb\") as handler:\n if not isinstance(content, bytes):\n content = bytes(content, \"utf-8\")\n handler.write(content)\n self.yeah(\"Done!\")", "response": "Save any given content to the instance file."} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef backup(self):\n count = 0\n name = \"{}.bkp\".format(self.filename)\n backup = os.path.join(self.cwd, name)\n while os.path.exists(backup):\n count += 1\n name = \"{}.bkp{}\".format(self.filename, count)\n backup = os.path.join(self.cwd, name)\n self.hey(\"Moving existing {} to {}\".format(self.filename, name))\n os.rename(os.path.join(self.cwd, self.filename), backup)", "response": "Backups files with the same name of the instance filename"} {"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\nread the contents of a file in the local file system.", "response": "def read(self, file_path=None):\n \"\"\"\n Read the contents of a file.\n :param filename: (str) path to a file in the local file system\n :return: (str) contents of the file, or (False) if not found/not file\n \"\"\"\n if not file_path:\n file_path = self.file_path\n\n # abort if the file path does not exist\n if not os.path.exists(file_path):\n self.oops(\"Sorry, but {} does not exist\".format(file_path))\n return False\n\n # abort if the file path is not a file\n if not os.path.isfile(file_path):\n self.oops(\"Sorry, but {} is not a file\".format(file_path))\n return False\n\n with open(file_path) as handler:\n return handler.read()"} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef str_matcher(mode):\r\n\r\n def f_raw(inp_str, pos):\r\n return unique_literal_cache_pool[mode] if inp_str.startswith(mode, pos) else None\r\n\r\n def f_collection(inp_str, pos):\r\n for each in mode:\r\n if inp_str.startswith(each, pos):\r\n return unique_literal_cache_pool[each]\r\n return None\r\n\r\n if 
isinstance(mode, str):\r\n return f_raw\r\n\r\n if len(mode) == 1:\r\n mode = mode[0]\r\n return f_raw\r\n\r\n return f_collection", "response": "Returns a matcher function that matches the given literal, or any literal in the given collection, at a position in the input string."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef regex_matcher(regex_pat):\r\n if isinstance(regex_pat, str):\r\n regex_pat = re.compile(regex_pat)\r\n\r\n def f(inp_str, pos):\r\n m = regex_pat.match(inp_str, pos)\r\n return m.group() if m else None\r\n\r\n return f", "response": "Returns a matcher function that matches the given regex pattern at a position in the input string."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef ast_for_stmts(self, stmts: T) -> None:\r\n if not stmts:\r\n raise ValueError('no ast found!')\r\n head, *equals = stmts\r\n\r\n if head.name is NameEnum.TokenDef:\r\n self.ast_for_token_def(head)\r\n elif head.name is NameEnum.TokenIgnore:\r\n self.ast_for_token_ignore(head)\r\n else:\r\n self.ast_for_equals(head)\r\n\r\n for each in equals:\r\n self.ast_for_equals(each)\r\n\r\n # if every combined parser can reach any other combined, \r\n # just take any of them and compile it!\r\n if not self.compile_helper.alone and self._current__combined_parser_name:\r\n self.compile_helper.alone.add(self._current__combined_parser_name)", "response": "Add the AST for the given list of statements."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef _request(self, method, resource_uri, **kwargs):\n data = kwargs.get('data')\n response = method(self.API_BASE_URL + resource_uri,\n json=data, headers=self.headers)\n response.raise_for_status()\n return response.json()", "response": "Perform a method on a resource."} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script for\ngetting a resource.", "response": "def get(self, endpoint, **kwargs):\n \"\"\"Get a resource.\n\n Args:\n endpoint: resource endpoint.\n \"\"\"\n return self._request(requests.get, endpoint, **kwargs)"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\ncreate a resource. Args: endpoint: resource endpoint.", "response": "def post(self, endpoint, **kwargs):\n \"\"\"Create a resource.\n\n Args:\n endpoint: resource endpoint.\n \"\"\"\n return self._request(requests.post, endpoint, **kwargs)"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef put(self, endpoint, **kwargs):\n return self._request(requests.put, endpoint, **kwargs)", "response": "Update a resource."}
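The get/post/put records above all delegate to the same _request helper. A minimal, self-contained sketch of that encapsulation pattern follows; the ApiClient name, base URL, and header contents are illustrative assumptions, not details taken from the records:

```python
import requests


class ApiClient:
    """Hypothetical client showing the verb-wrapper pattern above."""

    API_BASE_URL = "https://api.example.com"  # assumed placeholder

    def __init__(self, token):
        self.headers = {"Authorization": "Bearer {}".format(token)}

    def _request(self, method, resource_uri, **kwargs):
        # Every verb-specific helper funnels through here, so auth
        # headers and HTTP error handling live in exactly one place.
        response = method(self.API_BASE_URL + resource_uri,
                          json=kwargs.get('data'), headers=self.headers)
        response.raise_for_status()
        return response.json()

    def get(self, endpoint, **kwargs):
        return self._request(requests.get, endpoint, **kwargs)

    def put(self, endpoint, **kwargs):
        return self._request(requests.put, endpoint, **kwargs)
```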
{"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\ntake Slack formatted text and return HTML.", "response": "def render(txt):\n \"\"\"\n Accepts Slack formatted text and returns HTML.\n \"\"\"\n\n # Removing links to other channels\n txt = re.sub(r'<#[^\\|]*\\|(.*)>', r'#\\g<1>', txt)\n\n # Removing links to other users\n txt = re.sub(r'<(@.*)>', r'\\g<1>', txt)\n\n # handle named hyperlinks\n txt = re.sub(r'<([^\\|]*)\\|([^\\|]*)>', r'<a href=\"\\g<1>\" target=\"blank\">\\g<2></a>', txt)\n\n # handle unnamed hyperlinks\n txt = re.sub(r'<([^a|/a].*)>', r'<a href=\"\\g<1>\" target=\"blank\">\\g<1></a>', txt)\n\n # handle ordered and unordered lists\n for delimiter in LIST_DELIMITERS:\n slack_tag = delimiter\n class_name = LIST_DELIMITERS[delimiter]\n\n # Wrap any lines that start with the slack_tag in <li></li>\n list_regex = u'(?:^|\\n){}\\s?(.*)'.format(slack_tag)\n list_repl = r'<li class=\"list-item-{}\">\\g<1></li>'.format(class_name)\n txt = re.sub(list_regex, list_repl, txt)\n\n # handle blockquotes\n txt = re.sub(u'(^|\\n)(?:&gt;){3}\\s?(.*)$', r'\\g<1><blockquote>\\g<2></blockquote>', txt, flags=re.DOTALL)\n txt = re.sub(u'(?:^|\\n)&gt;\\s?(.*)\\n?', r'<blockquote>\\g<1></blockquote>', txt)\n\n # handle code blocks\n txt = re.sub(r'```\\n?(.*)```', r'<pre>\\g<1></pre>', txt, flags=re.DOTALL)\n txt = re.sub(r'\\n(</pre>)', r'\\g<1>', txt)\n\n # handle bolding, italics, and strikethrough\n for wrapper in FORMATTERS:\n slack_tag = wrapper\n html_tag = FORMATTERS[wrapper]\n\n # Grab all text in formatted characters on the same line unless escaped\n regex = r'(?<!\\\\)\\{t}([^\\{t}|\\n]*)\\{t}'.format(t=slack_tag)\n repl = r'<{t}>\\g<1></{t}>'.format(t=html_tag)\n txt = re.sub(regex, repl, txt)\n\n # convert line breaks\n txt = txt.replace('\\n', '<br />')\n\n # clean up bad HTML\n parser = CustomSlackdownHTMLParser(txt)\n txt = parser.clean()\n\n # convert multiple spaces\n txt = txt.replace(r'  ', ' &nbsp;')\n\n return txt"}
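As a quick check of the substitution logic in the render() record above, here is a self-contained fragment exercising the hyperlink and formatting regexes; the three-entry FORMATTERS mapping is an assumption for the demo, not the library's full table:

```python
import re

FORMATTERS = {'*': 'b', '_': 'i', '~': 's'}  # assumed subset for the demo

def mini_render(txt):
    # named hyperlinks: <url|label> -> <a href="url" target="blank">label</a>
    txt = re.sub(r'<([^\|]*)\|([^\|]*)>',
                 r'<a href="\g<1>" target="blank">\g<2></a>', txt)
    # *bold*, _italic_, ~strike~ -> <b>, <i>, <s>, unless escaped with a backslash
    for slack_tag, html_tag in FORMATTERS.items():
        regex = r'(?<!\\)\{t}([^\{t}|\n]*)\{t}'.format(t=slack_tag)
        repl = r'<{t}>\g<1></{t}>'.format(t=html_tag)
        txt = re.sub(regex, repl, txt)
    return txt

print(mini_render('*bold* and <http://example.com|a link>'))
# -> <b>bold</b> and <a href="http://example.com" target="blank">a link</a>
```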
{"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef _open_list(self, list_type):\n if list_type in LIST_TYPES.keys():\n tag = LIST_TYPES[list_type]\n else:\n raise Exception('CustomSlackdownHTMLParser:_open_list: Not a valid list type.')\n\n html = '<{t} class=\"list-container-{c}\">'.format(\n t=tag,\n c=list_type\n )\n self.cleaned_html += html\n self.current_parent_element['tag'] = LIST_TYPES[list_type]\n self.current_parent_element['attrs'] = {'class': list_type}", "response": "Add an open list tag corresponding to the specification in LIST_TYPES."} {"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\nadd a close list tag corresponding to the currently open list found in current_parent_element.", "response": "def _close_list(self):\n \"\"\"\n Add a close list tag corresponding to the currently open\n list found in current_parent_element.\n \"\"\"\n list_type = self.current_parent_element['attrs']['class']\n tag = LIST_TYPES[list_type]\n\n html = '</{t}>'.format(\n t=tag\n )\n self.cleaned_html += html\n self.current_parent_element['tag'] = ''\n self.current_parent_element['attrs'] = {}"}
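_open_list and _close_list are two halves of one piece of bookkeeping: the parser remembers which container is open in current_parent_element and emits the matching close tag later. Below is a stripped-down, runnable sketch of that state machine; the class layout mirrors the records, while the driver at the bottom is invented for the demo:

```python
LIST_TYPES = {'bullet': 'ul', 'ordered': 'ol'}

class ListTracker:
    def __init__(self):
        self.cleaned_html = ''
        self.current_parent_element = {'tag': '', 'attrs': {}}

    def _open_list(self, list_type):
        # emit the container tag and remember it for the later close
        tag = LIST_TYPES[list_type]
        self.cleaned_html += '<{t} class="list-container-{c}">'.format(t=tag, c=list_type)
        self.current_parent_element = {'tag': tag, 'attrs': {'class': list_type}}

    def _close_list(self):
        # close whatever container was recorded as open
        tag = LIST_TYPES[self.current_parent_element['attrs']['class']]
        self.cleaned_html += '</{t}>'.format(t=tag)
        self.current_parent_element = {'tag': '', 'attrs': {}}

t = ListTracker()
t._open_list('bullet')
t.cleaned_html += '<li>item</li>'
t._close_list()
print(t.cleaned_html)  # <ul class="list-container-bullet"><li>item</li></ul>
```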
{"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\nhandling the start tag of the element.", "response": "def handle_starttag(self, tag, attrs):\n \"\"\"\n Called by HTMLParser.feed when a start tag is found.\n \"\"\"\n # Parse the tag attributes\n attrs_dict = dict(t for t in attrs)\n\n # If the tag is a predefined parent element\n if tag in PARENT_ELEMENTS:\n # If parser is parsing another parent element\n if self.current_parent_element['tag'] != '':\n # close the parent element\n self.cleaned_html += '</{}>'.format(self.current_parent_element['tag'])\n\n self.current_parent_element['tag'] = tag\n self.current_parent_element['attrs'] = {}\n\n self.cleaned_html += '<{}>'.format(tag)\n\n # If the tag is a list item\n elif tag == 'li':\n self.parsing_li = True\n\n # Parse the class name & subsequent type\n class_name = attrs_dict['class']\n list_type = class_name[10:]\n\n # Check if parsing a list\n if self.current_parent_element['tag'] == 'ul' or self.current_parent_element['tag'] == 'ol':\n cur_list_type = self.current_parent_element['attrs']['class']\n # Parsing a different list\n if cur_list_type != list_type:\n # Close that list\n self._close_list()\n\n # Open new list\n self._open_list(list_type)\n # Not parsing a list\n else:\n # if parsing some other parent\n if self.current_parent_element['tag'] != '':\n self.cleaned_html += '</{}>'.format(self.current_parent_element['tag'])\n # Open new list\n self._open_list(list_type)\n\n self.cleaned_html += '<{}>'.format(tag)\n\n # If the tag is a line break\n elif tag == 'br':\n # If parsing a paragraph, close it\n if self.current_parent_element['tag'] == 'p':\n self.cleaned_html += '</p>'\n self.current_parent_element['tag'] = ''\n self.current_parent_element['attrs'] = {}\n # If parsing a list, close it\n elif self.current_parent_element['tag'] == 'ul' or self.current_parent_element['tag'] == 'ol':\n self._close_list()\n # If parsing any other parent element, keep it\n elif self.current_parent_element['tag'] in PARENT_ELEMENTS:\n self.cleaned_html += '<br />'\n # If not in any parent element, create an empty paragraph\n else:\n self.cleaned_html += '<p></p>'\n\n # If the tag is something else, like a <b> or <i> tag\n else:\n # If not parsing any parent element\n if self.current_parent_element['tag'] == '':\n self.cleaned_html += '<p>'\n self.current_parent_element['tag'] = 'p'\n self.cleaned_html += '<{}'.format(tag)\n\n for attr in sorted(attrs_dict.keys()):\n self.cleaned_html += ' {k}=\"{v}\"'.format(\n k=attr,\n v=attrs_dict[attr]\n )\n\n self.cleaned_html += '>'"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nbe called by HTMLParser.feed when an end tag is found.", "response": "def handle_endtag(self, tag):\n \"\"\"\n Called by HTMLParser.feed when an end tag is found.\n \"\"\"\n if tag in PARENT_ELEMENTS:\n self.current_parent_element['tag'] = ''\n self.current_parent_element['attrs'] = ''\n\n if tag == 'li':\n self.parsing_li = True\n if tag != 'br':\n self.cleaned_html += '</{}>'.format(tag)"}
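Both callbacks above are driven by HTMLParser.feed. The toy subclass below shows the callback order feed() produces for a small input, which is the contract handle_starttag and handle_endtag rely on; the TagLogger class is a demo, not part of the library:

```python
from html.parser import HTMLParser

class TagLogger(HTMLParser):
    def handle_starttag(self, tag, attrs):
        print('start', tag, dict(attrs))

    def handle_endtag(self, tag):
        print('end', tag)

    def handle_data(self, data):
        print('data', repr(data))

TagLogger().feed('<p class="x">hi<br></p>')
# prints: start p {'class': 'x'} / data 'hi' / start br {} / end p
```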
{"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef handle_data(self, data):\n if self.current_parent_element['tag'] == '':\n self.cleaned_html += '<p>'\n self.current_parent_element['tag'] = 'p'\n\n self.cleaned_html += data", "response": "Called by HTMLParser.feed when text is found."} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\nremoves formatting tags added to pre elements.", "response": "def _remove_pre_formatting(self):\n \"\"\"\n Removes formatting tags added to pre elements.\n \"\"\"\n preformatted_wrappers = [\n 'pre',\n 'code'\n ]\n\n for wrapper in preformatted_wrappers:\n for formatter in FORMATTERS:\n tag = FORMATTERS[formatter]\n character = formatter\n\n regex = r'(<{w}>.*)<{t}>(.*)</{t}>(.*)</{w}>'.format(\n t=tag,\n w=wrapper\n )\n repl = r'\\g<1>{c}\\g<2>{c}\\g<3>'.format(c=character)\n self.cleaned_html = re.sub(regex, repl, self.cleaned_html)"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef clean(self):\n # Calls handle_starttag, handle_endtag, and handle_data\n self.feed()\n\n # Clean up any parent tags left open\n if self.current_parent_element['tag'] != '':\n self.cleaned_html += '</{}>'.format(self.current_parent_element['tag'])\n\n # Remove empty <p></p> added after lists\n self.cleaned_html = re.sub(r'(</[u|o]l>)<p></p>', r'\\g<1>', self.cleaned_html)\n\n self._remove_pre_formatting()\n\n return self.cleaned_html", "response": "Cleans up any problematic HTML."}
{"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef render_author(**kwargs):\n html = '<div class=\"author\">'\n\n author_avatar = kwargs.get('author_avatar', None)\n if author_avatar:\n html += '<img class=\"author-avatar\" src=\"{}\">'.format(author_avatar)\n\n author_name = kwargs.get('author_name', None)\n if author_name:\n html += '<div class=\"author-name\">'\n\n author_link = kwargs.get('author_link', None)\n if author_link:\n html += '<a href=\"{author_link}\">{author_name}</a>'.format(\n author_link=author_link,\n author_name=author_name\n )\n else:\n html += author_name\n\n html += '</div>'\n\n author_handle = kwargs.get('author_handle', None)\n if author_handle:\n html += '<div class=\"author-handle\">{}</div>'.format(author_handle)\n\n html += '</div>'", "response": "Unstrict template block for rendering authors:\n<div class=\"author\">\n <img class=\"author-avatar\" src=\"{author_avatar}\">\n <div class=\"author-name\">\n {author_name}\n </div>\n <div class=\"author-handle\">\n {author_handle}\n </div>\n</div>"} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef render_metadata(**kwargs):\n html = '<div class=\"meta\">'\n\n service_logo = kwargs.get('service_logo', None)\n if service_logo:\n html += '<img class=\"meta-logo\" src=\"{}\">'.format(service_logo)\n\n service_name = kwargs.get('service_name', None)\n if service_name:\n html += '<span class=\"meta-name\">{}</span>'.format(service_name)\n\n timestamp = kwargs.get('timestamp', None)\n if timestamp:\n html += '<div class=\"meta-timestamp\">'\n\n timestamp_link = kwargs.get('timestamp_link', None)\n if timestamp_link:\n html += '<a href=\"{timestamp_link}\">{timestamp}</a>'.format(\n timestamp_link=timestamp_link,\n timestamp=timestamp\n )\n else:\n html += timestamp\n\n html += '</div>'\n\n html += '</div>'", "response": "Render the metadata block."}
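render_author and render_metadata both follow the same kwargs-guarded string assembly. A compact, runnable sketch of that pattern is below; the render_badge name and all tag/class names are placeholders, not the library's markup:

```python
def render_badge(**kwargs):
    # Build up an HTML fragment piecewise, emitting each part only
    # when the corresponding keyword argument was supplied.
    html = '<div class="badge">'
    name = kwargs.get('name', None)
    if name:
        link = kwargs.get('link', None)
        if link:
            html += '<a href="{link}">{name}</a>'.format(link=link, name=name)
        else:
            html += name
    html += '</div>'
    return html

print(render_badge(name='Ada', link='https://example.com'))
# <div class="badge"><a href="https://example.com">Ada</a></div>
```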
{"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef render_image(**kwargs):\n html = ''\n\n url = kwargs.get('url', None)\n if url:\n html = '\n {author}\n <p class=\"attachment-text\">{text}</p>
    \n {metadata}\n {image}\n \n \"\"\".format(\n author=author,\n text=text,\n metadata=metadata,\n image=image\n ).strip()\n\n return html"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nget the model params from the given LAB directory.", "response": "def get_model(LAB_DIR):\n \"\"\" Cannon model params \"\"\"\n coeffs = np.load(\"%s/coeffs.npz\" %LAB_DIR)['arr_0']\n scatters = np.load(\"%s/scatters.npz\" %LAB_DIR)['arr_0']\n chisqs = np.load(\"%s/chisqs.npz\" %LAB_DIR)['arr_0']\n pivots = np.load(\"%s/pivots.npz\" %LAB_DIR)['arr_0']\n return coeffs, scatters, chisqs, pivots"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef get_labels(ids_find):\n a = pyfits.open(\"%s/lamost_catalog_full.fits\" %LAB_DIR)\n data = a[1].data\n a.close()\n id_all = data['lamost_id']\n id_all = np.array(id_all)\n id_all = np.array([val.strip() for val in id_all])\n snr_all = data['cannon_snrg']\n chisq_all = data['cannon_chisq']\n teff = data['cannon_teff']\n logg = data['cannon_logg']\n feh = data['cannon_m_h']\n afe = data['cannon_alpha_m']\n ak = data['cannon_a_k']\n labels = np.vstack((teff,logg,feh,afe,ak))\n choose = np.in1d(id_all, ids_find)\n id_choose = id_all[choose]\n label_choose = labels[:,choose]\n snr_choose = snr_all[choose]\n chisq_choose = chisq_all[choose]\n inds = np.array([np.where(id_choose==val)[0][0] for val in ids_find])\n print(id_choose[inds][100])\n print(ids_find[100])\n return label_choose[:,inds], snr_choose[inds], chisq_choose[inds]", "response": "Get labels for a list of ids"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\ngetting the normalized spectra for the lamost model", "response": "def get_normed_spectra():\n \"\"\" Spectra to compare with models \"\"\"\n wl = np.load(\"%s/wl.npz\" %LAB_DIR)['arr_0']\n filenames = np.array(\n [SPEC_DIR + \"/Spectra\" + \"/\" + val for val in lamost_id])\n grid, fluxes, ivars, npix, SNRs = lamost.load_spectra(\n lamost_id, input_grid=wl)\n ds = dataset.Dataset(\n wl, lamost_id, fluxes, ivars, [1], \n lamost_id[0:2], fluxes[0:2], ivars[0:2])\n ds.continuum_normalize_gaussian_smoothing(L=50)\n np.savez(SPEC_DIR + \"/\" + \"norm_flux.npz\", ds.tr_flux)\n np.savez(SPEC_DIR + \"/\" + \"norm_ivar.npz\", ds.tr_ivar)\n return ds.tr_flux, ds.tr_ivar"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef wget_files():\n for f in lamost_id:\n short = (f.split('-')[2]).split('_')[0]\n filename = \"%s/%s.gz\" %(short,f)\n DIR = \"/Users/annaho/Data/Li_Giants/Spectra_APOKASC\"\n searchfor = \"%s/%s.gz\" %(DIR,f)\n if glob.glob(searchfor):\n print(\"done\")\n else:\n #print(searchfor)\n os.system(\n \"wget http://dr2.lamost.org/sas/fits/%s\" %(filename))\n new_filename = filename.split(\"_\")[0] + \"_\" + filename.split(\"_\")[2]\n os.system(\n \"wget http://dr2.lamost.org/sas/fits/%s\" %(new_filename))", "response": "Pull the files from the LAMOST archive"} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef get_labels():\n cannon_teff = data['cannon_teff_2']\n cannon_logg = data['cannon_logg_2']\n cannon_m_h = data['cannon_m_h']\n cannon_alpha_m = data['cannon_alpha_m']\n cannon_a_k = data['cannon_a_k']\n labels = np.vstack(\n (cannon_teff, cannon_logg, cannon_m_h, cannon_alpha_m, cannon_a_k))\n cannon_chisq = data['cannon_chisq']\n np.savez(DATA_DIR + \"chisq.npz\", labels)\n np.savez(DATA_DIR + 
\"labels.npz\", labels)\n snrg = data['cannon_snrg'] # snrg * 3\n np.savez(\"snr.npz\", snrg)\n return labels.T", "response": "Get the labels of the Cannon model spectra"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef cannon_normalize(spec_raw):\n spec = np.array([spec_raw])\n wl = np.arange(0, spec.shape[1])\n w = continuum_normalization.gaussian_weight_matrix(wl, L=50)\n ivar = np.ones(spec.shape)*0.5\n cont = continuum_normalization._find_cont_gaussian_smooth(\n wl, spec, ivar, w)\n norm_flux, norm_ivar = continuum_normalization._cont_norm(\n spec, ivar, cont)\n return norm_flux[0]", "response": "Normalize according to The Cannon"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef resample(grid, wl, flux):\n flux_rs = (interpolate.interp1d(wl, flux))(grid)\n return flux_rs", "response": "Resample spectrum onto desired grid"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script for\ngenerating the gradient spectra for a Cannon object.", "response": "def gen_cannon_grad_spec(choose, coeffs, pivots):\n \"\"\" Generate Cannon gradient spectra\n\n Parameters\n ----------\n labels: default values for [teff, logg, feh, cfe, nfe, afe, ak]\n choose: val of cfe or nfe, whatever you're varying\n low: lowest val of cfe or nfe, whatever you're varying\n high: highest val of cfe or nfe, whatever you're varying\n \"\"\"\n base_labels = [4800, 2.5, 0.03, 0.10, -0.17, -0.17, 0, -0.16,\n -0.13, -0.15, 0.13, 0.08, 0.17, -0.062]\n label_names = np.array(\n ['TEFF', 'LOGG', 'AK', 'Al', 'Ca', 'C', 'Fe', 'Mg', 'Mn',\n 'Ni', 'N', 'O', 'Si', 'Ti'])\n label_atnum = np.array(\n [0, 1, -1, 13, 20, 6, 26, 12, 25, 28, 7, 8, 14, 22])\n # Generate Cannon gradient spectra\n ind = np.where(label_atnum==choose)[0][0]\n low_lab = copy.copy(base_labels)\n high = base_labels[ind]\n if choose > 0:\n low = base_labels[ind] - 0.2\n else: #temperature\n if choose != 0: print(\"warning...\")\n low = base_labels[ind] - 200\n low_lab[ind] = low\n lvec = (train_model._get_lvec(np.array([low_lab]), pivots))[0]\n model_low = np.dot(coeffs, lvec)\n lvec = (train_model._get_lvec(np.array([base_labels]), pivots))[0]\n model_high = np.dot(coeffs, lvec)\n grad_spec = (model_high - model_low) / (high - low)\n return grad_spec"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nget model specting from the given atomic number", "response": "def get_model_spec_ting(atomic_number):\n \"\"\" \n X_u_template[0:2] are teff, logg, vturb in km/s\n X_u_template[:,3] -> onward, put atomic number \n atomic_number is 6 for C, 7 for N\n \"\"\"\n DATA_DIR = \"/Users/annaho/Data/LAMOST/Mass_And_Age\"\n temp = np.load(\"%s/X_u_template_KGh_res=1800.npz\" %DATA_DIR)\n X_u_template = temp[\"X_u_template\"]\n wl = temp[\"wavelength\"]\n grad_spec = X_u_template[:,atomic_number]\n return wl, grad_spec"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\ncalculating the residuals of the dataset and model object using the dataset and model object ds and model object m.", "response": "def get_residuals(ds, m):\n \"\"\" Using the dataset and model object, calculate the residuals and return\n\n Parameters\n ----------\n ds: dataset object\n m: model object\n Return\n ------\n residuals: array of residuals, spec minus model spec\n \"\"\"\n model_spectra = get_model_spectra(ds, m)\n resid = ds.test_flux - model_spectra\n return resid"} {"SOURCE": 
"codesearchnet", "instruction": "Can you implement a function in Python 3 that\nloads the model object for the", "response": "def load_model():\n \"\"\" Load the model \n\n Parameters\n ----------\n direc: directory with all of the model files\n \n Returns\n -------\n m: model object\n \"\"\"\n direc = \"/home/annaho/TheCannon/code/lamost/mass_age/cn\"\n m = model.CannonModel(2)\n m.coeffs = np.load(direc + \"/coeffs.npz\")['arr_0'][0:3626,:] # no cols\n m.scatters = np.load(direc + \"/scatters.npz\")['arr_0'][0:3626] # no cols\n m.chisqs = np.load(direc + \"/chisqs.npz\")['arr_0'][0:3626] # no cols\n m.pivots = np.load(direc + \"/pivots.npz\")['arr_0']\n return m"} {"SOURCE": "codesearchnet", "instruction": "How would you implement a function in Python 3 that\nloads the dataset for a single date", "response": "def load_dataset(date):\n \"\"\" Load the dataset for a single date \n \n Parameters\n ----------\n date: the date (string) for which to load the data & dataset\n\n Returns\n -------\n ds: the dataset object\n \"\"\"\n LAB_DIR = \"/home/annaho/TheCannon/data/lamost\"\n WL_DIR = \"/home/annaho/TheCannon/code/lamost/mass_age/cn\"\n SPEC_DIR = \"/home/annaho/TheCannon/code/apogee_lamost/xcalib_4labels/output\"\n wl = np.load(WL_DIR + \"/wl_cols.npz\")['arr_0'][0:3626] # no cols\n ds = dataset.Dataset(wl, [], [], [], [], [], [], [])\n test_label = np.load(\"%s/%s_all_cannon_labels.npz\" %(LAB_DIR,date))['arr_0']\n ds.test_label_vals = test_label\n a = np.load(\"%s/%s_norm.npz\" %(SPEC_DIR,date))\n ds.test_flux = a['arr_0']\n ds.test_ivar = a['arr_1']\n ds.test_ID = np.load(\"%s/%s_ids.npz\" %(SPEC_DIR,date))['arr_0']\n return ds"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function that can\nfit a Gaussian to the data", "response": "def fit_gaussian(x, y, yerr, p0):\n \"\"\" Fit a Gaussian to the data \"\"\"\n try:\n popt, pcov = curve_fit(gaussian, x, y, sigma=yerr, p0=p0, absolute_sigma=True)\n except RuntimeError:\n return [0],[0]\n return popt, pcov"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef select(yerrs, amps, amp_errs, widths):\n keep_1 = np.logical_and(amps < 0, widths > 1)\n keep_2 = np.logical_and(np.abs(amps) > 3*yerrs, amp_errs < 3*np.abs(amps))\n keep = np.logical_and(keep_1, keep_2)\n return keep", "response": "selects an object based on the given error and amplitudes"} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef run_all():\n DATA_DIR = \"/home/annaho/TheCannon/code/apogee_lamost/xcalib_4labels\"\n dates = os.listdir(\"/home/share/LAMOST/DR2/DR2_release\")\n dates = np.array(dates)\n dates = np.delete(dates, np.where(dates=='.directory')[0][0])\n dates = np.delete(dates, np.where(dates=='all_folders.list')[0][0])\n dates = np.delete(dates, np.where(dates=='dr2.lis')[0][0])\n\n for date in dates:\n if glob.glob(\"*%s*.txt\" %date):\n print(\"%s done\" %date)\n else:\n print(\"running %s\" %date)\n run_one_date(date)", "response": "Load the data that we re using to search for Li - rich giants."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef gen_cannon_grad_spec(base_labels, choose, low, high, coeffs, pivots):\n # Generate Cannon gradient spectra\n low_lab = copy.copy(base_labels)\n low_lab[choose] = low\n lvec = (train_model._get_lvec(np.array([low_lab]), pivots))[0]\n model_low = np.dot(coeffs, lvec)\n high_lab = copy.copy(base_labels)\n high_lab[choose] = 
high\n lvec = (train_model._get_lvec(np.array([high_lab]), pivots))[0]\n model_high = np.dot(coeffs, lvec)\n grad_spec = (model_high - model_low) / (high - low)\n return grad_spec", "response": "Generate the gradient of a Cannon graph based on the base_labels and coeffs."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef get_err(snr):\n\n quad_terms = np.array(\n [3.11e-3, 1.10e-5, 6.95e-6, 5.05e-6, 4.65e-6, 4.10e-6])\n lin_terms = np.array(\n [-0.869, -2.07e-3, -1.40e-3, -1.03e-3, -1.13e-3, -7.29e-4])\n consts = np.array([104, 0.200, 0.117, 0.114, 0.156, 0.0624])\n err = quad_terms[:,None] * snr**2 + lin_terms[:,None] * snr + consts[:,None]\n\n # find the minimum of the quadratic function\n min_snr = -lin_terms / (2*quad_terms)\n min_err = quad_terms * min_snr**2 + lin_terms * min_snr + consts\n mask = (snr[:,None] > min_snr).T\n for i in range(0,len(min_err)):\n err[i][mask[i]] = min_err[i]\n\n return err", "response": "Get approximate scatters from SNR."} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nget colors from a catalog", "response": "def get_colors(catalog):\n \"\"\" \n Pull colors from catalog\n\n Parameters\n ----------\n catalog: filename\n \"\"\"\n print(\"Get Colors\")\n a = pyfits.open(catalog)\n data = a[1].data\n a.close()\n all_ids = data['LAMOST_ID_1']\n all_ids = np.array([val.strip() for val in all_ids])\n # G magnitude\n gmag = data['gpmag']\n gmag_err = data['e_gpmag']\n # R magnitude\n rmag = data['rpmag']\n rmag_err = data['e_rpmag']\n # I magnitude\n imag = data['ipmag']\n imag_err = data['e_ipmag']\n # W1\n W1 = data['W1mag']\n W1_err = data['e_W1mag']\n # W1\n W2 = data['W2mag']\n W2_err = data['e_W2mag']\n # J magnitude\n Jmag = data['Jmag']\n Jmag_err = data['e_Jmag']\n # H magnitude\n Hmag = data['Hmag']\n Hmag_err = data['e_Hmag']\n # K magnitude\n Kmag = data['Kmag']\n Kmag_err = data['e_Kmag']\n # Stack\n mag = np.vstack((\n gmag, rmag, imag, Jmag, Hmag, Kmag, W2, W1)) # 8, nobj\n mag_err = np.vstack((\n gmag_err, rmag_err, imag_err, Jmag_err, \n Hmag_err, Kmag_err, W2_err, W1_err))\n # Make g-r, r-i, i-J, etc\n col = mag[:-1] - mag[1:]\n col_ivar = 1/(mag_err[:-1]**2 + mag_err[1:]**2)\n\n # There's something wrong with the i-band, I think..so the second color r-i\n #bad = col[:,1] < 0.0\n #col_ivar[bad] = 0.0\n\n return all_ids, col, col_ivar"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef draw_spectra(md, ds):\n coeffs_all, covs, scatters, red_chisqs, pivots, label_vector = model.model\n nstars = len(dataset.test_SNR)\n cannon_flux = np.zeros(dataset.test_flux.shape)\n cannon_ivar = np.zeros(dataset.test_ivar.shape)\n for i in range(nstars):\n x = label_vector[:,i,:]\n spec_fit = np.einsum('ij, ij->i', x, coeffs_all)\n cannon_flux[i,:] = spec_fit\n bad = dataset.test_ivar[i,:] == SMALL**2\n cannon_ivar[i,:][~bad] = 1. 
/ scatters[~bad] ** 2\n return cannon_flux, cannon_ivar", "response": "Generates the best fit spectra for all the test objects \n\n "} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef overlay_spectra(model, dataset):\n best_flux, best_ivar = draw_spectra(model, dataset)\n coeffs_all, covs, scatters, all_chisqs, pivots, label_vector = model.model\n\n # Overplot original spectra with best-fit spectra\n print(\"Overplotting spectra for ten random stars\")\n res = dataset.test_flux-best_flux\n lambdas = dataset.wl\n npix = len(lambdas)\n nstars = best_flux.shape[0]\n pickstars = []\n for i in range(10):\n pickstars.append(random.randrange(0, nstars-1))\n for i in pickstars:\n print(\"Star %s\" % i)\n ID = dataset.test_ID[i]\n spec_orig = dataset.test_flux[i,:]\n bad = dataset.test_flux[i,:] == 0\n lambdas = np.ma.array(lambdas, mask=bad, dtype=float)\n npix = len(lambdas.compressed())\n spec_orig = np.ma.array(dataset.test_flux[i,:], mask=bad)\n spec_fit = np.ma.array(best_flux[i,:], mask=bad)\n ivars_orig = np.ma.array(dataset.test_ivar[i,:], mask=bad)\n ivars_fit = np.ma.array(best_ivar[i,:], mask=bad)\n red_chisq = np.sum(all_chisqs[:,i], axis=0) / (npix - coeffs_all.shape[1])\n red_chisq = np.round(red_chisq, 2)\n fig,axarr = plt.subplots(2)\n ax1 = axarr[0]\n im = ax1.scatter(lambdas, spec_orig, label=\"Orig Spec\",\n c=1 / np.sqrt(ivars_orig), s=10)\n ax1.scatter(lambdas, spec_fit, label=\"Cannon Spec\", c='r', s=10)\n ax1.errorbar(lambdas, spec_fit, \n yerr=1/np.sqrt(ivars_fit), fmt='ro', ms=1, alpha=0.7)\n ax1.set_xlabel(r\"Wavelength $\\lambda (\\AA)$\")\n ax1.set_ylabel(\"Normalized flux\")\n ax1.set_title(\"Spectrum Fit: %s\" % ID)\n ax1.set_title(\"Spectrum Fit\")\n ax1.set_xlim(min(lambdas.compressed())-10, max(lambdas.compressed())+10)\n ax1.legend(loc='lower center', fancybox=True, shadow=True)\n ax2 = axarr[1]\n ax2.scatter(spec_orig, spec_fit, c=1/np.sqrt(ivars_orig), alpha=0.7)\n ax2.errorbar(spec_orig, spec_fit, yerr=1 / np.sqrt(ivars_fit),\n ecolor='k', fmt=\"none\", ms=1, alpha=0.7)\n #fig.subplots_adjust(right=0.8)\n #cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])\n fig.colorbar()\n #fig.colorbar(\n # im, cax=cbar_ax,\n # label=\"Uncertainties on the Fluxes from the Original Spectrum\")\n xlims = ax2.get_xlim()\n ylims = ax2.get_ylim()\n lims = [np.min([xlims, ylims]), np.max([xlims, ylims])]\n ax2.plot(lims, lims, 'k-', alpha=0.75)\n textstr = \"Red Chi Sq: %s\" % red_chisq\n props = dict(boxstyle='round', facecolor='palevioletred', alpha=0.5)\n ax2.text(0.05, 0.95, textstr, transform=ax2.transAxes, fontsize=14,\n verticalalignment='top', bbox=props)\n ax2.set_xlim(xlims)\n ax2.set_ylim(ylims)\n ax2.set_xlabel(\"Orig Fluxes\")\n ax2.set_ylabel(\"Fitted Fluxes\")\n plt.tight_layout()\n filename = \"best_fit_spec_Star%s.png\" % i\n print(\"Saved as %s\" % filename)\n fig.savefig(filename)\n plt.close(fig)", "response": "Overlays the original spectra with the best fit spectra."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef residuals(cannon_set, dataset):\n print(\"Stacking spectrum fit residuals\")\n res = dataset.test_fluxes - cannon_set.test_fluxes\n bad = dataset.test_ivars == SMALL**2\n err = np.zeros(len(dataset.test_ivars))\n err = np.sqrt(1. / dataset.test_ivars + 1. 
/ cannon_set.test_ivars)\n res_norm = res / err\n res_norm = np.ma.array(res_norm,\n mask=(np.ones_like(res_norm) *\n (np.std(res_norm,axis=0) == 0)))\n res_norm = np.ma.compress_cols(res_norm)\n\n for i in range(len(cannon_set.get_plotting_labels())):\n label_name = cannon_set.get_plotting_labels()[i]\n print(\"Plotting residuals sorted by %s\" % label_name)\n label_vals = cannon_set.tr_label_vals[:,i]\n sorted_res = res_norm[np.argsort(label_vals)]\n mu = np.mean(sorted_res.flatten())\n sigma = np.std(sorted_res.flatten())\n left, width = 0.1, 0.65\n bottom, height = 0.1, 0.65\n bottom_h = left_h = left+width+0.1\n rect_scatter = [left, bottom, width, height]\n rect_histx = [left, bottom_h, width, 0.1]\n rect_histy = [left_h, bottom, 0.1, height]\n plt.figure()\n axScatter = plt.axes(rect_scatter)\n axHistx = plt.axes(rect_histx)\n axHisty = plt.axes(rect_histy)\n im = axScatter.imshow(sorted_res, cmap=plt.cm.bwr_r,\n interpolation=\"nearest\", vmin=mu - 3. * sigma,\n vmax=mu + 3. * sigma, aspect='auto',\n origin='lower', extent=[0, len(dataset.wl),\n min(label_vals),\n max(label_vals)])\n cax, kw = colorbar.make_axes(axScatter.axes, location='bottom')\n plt.colorbar(im, cax=cax, orientation='horizontal')\n axScatter.set_title(\n r\"Spectral Residuals Sorted by ${0:s}$\".format(label_name))\n axScatter.set_xlabel(\"Pixels\")\n axScatter.set_ylabel(r\"$%s$\" % label_name)\n axHisty.hist(np.std(res_norm,axis=1)[~np.isnan(np.std(res_norm, axis=1))], orientation='horizontal', range=[0,2])\n axHisty.axhline(y=1, c='k', linewidth=3, label=\"y=1\")\n axHisty.legend(bbox_to_anchor=(0., 0.8, 1., .102),\n prop={'family':'serif', 'size':'small'})\n axHisty.text(1.0, 0.5, \"Distribution of Stdev of Star Residuals\",\n verticalalignment='center', transform=axHisty.transAxes,\n rotation=270)\n axHisty.set_ylabel(\"Standard Deviation\")\n start, end = axHisty.get_xlim()\n axHisty.xaxis.set_ticks(np.linspace(start, end, 3))\n axHisty.set_xlabel(\"Number of Stars\")\n axHisty.xaxis.set_label_position(\"top\")\n axHistx.hist(np.std(res_norm, axis=0)[~np.isnan(np.std(res_norm, axis=0))], range=[0.8,1.1])\n axHistx.axvline(x=1, c='k', linewidth=3, label=\"x=1\")\n axHistx.set_title(\"Distribution of Stdev of Pixel Residuals\")\n axHistx.set_xlabel(\"Standard Deviation\")\n axHistx.set_ylabel(\"Number of Pixels\")\n start, end = axHistx.get_ylim()\n axHistx.yaxis.set_ticks(np.linspace(start, end, 3))\n axHistx.legend()\n filename = \"residuals_sorted_by_label_%s.png\" % i\n plt.savefig(filename)\n print(\"File saved as %s\" % filename)\n plt.close()\n\n # Auto-correlation of mean residuals\n print(\"Plotting Auto-Correlation of Mean Residuals\")\n mean_res = res_norm.mean(axis=0)\n autocorr = np.correlate(mean_res, mean_res, mode=\"full\")\n pkwidth = int(len(autocorr)/2-np.argmin(autocorr))\n xmin = int(len(autocorr)/2)-pkwidth\n xmax = int(len(autocorr)/2)+pkwidth\n zoom_x = np.linspace(xmin, xmax, len(autocorr[xmin:xmax]))\n fig, axarr = plt.subplots(2)\n axarr[0].plot(autocorr)\n axarr[0].set_title(\"Autocorrelation of Mean Spectral Residual\")\n axarr[0].set_xlabel(\"Lag (# Pixels)\")\n axarr[0].set_ylabel(\"Autocorrelation\")\n axarr[1].plot(zoom_x, autocorr[xmin:xmax])\n axarr[1].set_title(\"Central Peak, Zoomed\")\n axarr[1].set_xlabel(\"Lag (# Pixels)\")\n axarr[1].set_ylabel(\"Autocorrelation\")\n filename = \"residuals_autocorr.png\"\n plt.savefig(filename)\n print(\"saved %s\" % filename)\n plt.close()", "response": "Stack spectrum fit residuals"} {"SOURCE": "codesearchnet", "instruction": "How 
would you explain what the following Python 3 function does\ndef _find_contpix_given_cuts(f_cut, sig_cut, wl, fluxes, ivars):\n f_bar = np.median(fluxes, axis=0)\n sigma_f = np.var(fluxes, axis=0)\n bad = np.logical_and(f_bar==0, sigma_f==0)\n cont1 = np.abs(f_bar-1) <= f_cut\n cont2 = sigma_f <= sig_cut\n contmask = np.logical_and(cont1, cont2)\n contmask[bad] = False\n return contmask", "response": "Find and return continuum pixels given the flux and sigma cuts."} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nfind continuum pix in the spec meeting a set target fraction.", "response": "def _find_contpix(wl, fluxes, ivars, target_frac):\n \"\"\" Find continuum pix in spec, meeting a set target fraction\n\n Parameters\n ----------\n wl: numpy ndarray\n rest-frame wavelength vector\n\n fluxes: numpy ndarray\n pixel intensities\n \n ivars: numpy ndarray\n inverse variances, parallel to fluxes\n\n target_frac: float\n the fraction of pixels in spectrum desired to be continuum\n\n Returns\n -------\n contmask: boolean numpy ndarray\n True corresponds to continuum pixels\n \"\"\"\n print(\"Target frac: %s\" %(target_frac))\n bad1 = np.median(ivars, axis=0) == SMALL\n bad2 = np.var(ivars, axis=0) == 0\n bad = np.logical_and(bad1, bad2)\n npixels = len(wl)-sum(bad)\n f_cut = 0.0001\n stepsize = 0.0001\n sig_cut = 0.0001\n contmask = _find_contpix_given_cuts(f_cut, sig_cut, wl, fluxes, ivars)\n if npixels > 0:\n frac = sum(contmask)/float(npixels)\n else:\n frac = 0\n while (frac < target_frac): \n f_cut += stepsize\n sig_cut += stepsize\n contmask = _find_contpix_given_cuts(f_cut, sig_cut, wl, fluxes, ivars)\n if npixels > 0:\n frac = sum(contmask)/float(npixels)\n else:\n frac = 0\n if frac > 0.10*npixels:\n print(\"Warning: Over 10% of pixels identified as continuum.\")\n print(\"%s out of %s pixels identified as continuum\" %(sum(contmask), \n npixels))\n print(\"Cuts: f_cut %s, sig_cut %s\" %(f_cut, sig_cut))\n return contmask"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef _find_contpix_regions(wl, fluxes, ivars, frac, ranges):\n contmask = np.zeros(len(wl), dtype=bool)\n for chunk in ranges:\n start = chunk[0]\n stop = chunk[1]\n contmask[start:stop] = _find_contpix(\n wl[start:stop], fluxes[:,start:stop], ivars[:,start:stop], frac)\n return contmask", "response": "Find continuum pix in a spectrum split into chunks"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nload the reference data and assign each object a random integer from 0 to 7. Save the IDs.", "response": "def group_data():\n \"\"\" Load the reference data, and assign each object\n a random integer from 0 to 7. Save the IDs. 
\"\"\"\n\n tr_obj = np.load(\"%s/ref_id.npz\" %direc_ref)['arr_0']\n groups = np.random.randint(0, 8, size=len(tr_obj))\n np.savez(\"ref_groups.npz\", groups)"} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef train(ds, ii):\n print(\"Loading model\")\n m = model.CannonModel(2)\n print(\"Training...\")\n m.fit(ds)\n np.savez(\"./ex%s_coeffs.npz\" %ii, m.coeffs)\n np.savez(\"./ex%s_scatters.npz\" %ii, m.scatters)\n np.savez(\"./ex%s_chisqs.npz\" %ii, m.chisqs)\n np.savez(\"./ex%s_pivots.npz\" %ii, m.pivots)\n fig = m.diagnostics_leading_coeffs(ds)\n plt.savefig(\"ex%s_leading_coeffs.png\" %ii)\n # m.diagnostics_leading_coeffs_triangle(ds)\n # m.diagnostics_plot_chisq(ds)\n return m", "response": "Train the Cannon model on a dataset object."} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef xvalidate():\n\n print(\"Loading data\")\n groups = np.load(\"ref_groups.npz\")['arr_0']\n ref_label = np.load(\"%s/ref_label.npz\" %direc_ref)['arr_0']\n ref_id = np.load(\"%s/ref_id.npz\" %direc_ref)['arr_0']\n ref_flux = np.load(\"%s/ref_flux.npz\" %direc_ref)['arr_0']\n ref_ivar = np.load(\"%s/ref_ivar.npz\" %direc_ref)['arr_0']\n wl = np.load(\"%s/wl.npz\" %direc_ref)['arr_0']\n\n num_models = 8\n\n for ii in np.arange(num_models):\n print(\"Leaving out group %s\" %ii)\n train_on = groups != ii\n test_on = groups == ii\n\n tr_label = ref_label[train_on]\n tr_id = ref_id[train_on]\n tr_flux = ref_flux[train_on]\n tr_ivar = ref_ivar[train_on]\n print(\"Training on %s objects\" %len(tr_id))\n test_label = ref_label[test_on]\n test_id = ref_id[test_on]\n test_flux = ref_flux[test_on]\n test_ivar = ref_ivar[test_on]\n print(\"Testing on %s objects\" %len(test_id))\n\n print(\"Loading dataset...\")\n ds = dataset.Dataset(\n wl, tr_id, tr_flux, tr_ivar, tr_label, \n test_id, test_flux, test_ivar)\n ds.set_label_names(\n ['T_{eff}', '\\log g', '[M/H]', '[\\\\alpha/Fe]', 'AKWISE'])\n fig = ds.diagnostics_SNR()\n plt.savefig(\"ex%s_SNR.png\" %ii)\n fig = ds.diagnostics_ref_labels()\n plt.savefig(\"ex%s_ref_label_triangle.png\" %ii)\n np.savez(\"ex%s_tr_snr.npz\" %ii, ds.tr_SNR)\n\n # train a model\n m = train(ds, ii)\n\n # test step\n ds.tr_label = test_label # to compare the results\n test(ds, m, ii)", "response": "Train a model on the remaining 1 / 8 of the sample."} {"SOURCE": "codesearchnet", "instruction": "Implement a Python 3 function for\ncalculating the weighted standard deviation of a sequence of values.", "response": "def weighted_std(values, weights):\n \"\"\" Calculate standard deviation weighted by errors \"\"\"\n average = np.average(values, weights=weights)\n variance = np.average((values-average)**2, weights=weights)\n return np.sqrt(variance)"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nestimate the scatter in a region of the spectrum taken to be continuum", "response": "def estimate_noise(fluxes, contmask):\n \"\"\" Estimate the scatter in a region of the spectrum\n taken to be continuum \"\"\"\n nstars = fluxes.shape[0]\n scatter = np.zeros(nstars)\n for i,spec in enumerate(fluxes): \n cont = spec[contmask]\n scatter[i] = stats.funcs.mad_std(cont)\n return scatter"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef load_ref_spectra():\n data_dir = \"/Users/annaho/Data/AAOmega/ref_spectra\"\n # Load the files & count the number of training objects\n ff = glob.glob(\"%s/*.txt\" %data_dir)\n nstars = len(ff)\n 
print(\"We have %s training objects\" %nstars)\n \n # Read the first file to get the wavelength array\n f = ff[0]\n data = Table.read(f, format=\"ascii.fast_no_header\")\n wl = data['col1']\n npix = len(wl)\n print(\"We have %s pixels\" %npix)\n\n tr_flux = np.zeros((nstars,npix))\n tr_ivar = np.zeros(tr_flux.shape)\n\n for i,f in enumerate(ff):\n data = Table.read(f, format=\"ascii.fast_no_header\")\n flux = data['col2']\n tr_flux[i,:] = flux\n sigma = data['col3']\n tr_ivar[i,:] = 1.0 / sigma**2\n\n return np.array(ff), wl, tr_flux, tr_ivar", "response": "Load the training spectra from the first file in the directory and return the wavelength array"} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\nloads the data for the AAOmega object.", "response": "def load_data():\n data_dir = \"/Users/annaho/Data/AAOmega\"\n out_dir = \"%s/%s\" %(data_dir, \"Run_13_July\")\n\n \"\"\" Use all the above functions to set data up for The Cannon \"\"\"\n ff, wl, tr_flux, tr_ivar = load_ref_spectra()\n\n \"\"\" pick one that doesn't have extra dead pixels \"\"\"\n skylines = tr_ivar[4,:] # should be the same across all obj\n np.savez(\"%s/skylines.npz\" %out_dir, skylines)\n\n contmask = np.load(\"%s/contmask_regions.npz\" %data_dir)['arr_0']\n scatter = estimate_noise(tr_flux, contmask)\n ids, labels = load_labels()\n \n # Select the objects in the catalog corresponding to the files\n inds = []\n ff_short = []\n for fname in ff:\n val = fname.split(\"/\")[-1]\n short = (val.split('.')[0] + '.' + val.split('.')[1])\n ff_short.append(short)\n if short in ids:\n ind = np.where(ids==short)[0][0]\n inds.append(ind)\n\n # choose the labels\n tr_id = ids[inds]\n tr_label = labels[inds]\n\n # find the corresponding spectra\n ff_short = np.array(ff_short)\n inds = np.array([np.where(ff_short==val)[0][0] for val in tr_id])\n tr_flux_choose = tr_flux[inds]\n tr_ivar_choose = tr_ivar[inds]\n scatter_choose = scatter[inds]\n np.savez(\"%s/wl.npz\" %out_dir, wl)\n np.savez(\"%s/ref_id_all.npz\" %out_dir, tr_id)\n np.savez(\"%s/ref_flux_all.npz\" %out_dir, tr_flux_choose)\n np.savez(\"%s/ref_ivar_all.npz\" %out_dir, tr_ivar_choose)\n np.savez(\"%s/ref_label_all.npz\" %out_dir, tr_label)\n np.savez(\"%s/ref_spec_scat_all.npz\" %out_dir, scatter_choose)\n\n # now, the test spectra\n test_id, test_flux = load_test_spectra()\n scatter = estimate_noise(test_flux, contmask) \n np.savez(\"%s/test_id.npz\" %out_dir, test_id)\n np.savez(\"%s/test_flux.npz\" %out_dir, test_flux)\n np.savez(\"%s/test_spec_scat.npz\" %out_dir, scatter)"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\ntaking the scatters and skylines and make final ivars", "response": "def make_full_ivar():\n \"\"\" take the scatters and skylines and make final ivars \"\"\"\n\n # skylines come as an ivar\n # don't use them for now, because I don't really trust them...\n # skylines = np.load(\"%s/skylines.npz\" %DATA_DIR)['arr_0']\n\n ref_flux = np.load(\"%s/ref_flux_all.npz\" %DATA_DIR)['arr_0']\n ref_scat = np.load(\"%s/ref_spec_scat_all.npz\" %DATA_DIR)['arr_0']\n test_flux = np.load(\"%s/test_flux.npz\" %DATA_DIR)['arr_0']\n test_scat = np.load(\"%s/test_spec_scat.npz\" %DATA_DIR)['arr_0']\n ref_ivar = np.ones(ref_flux.shape) / ref_scat[:,None]**2\n test_ivar = np.ones(test_flux.shape) / test_scat[:,None]**2\n\n # ref_ivar = (ref_ivar_temp * skylines[None,:]) / (ref_ivar_temp + skylines)\n # test_ivar = (test_ivar_temp * skylines[None,:]) / (test_ivar_temp + skylines)\n\n ref_bad = 
np.logical_or(ref_flux <= 0, ref_flux > 1.1)\n test_bad = np.logical_or(test_flux <= 0, test_flux > 1.1)\n SMALL = 1.0 / 1000000000.0\n ref_ivar[ref_bad] = SMALL\n test_ivar[test_bad] = SMALL\n np.savez(\"%s/ref_ivar_corr.npz\" %DATA_DIR, ref_ivar)\n np.savez(\"%s/test_ivar_corr.npz\" %DATA_DIR, test_ivar)"} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef _sinusoid(x, p, L, y):\n N = int(len(p)/2)\n n = np.linspace(0, N, N+1)\n k = n*np.pi/L\n func = 0\n for n in range(0, N):\n func += p[2*n]*np.sin(k[n]*x)+p[2*n+1]*np.cos(k[n]*x)\n return func", "response": "Returns the sinusoid contfunc evaluated at input x for the continuum."} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef _weighted_median(values, weights, quantile):\n sindx = np.argsort(values)\n cvalues = 1. * np.cumsum(weights[sindx])\n if cvalues[-1] == 0: # means all the values are 0\n return values[0]\n cvalues = cvalues / cvalues[-1] # div by largest value\n foo = sindx[cvalues > quantile]\n if len(foo) == 0:\n return values[0]\n indx = foo[0]\n return values[indx]", "response": "Calculate a weighted median of a set of values"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef _find_cont_gaussian_smooth(wl, fluxes, ivars, w):\n print(\"Finding the continuum\")\n bot = np.dot(ivars, w.T)\n top = np.dot(fluxes*ivars, w.T)\n bad = bot == 0\n cont = np.zeros(top.shape)\n cont[~bad] = top[~bad] / bot[~bad]\n return cont", "response": "Finds the weighted mean block of spectra with Gaussian smoothed fluxes."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef _cont_norm_gaussian_smooth(dataset, L):\n print(\"Gaussian smoothing the entire dataset...\")\n w = gaussian_weight_matrix(dataset.wl, L)\n\n print(\"Gaussian smoothing the training set\")\n cont = _find_cont_gaussian_smooth(\n dataset.wl, dataset.tr_flux, dataset.tr_ivar, w)\n norm_tr_flux, norm_tr_ivar = _cont_norm(\n dataset.tr_flux, dataset.tr_ivar, cont)\n print(\"Gaussian smoothing the test set\")\n cont = _find_cont_gaussian_smooth(\n dataset.wl, dataset.test_flux, dataset.test_ivar, w)\n norm_test_flux, norm_test_ivar = _cont_norm(\n dataset.test_flux, dataset.test_ivar, cont)\n return norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar", "response": "Continuum normalize by dividing by a Gaussian - weighted smoothed spectrum\n "} {"SOURCE": "codesearchnet", "instruction": "Can you create a Python 3 function that\nfits a continuum to a single segment of spectra.", "response": "def _find_cont_fitfunc(fluxes, ivars, contmask, deg, ffunc, n_proc=1):\n \"\"\" Fit a continuum to a continuum pixels in a segment of spectra\n\n Functional form can be either sinusoid or chebyshev, with specified degree\n\n Parameters\n ----------\n fluxes: numpy ndarray of shape (nstars, npixels)\n training set or test set pixel intensities\n\n ivars: numpy ndarray of shape (nstars, npixels)\n inverse variances, parallel to fluxes\n\n contmask: numpy ndarray of length (npixels)\n boolean pixel mask, True indicates that pixel is continuum \n\n deg: int\n degree of fitting function\n\n ffunc: str\n type of fitting function, chebyshev or sinusoid\n\n Returns\n -------\n cont: numpy ndarray of shape (nstars, npixels)\n the continuum, parallel to fluxes\n \"\"\"\n nstars = fluxes.shape[0]\n npixels = fluxes.shape[1]\n cont = 
np.zeros(fluxes.shape)\n\n if n_proc == 1:\n for jj in range(nstars):\n flux = fluxes[jj,:]\n ivar = ivars[jj,:]\n pix = np.arange(0, npixels)\n y = flux[contmask]\n x = pix[contmask]\n yivar = ivar[contmask]\n yivar[yivar == 0] = SMALL**2\n if ffunc==\"sinusoid\":\n p0 = np.ones(deg*2) # one for cos, one for sin\n L = max(x)-min(x)\n pcont_func = _partial_func(_sinusoid, L=L, y=flux)\n popt, pcov = opt.curve_fit(pcont_func, x, y, p0=p0,\n sigma=1./np.sqrt(yivar))\n elif ffunc==\"chebyshev\":\n fit = np.polynomial.chebyshev.Chebyshev.fit(x=x,y=y,w=yivar,deg=deg)\n for element in pix:\n if ffunc==\"sinusoid\":\n cont[jj,element] = _sinusoid(element, popt, L=L, y=flux)\n elif ffunc==\"chebyshev\":\n cont[jj,element] = fit(element)\n else:\n # start mp.Pool\n pool = mp.Pool(processes=n_proc)\n mp_results = []\n for i in xrange(nstars):\n mp_results.append(pool.apply_async(\\\n _find_cont_fitfunc,\n (fluxes[i, :].reshape((1, -1)),\n ivars[i, :].reshape((1, -1)),\n contmask[:]),\n {'deg':deg, 'ffunc':ffunc}))\n # close mp.Pool\n pool.close()\n pool.join()\n\n cont = np.array([mp_results[i].get().flatten() for i in xrange(nstars)])\n\n return cont"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nfind the continuum in the given set of fluxes and the given mask.", "response": "def _find_cont_fitfunc_regions(fluxes, ivars, contmask, deg, ranges, ffunc,\n n_proc=1):\n \"\"\" Run fit_cont, dealing with spectrum in regions or chunks\n\n This is useful if a spectrum has gaps.\n\n Parameters\n ----------\n fluxes: ndarray of shape (nstars, npixels)\n training set or test set pixel intensities\n\n ivars: numpy ndarray of shape (nstars, npixels)\n inverse variances, parallel to fluxes\n\n contmask: numpy ndarray of length (npixels)\n boolean pixel mask, True indicates that pixel is continuum \n\n deg: int\n degree of fitting function\n\n ffunc: str\n type of fitting function, chebyshev or sinusoid\n\n Returns\n -------\n cont: numpy ndarray of shape (nstars, npixels)\n the continuum, parallel to fluxes\n \"\"\"\n nstars = fluxes.shape[0]\n npixels = fluxes.shape[1]\n cont = np.zeros(fluxes.shape)\n for chunk in ranges:\n start = chunk[0]\n stop = chunk[1]\n if ffunc==\"chebyshev\":\n output = _find_cont_fitfunc(fluxes[:,start:stop],\n ivars[:,start:stop],\n contmask[start:stop],\n deg=deg, ffunc=\"chebyshev\",\n n_proc=n_proc)\n elif ffunc==\"sinusoid\":\n output = _find_cont_fitfunc(fluxes[:,start:stop],\n ivars[:,start:stop],\n contmask[start:stop],\n deg=deg, ffunc=\"sinusoid\",\n n_proc=n_proc)\n cont[:, start:stop] = output\n\n return cont"} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef _find_cont_running_quantile(wl, fluxes, ivars, q, delta_lambda,\n verbose=False):\n \"\"\" Perform continuum normalization using a running quantile\n\n Parameters\n ----------\n wl: numpy ndarray \n wavelength vector\n fluxes: numpy ndarray of shape (nstars, npixels)\n pixel intensities\n ivars: numpy ndarray of shape (nstars, npixels)\n inverse variances, parallel to fluxes\n q: float\n the desired quantile cut\n delta_lambda: int\n the number of pixels over which the median is calculated\n\n Output\n ------\n norm_fluxes: numpy ndarray of shape (nstars, npixels)\n normalized pixel intensities\n norm_ivars: numpy ndarray of shape (nstars, npixels)\n rescaled pixel invariances\n \"\"\"\n cont = np.zeros(fluxes.shape)\n nstars = fluxes.shape[0]\n for jj in range(nstars):\n if verbose:\n print(\"cont_norm_q(): working on star 
[%s/%s]...\" % (jj+1, nstars))\n flux = fluxes[jj,:]\n ivar = ivars[jj,:]\n for ll, lam in enumerate(wl):\n indx = (np.where(abs(wl-lam) < delta_lambda))[0]\n flux_cut = flux[indx]\n ivar_cut = ivar[indx]\n cont[jj, ll] = _weighted_median(flux_cut, ivar_cut, q)\n return cont", "response": "Perform continuum normalization using a running quantile"} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef _cont_norm_running_quantile_mp(wl, fluxes, ivars, q, delta_lambda,\n n_proc=2, verbose=False):\n \"\"\"\n The same as _cont_norm_running_quantile() above,\n but using multi-processing.\n\n Bo Zhang (NAOC)\n \"\"\"\n nStar = fluxes.shape[0]\n\n # start mp.Pool\n mp_results = []\n pool = mp.Pool(processes=n_proc)\n for i in xrange(nStar):\n mp_results.append(pool.apply_async(\\\n _find_cont_running_quantile,\n (wl, fluxes[i, :].reshape((1, -1)), ivars[i, :].reshape((1, -1)),\n q, delta_lambda), {'verbose': False}))\n if verbose:\n print('@Bo Zhang: continuum normalizing star [%d/%d] ...'\\\n % (i + 1, nStar))\n # close mp.Pool\n pool.close()\n pool.join()\n\n # reshape results --> cont\n cont = np.zeros_like(fluxes)\n for i in xrange(nStar):\n cont[i, :] = mp_results[i].get() #.flatten()\n norm_fluxes = np.ones(fluxes.shape)\n norm_fluxes[cont!=0] = fluxes[cont!=0] / cont[cont!=0]\n norm_ivars = cont**2 * ivars\n\n print('@Bo Zhang: continuum normalization finished!')\n return norm_fluxes, norm_ivars", "response": "This function normalizes the running quantiles in a running process."} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function that can\nperform continuum normalization using running quantile", "response": "def _cont_norm_running_quantile_regions(wl, fluxes, ivars, q, delta_lambda,\n ranges, verbose=True):\n \"\"\" Perform continuum normalization using running quantile, for spectrum\n that comes in chunks\n \"\"\"\n print(\"contnorm.py: continuum norm using running quantile\")\n print(\"Taking spectra in %s chunks\" % len(ranges))\n nstars = fluxes.shape[0]\n norm_fluxes = np.zeros(fluxes.shape)\n norm_ivars = np.zeros(ivars.shape)\n for chunk in ranges:\n start = chunk[0]\n stop = chunk[1]\n output = _cont_norm_running_quantile(\n wl[start:stop], fluxes[:,start:stop],\n ivars[:,start:stop], q, delta_lambda)\n norm_fluxes[:,start:stop] = output[0]\n norm_ivars[:,start:stop] = output[1]\n return norm_fluxes, norm_ivars"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nperform continuum normalization using running quantile for spectrum", "response": "def _cont_norm_running_quantile_regions_mp(wl, fluxes, ivars, q, delta_lambda,\n ranges, n_proc=2, verbose=False):\n \"\"\"\n Perform continuum normalization using running quantile, for spectrum\n that comes in chunks.\n\n The same as _cont_norm_running_quantile_regions(),\n but using multi-processing.\n\n Bo Zhang (NAOC)\n \"\"\"\n print(\"contnorm.py: continuum norm using running quantile\")\n print(\"Taking spectra in %s chunks\" % len(ranges))\n # nstars = fluxes.shape[0]\n nchunks = len(ranges)\n norm_fluxes = np.zeros(fluxes.shape)\n norm_ivars = np.zeros(ivars.shape)\n for i in xrange(nchunks):\n chunk = ranges[i, :]\n start = chunk[0]\n stop = chunk[1]\n if verbose:\n print('@Bo Zhang: Going to normalize Chunk [%d/%d], pixel:[%d, %d] ...'\n % (i+1, nchunks, start, stop))\n output = _cont_norm_running_quantile_mp(\n wl[start:stop], fluxes[:, start:stop],\n ivars[:, start:stop], q, delta_lambda,\n n_proc=n_proc, 
verbose=verbose)\n norm_fluxes[:, start:stop] = output[0]\n norm_ivars[:, start:stop] = output[1]\n return norm_fluxes, norm_ivars"} {"SOURCE": "codesearchnet", "instruction": "How would you implement a function in Python 3 that\nperforms continuum normalization for spectra in chunks", "response": "def _cont_norm_regions(fluxes, ivars, cont, ranges):\n \"\"\" Perform continuum normalization for spectra in chunks\n\n Useful for spectra that have gaps\n\n Parameters\n ---------\n fluxes: numpy ndarray\n pixel intensities\n ivars: numpy ndarray\n inverse variances, parallel to fluxes\n cont: numpy ndarray\n the continuum\n ranges: list or np ndarray\n the chunks that the spectrum should be split into\n\n Returns\n -------\n norm_fluxes: numpy ndarray\n normalized pixel intensities\n norm_ivars: numpy ndarray\n rescaled inverse variances\n \"\"\"\n nstars = fluxes.shape[0]\n norm_fluxes = np.zeros(fluxes.shape)\n norm_ivars = np.zeros(ivars.shape)\n for chunk in ranges:\n start = chunk[0]\n stop = chunk[1]\n output = _cont_norm(fluxes[:,start:stop],\n ivars[:,start:stop],\n cont[:,start:stop])\n norm_fluxes[:,start:stop] = output[0]\n norm_ivars[:,start:stop] = output[1]\n for jj in range(nstars):\n bad = (norm_ivars[jj,:] == 0.)\n norm_fluxes[jj,:][bad] = 1.\n return norm_fluxes, norm_ivars"} {"SOURCE": "codesearchnet", "instruction": "Implement a Python 3 function for\ntraining the spectral model on the data set.", "response": "def train(self, ds):\n \"\"\" Run training step: solve for best-fit spectral model \"\"\"\n if self.useErrors:\n self.coeffs, self.scatters, self.new_tr_labels, self.chisqs, self.pivots, self.scales = _train_model_new(ds)\n else:\n self.coeffs, self.scatters, self.chisqs, self.pivots, self.scales = _train_model(ds)"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef infer_spectra(self, ds):\n lvec_all = _get_lvec(ds.test_label_vals, self.pivots, self.scales, derivs=False)\n self.model_spectra = np.dot(lvec_all, self.coeffs.T)", "response": "Infer the model spectra for the object holding the test set."} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef plot_contpix(self, x, y, contpix_x, contpix_y, figname):\n fig, axarr = plt.subplots(2, sharex=True)\n plt.xlabel(r\"Wavelength $\\lambda (\\AA)$\")\n plt.xlim(min(x), max(x))\n ax = axarr[0]\n ax.step(x, y, where='mid', c='k', linewidth=0.3,\n label=r'$\\theta_0$' + \"= the leading fit coefficient\")\n ax.scatter(contpix_x, contpix_y, s=1, color='r',\n label=\"continuum pixels\")\n ax.legend(loc='lower right', \n prop={'family':'serif', 'size':'small'})\n ax.set_title(\"Baseline Spectrum with Continuum Pixels\")\n ax.set_ylabel(r'$\\theta_0$')\n ax = axarr[1]\n ax.step(x, y, where='mid', c='k', linewidth=0.3,\n label=r'$\\theta_0$' + \"= the leading fit coefficient\")\n ax.scatter(contpix_x, contpix_y, s=1, color='r',\n label=\"continuum pixels\")\n ax.set_title(\"Baseline Spectrum with Continuum Pixels, Zoomed\")\n ax.legend(loc='upper right', prop={'family':'serif', \n 'size':'small'})\n ax.set_ylabel(r'$\\theta_0$')\n ax.set_ylim(0.95, 1.05)\n print(\"Diagnostic plot: fitted 0th order spec w/ cont pix\")\n print(\"Saved as %s.png\" % (figname))\n plt.savefig(figname)\n plt.close()", "response": "Plot baseline spec with continuum pix overlaid \n"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\ncall plot_contpix once for each nth of the spectrum", "response": "def 
diagnostics_contpix(self, data, nchunks=10, fig = \"baseline_spec_with_cont_pix\"):\n \"\"\" Call plot_contpix once for each nth of the spectrum \"\"\"\n if data.contmask is None:\n print(\"No contmask set\")\n else:\n coeffs_all = self.coeffs\n wl = data.wl\n baseline_spec = coeffs_all[:,0]\n contmask = data.contmask\n contpix_x = wl[contmask]\n contpix_y = baseline_spec[contmask]\n rem = len(wl)%nchunks\n wl_split = np.array(np.split(wl[0:len(wl)-rem],nchunks))\n baseline_spec_split = np.array(\n np.split(baseline_spec[0:len(wl)-rem],nchunks))\n nchunks = wl_split.shape[0]\n for i in range(nchunks):\n fig_chunk = fig + \"_%s\" %str(i)\n wl_chunk = wl_split[i,:]\n baseline_spec_chunk = baseline_spec_split[i,:]\n take = np.logical_and(\n contpix_x>wl_chunk[0], contpix_x<wl_chunk[-1])\n self.plot_contpix(\n wl_chunk, baseline_spec_chunk,\n contpix_x[take], contpix_y[take], fig_chunk)"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef corner(xs, bins=20, range=None, weights=None, color=\"k\", smooth=None, smooth1d=None, labels=None, label_kwargs=None, show_titles=False, title_fmt=\".2f\", title_kwargs=None, truths=None, truth_color=\"#4682b4\", scale_hist=False, quantiles=None, verbose=False, fig=None, max_n_ticks=5, top_ticks=False, use_math_text=False, hist_kwargs=None, **hist2d_kwargs):\n # ... (figure setup and 1-D histogram code lost from this record) ...\n for i, x in enumerate(xs):\n # ...\n if len(quantiles) > 0:\n qvalues = quantile(x, quantiles, weights=weights)\n for q in qvalues:\n ax.axvline(q, ls=\"dashed\", color=color)\n\n if verbose:\n print(\"Quantiles:\")\n print([item for item in zip(quantiles, qvalues)])\n\n if show_titles:\n title = None\n if title_fmt is not None:\n # Compute the quantiles for the title. This might redo\n # unneeded computation but who cares.\n q_16, q_50, q_84 = quantile(x, [0.16, 0.5, 0.84],\n weights=weights)\n q_m, q_p = q_50-q_16, q_84-q_50\n\n # Format the quantile display.\n fmt = \"{{0:{0}}}\".format(title_fmt).format\n title = r\"${{{0}}}_{{-{1}}}^{{+{2}}}$\"\n title = title.format(fmt(q_50), fmt(q_m), fmt(q_p))\n\n # Add in the column name if it's given.\n if labels is not None:\n title = \"{0} = {1}\".format(labels[i], title)\n\n elif labels is not None:\n title = \"{0}\".format(labels[i])\n\n if title is not None:\n ax.set_title(title, **title_kwargs)\n\n # Set up the axes.\n ax.set_xlim(range[i])\n if scale_hist:\n maxn = np.max(n)\n ax.set_ylim(-0.1 * maxn, 1.1 * maxn)\n else:\n ax.set_ylim(0, 1.1 * np.max(n))\n ax.set_yticklabels([])\n ax.xaxis.set_major_locator(MaxNLocator(max_n_ticks, prune=\"lower\"))\n\n if i < K - 1:\n if top_ticks:\n ax.xaxis.set_ticks_position(\"top\")\n [l.set_rotation(45) for l in ax.get_xticklabels()]\n else:\n ax.set_xticklabels([])\n else:\n [l.set_rotation(45) for l in ax.get_xticklabels()]\n if labels is not None:\n ax.set_xlabel(labels[i], **label_kwargs)\n ax.xaxis.set_label_coords(0.5, -0.3)\n\n # use MathText for axes ticks\n ax.xaxis.set_major_formatter(\n ScalarFormatter(useMathText=use_math_text))\n\n for j, y in enumerate(xs):\n if np.shape(xs)[0] == 1:\n ax = axes\n else:\n ax = axes[i, j]\n if j > i:\n ax.set_frame_on(False)\n ax.set_xticks([])\n ax.set_yticks([])\n continue\n elif j == i:\n continue\n\n # Deal with masked arrays.\n if hasattr(y, \"compressed\"):\n y = y.compressed()\n\n hist2d(y, x, ax=ax, range=[range[j], range[i]], weights=weights,\n color=color, smooth=smooth, bins=[bins[j], bins[i]],\n **hist2d_kwargs)\n\n if truths is not None:\n if truths[i] is not None and truths[j] is not None:\n ax.plot(truths[j], truths[i], \"s\", color=truth_color)\n if truths[j] is not None:\n ax.axvline(truths[j], color=truth_color)\n if truths[i] is not None:\n ax.axhline(truths[i], color=truth_color)\n\n ax.xaxis.set_major_locator(MaxNLocator(max_n_ticks, prune=\"lower\"))\n ax.yaxis.set_major_locator(MaxNLocator(max_n_ticks, prune=\"lower\"))\n\n if i < K - 1:\n ax.set_xticklabels([])\n else:\n [l.set_rotation(45) for l in ax.get_xticklabels()]\n if labels is not None:\n ax.set_xlabel(labels[j], **label_kwargs)\n ax.xaxis.set_label_coords(0.5, -0.3)\n\n # use MathText for axes ticks\n ax.xaxis.set_major_formatter(\n 
ScalarFormatter(useMathText=use_math_text))\n\n if j > 0:\n ax.set_yticklabels([])\n else:\n [l.set_rotation(45) for l in ax.get_yticklabels()]\n if labels is not None:\n ax.set_ylabel(labels[i], **label_kwargs)\n ax.yaxis.set_label_coords(-0.3, 0.5)\n\n # use MathText for axes ticks\n ax.yaxis.set_major_formatter(\n ScalarFormatter(useMathText=use_math_text))\n\n return fig", "response": "Make a corner plot showing the one- and two-dimensional projections of a multi-dimensional data set."} {"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\ncompute quantiles like numpy.percentile, but with q given in [0, 1] and optional weights on x.", "response": "def quantile(x, q, weights=None):\n \"\"\"\n Like numpy.percentile, but:\n\n * Values of q are quantiles [0., 1.] rather than percentiles [0., 100.]\n * scalar q not supported (q must be iterable)\n * optional weights on x\n\n \"\"\"\n if weights is None:\n return np.percentile(x, [100. * qi for qi in q])\n else:\n idx = np.argsort(x)\n xsorted = x[idx]\n cdf = np.add.accumulate(weights[idx])\n cdf /= cdf[-1]\n return np.interp(q, cdf, xsorted).tolist()"} {"SOURCE": "codesearchnet", "instruction": "Implement a Python 3 function for\nplotting a 2-D histogram of samples.", "response": "def hist2d(x, y, bins=20, range=None, weights=None, levels=None, smooth=None,\n ax=None, color=None, plot_datapoints=True, plot_density=True,\n plot_contours=True, no_fill_contours=False, fill_contours=False,\n contour_kwargs=None, contourf_kwargs=None, data_kwargs=None,\n **kwargs):\n \"\"\"\n Plot a 2-D histogram of samples.\n\n Parameters\n ----------\n x, y : array_like (nsamples,)\n The samples.\n\n levels : array_like\n The contour levels to draw.\n\n ax : matplotlib.Axes (optional)\n An axes instance on which to add the 2-D histogram.\n\n plot_datapoints : bool (optional)\n Draw the individual data points.\n\n plot_density : bool (optional)\n Draw the density colormap.\n\n plot_contours : bool (optional)\n Draw the contours.\n\n no_fill_contours : bool (optional)\n Add no filling at all to the contours (unlike setting\n ``fill_contours=False``, which still adds a white fill at the densest\n points).\n\n fill_contours : bool (optional)\n Fill the contours.\n\n contour_kwargs : dict (optional)\n Any additional keyword arguments to pass to the `contour` method.\n\n contourf_kwargs : dict (optional)\n Any additional keyword arguments to pass to the `contourf` method.\n\n data_kwargs : dict (optional)\n Any additional keyword arguments to pass to the `plot` method when\n adding the individual data points.\n \"\"\"\n if ax is None:\n ax = pl.gca()\n\n # Set the default range based on the data range if not provided.\n if range is None:\n if \"extent\" in kwargs:\n logging.warn(\"Deprecated keyword argument 'extent'. 
\"\n \"Use 'range' instead.\")\n range = kwargs[\"extent\"]\n else:\n range = [[x.min(), x.max()], [y.min(), y.max()]]\n\n # Set up the default plotting arguments.\n if color is None:\n color = \"k\"\n\n # Choose the default \"sigma\" contour levels.\n if levels is None:\n levels = 1.0 - np.exp(-0.5 * np.arange(0.5, 2.1, 0.5) ** 2)\n\n # This is the color map for the density plot, over-plotted to indicate the\n # density of the points near the center.\n density_cmap = LinearSegmentedColormap.from_list(\n \"density_cmap\", [color, (1, 1, 1, 0)])\n\n # This color map is used to hide the points at the high density areas.\n white_cmap = LinearSegmentedColormap.from_list(\n \"white_cmap\", [(1, 1, 1), (1, 1, 1)], N=2)\n\n # This \"color map\" is the list of colors for the contour levels if the\n # contours are filled.\n rgba_color = colorConverter.to_rgba(color)\n contour_cmap = [list(rgba_color) for l in levels] + [rgba_color]\n for i, l in enumerate(levels):\n contour_cmap[i][-1] *= float(i) / (len(levels)+1)\n\n # We'll make the 2D histogram to directly estimate the density.\n try:\n H, X, Y = np.histogram2d(x.flatten(), y.flatten(), bins=bins,\n range=range, weights=weights)\n except ValueError:\n raise ValueError(\"It looks like at least one of your sample columns \"\n \"have no dynamic range. You could try using the \"\n \"'range' argument.\")\n\n if smooth is not None:\n if gaussian_filter is None:\n raise ImportError(\"Please install scipy for smoothing\")\n H = gaussian_filter(H, smooth)\n\n # Compute the density levels.\n Hflat = H.flatten()\n inds = np.argsort(Hflat)[::-1]\n Hflat = Hflat[inds]\n sm = np.cumsum(Hflat)\n sm /= sm[-1]\n V = np.empty(len(levels))\n for i, v0 in enumerate(levels):\n try:\n V[i] = Hflat[sm <= v0][-1]\n except:\n V[i] = Hflat[0]\n V.sort()\n m = np.diff(V) == 0\n if np.any(m):\n logging.warning(\"Too few points to create valid contours\")\n while np.any(m):\n V[np.where(m)[0][0]] *= 1.0 - 1e-4\n m = np.diff(V) == 0\n V.sort()\n\n # Compute the bin centers.\n X1, Y1 = 0.5 * (X[1:] + X[:-1]), 0.5 * (Y[1:] + Y[:-1])\n\n # Extend the array for the sake of the contours at the plot edges.\n H2 = H.min() + np.zeros((H.shape[0] + 4, H.shape[1] + 4))\n H2[2:-2, 2:-2] = H\n H2[2:-2, 1] = H[:, 0]\n H2[2:-2, -2] = H[:, -1]\n H2[1, 2:-2] = H[0]\n H2[-2, 2:-2] = H[-1]\n H2[1, 1] = H[0, 0]\n H2[1, -2] = H[0, -1]\n H2[-2, 1] = H[-1, 0]\n H2[-2, -2] = H[-1, -1]\n X2 = np.concatenate([\n X1[0] + np.array([-2, -1]) * np.diff(X1[:2]),\n X1,\n X1[-1] + np.array([1, 2]) * np.diff(X1[-2:]),\n ])\n Y2 = np.concatenate([\n Y1[0] + np.array([-2, -1]) * np.diff(Y1[:2]),\n Y1,\n Y1[-1] + np.array([1, 2]) * np.diff(Y1[-2:]),\n ])\n\n if plot_datapoints:\n if data_kwargs is None:\n data_kwargs = dict()\n data_kwargs[\"color\"] = data_kwargs.get(\"color\", color)\n data_kwargs[\"ms\"] = data_kwargs.get(\"ms\", 2.0)\n data_kwargs[\"mec\"] = data_kwargs.get(\"mec\", \"none\")\n data_kwargs[\"alpha\"] = data_kwargs.get(\"alpha\", 0.1)\n ax.plot(x, y, \"o\", zorder=-1, rasterized=True, **data_kwargs)\n\n # Plot the base fill to hide the densest data points.\n if (plot_contours or plot_density) and not no_fill_contours:\n ax.contourf(X2, Y2, H2.T, [V.min(), H.max()],\n cmap=white_cmap, antialiased=False)\n\n if plot_contours and fill_contours:\n if contourf_kwargs is None:\n contourf_kwargs = dict()\n contourf_kwargs[\"colors\"] = contourf_kwargs.get(\"colors\", contour_cmap)\n contourf_kwargs[\"antialiased\"] = contourf_kwargs.get(\"antialiased\",\n False)\n ax.contourf(X2, Y2, H2.T, 
np.concatenate([[0], V, [H.max()*(1+1e-4)]]),\n **contourf_kwargs)\n\n # Plot the density map. This can't be plotted at the same time as the\n # contour fills.\n elif plot_density:\n ax.pcolor(X, Y, H.max() - H.T, cmap=density_cmap)\n\n # Plot the contour edge colors.\n if plot_contours:\n if contour_kwargs is None:\n contour_kwargs = dict()\n contour_kwargs[\"colors\"] = contour_kwargs.get(\"colors\", color)\n ax.contour(X2, Y2, H2.T, V, **contour_kwargs)\n\n ax.set_xlim(range[0])\n ax.set_ylim(range[1])"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef make_classifier(self, name, ids, labels):\n if not all(np.in1d(ids, self.ids)):\n raise ValueError(\"Hay ids de textos que no se encuentran \\\n almacenados.\")\n setattr(self, name, SGDClassifier())\n classifier = getattr(self, name)\n indices = np.searchsorted(self.ids, ids)\n classifier.fit(self.tfidf_mat[indices, :], labels)", "response": "Fit and register an SVM-style (SGD) classifier over the loaded texts, stored under the given name."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef classify(self, classifier_name, examples, max_labels=None,\n goodness_of_fit=False):\n \"\"\"Usar un clasificador SVM para etiquetar textos nuevos.\n\n Args:\n classifier_name (str): Nombre del clasidicador a usar.\n examples (list or str): Se espera un ejemplo o una lista de\n ejemplos a clasificar en texto plano o en ids.\n max_labels (int, optional): Cantidad de etiquetas a devolver para\n cada ejemplo. Si se devuelve mas de una el orden corresponde a\n la plausibilidad de cada etiqueta. Si es None devuelve todas\n las etiquetas posibles.\n goodness_of_fit (bool, optional): Indica si devuelve o no una\n medida de cuan buenas son las etiquetas.\n Nota:\n Usa el clasificador de `Scikit-learn `_\n\n Returns:\n tuple (array, array): (labels_considerados, puntajes)\n labels_considerados: Las etiquetas que se consideraron para\n clasificar.\n puntajes: Cuanto m\u00e1s alto el puntaje, m\u00e1s probable es que la\n etiqueta considerada sea la adecuada.\n \"\"\"\n classifier = getattr(self, classifier_name)\n texts_vectors = self._make_text_vectors(examples)\n return classifier.classes_, classifier.decision_function(texts_vectors)", "response": "Use an SVM classifier to label new texts."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef get_similar(self, example, max_similars=3, similarity_cutoff=None,\n term_diff_max_rank=10, filter_list=None,\n term_diff_cutoff=None):\n \"\"\"Devuelve textos similares al ejemplo dentro de los textos entrenados.\n\n Nota:\n Usa la distancia de coseno del vector de features TF-IDF\n\n Args:\n example (str): Se espera un id de texto o un texto a partir del\n cual se buscaran otros textos similares.\n max_similars (int, optional): Cantidad de textos similares a\n devolver.\n similarity_cutoff (float, optional): Valor umbral de similaridad\n para definir que dos textos son similares entre si.\n term_diff_max_rank (int, optional): Este valor sirve para controlar\n el umbral con el que los terminos son considerados importantes\n a la hora de recuperar textos (no afecta el funcionamiento de\n que textos se consideran cercanos, solo la cantidad de terminos\n que se devuelven en best_words).\n filter_list (list): Lista de ids de textos en la cual buscar textos\n similares.\n term_diff_cutoff (float): Deprecado. 
Se quitara en el futuro.\n\n Returns:\n tuple (list, list, list): (text_ids, sorted_dist, best_words)\n text_ids (list of str): Devuelve los ids de los textos\n sugeridos.\n sorted_dist (list of float): Devuelve la distancia entre las\n opciones sugeridas y el ejemplo dado como entrada.\n best_words (list of list): Para cada sugerencia devuelve las\n palabras mas relevantes que se usaron para seleccionar esa\n sugerencia.\n \"\"\"\n\n if term_diff_cutoff:\n warnings.warn('Deprecado. Quedo sin uso. Se quitara en el futuro.',\n DeprecationWarning)\n if filter_list:\n if max_similars > len(filter_list):\n raise ValueError(\"No se pueden pedir mas sugerencias que la \\\n cantidad de textos en `filter_list`.\")\n else:\n filt_idx = np.in1d(self.ids, filter_list)\n\n elif max_similars > self.term_mat.shape[0]:\n raise ValueError(\"No se pueden pedir mas sugerencias que la \\\n cantidad de textos que hay almacenados.\")\n else:\n filt_idx = np.ones(len(self.ids), dtype=bool)\n # Saco los textos compuestos solo por stop_words\n good_ids = np.array(np.sum(self.term_mat, 1) > 0).squeeze()\n filt_idx = filt_idx & good_ids\n filt_idx_to_general_idx = np.flatnonzero(filt_idx)\n if example in self.ids:\n index = self.ids == example\n exmpl_vec = self.tfidf_mat[index, :]\n distances = np.squeeze(pairwise_distances(self.tfidf_mat[filt_idx],\n exmpl_vec))\n # Pongo la distancia a si mismo como inf, par que no se devuelva a\n # si mismo como una opcion\n if filter_list and example in filter_list:\n distances[filter_list.index(example)] = np.inf\n elif not filter_list:\n idx_example = np.searchsorted(self.ids, example)\n filt_idx_example = np.searchsorted(np.flatnonzero(filt_idx),\n idx_example)\n distances[filt_idx_example] = np.inf\n else:\n exmpl_vec = self.vectorizer.transform([example]) # contar terminos\n exmpl_vec = self.transformer.transform(exmpl_vec) # calcular tfidf\n distances = np.squeeze(pairwise_distances(self.tfidf_mat[filt_idx],\n exmpl_vec))\n if np.sum(exmpl_vec) == 0:\n return [], [], []\n sorted_indices = np.argsort(distances)\n closest_n = sorted_indices[:max_similars]\n sorted_dist = distances[closest_n]\n if similarity_cutoff:\n closest_n = closest_n[sorted_dist < similarity_cutoff]\n sorted_dist = sorted_dist[sorted_dist < similarity_cutoff]\n best_words = []\n\n # Calculo palabras relevantes para cada sugerencia\n best_example = np.squeeze(exmpl_vec.toarray())\n sorted_example_weights = np.flipud(np.argsort(best_example))\n truncated_max_rank = min(term_diff_max_rank, np.sum(best_example > 0))\n best_example_words = sorted_example_weights[:truncated_max_rank]\n for suggested in closest_n:\n suggested_idx = filt_idx_to_general_idx[suggested]\n test_vec = np.squeeze(self.tfidf_mat[suggested_idx, :].toarray())\n sorted_test_weights = np.flipud(np.argsort(test_vec))\n truncated_max_rank = min(term_diff_max_rank,\n np.sum(test_vec > 0))\n best_test = sorted_test_weights[:truncated_max_rank]\n best_words_ids = np.intersect1d(best_example_words, best_test)\n best_words.append([k for k, v in\n self.vectorizer.vocabulary_.items()\n if v in best_words_ids])\n\n # Filtro dentro de las buscadas\n if filter_list:\n text_ids = self.ids[filt_idx_to_general_idx[closest_n]]\n else:\n text_ids = self.ids[closest_n]\n return list(text_ids), list(sorted_dist), best_words", "response": "Return the stored texts most similar to the given example, along with their distances and the most relevant shared words."} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef reload_texts(self, texts, ids, vocabulary=None):\n 
self._check_id_length(ids)\n self.ids = np.array(sorted(ids))\n if vocabulary:\n self.vectorizer.vocabulary = vocabulary\n sorted_texts = [x for (y, x) in sorted(zip(ids, texts))]\n self.term_mat = self.vectorizer.fit_transform(sorted_texts)\n self._update_tfidf()", "response": "Reload the stored texts: rebuild the term matrix (and optionally the vocabulary) from the given texts and ids."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef dataset_metrics(uuid, **kwargs):\n\t'''\n\tGet details on a GBIF dataset.\n\n\t:param uuid: [str] One or more dataset UUIDs. See examples.\n\n\tReferences: http://www.gbif.org/developer/registry#datasetMetrics\n\n\tUsage::\n\n\t\t\tfrom pygbif import registry\n\t\t\tregistry.dataset_metrics(uuid='3f8a1297-3259-4700-91fc-acc4170b27ce')\n\t\t\tregistry.dataset_metrics(uuid='66dd0960-2d7d-46ee-a491-87b9adcfe7b1')\n\t\t\tregistry.dataset_metrics(uuid=['3f8a1297-3259-4700-91fc-acc4170b27ce', '66dd0960-2d7d-46ee-a491-87b9adcfe7b1'])\n\t'''\n\tdef getdata(x, **kwargs):\n\t\turl = gbif_baseurl + 'dataset/' + x + '/metrics'\n\t\treturn gbif_GET(url, {}, **kwargs)\n\n\tif len2(uuid) == 1:\n\t\treturn getdata(uuid)\n\telse:\n\t\treturn [getdata(x) for x in uuid]", "response": "Get details on a GBIF dataset."} {"SOURCE": "codesearchnet", "instruction": "How would you implement a function in Python 3 that\nsearches for datasets and dataset metadata.", "response": "def datasets(data = 'all', type = None, uuid = None, query = None, id = None,\n\t\t\t\t\t\t\tlimit = 100, offset = None, **kwargs):\n\t'''\n\tSearch for datasets and dataset metadata.\n\n\t:param data: [str] The type of data to get. Default: ``all``\n\t:param type: [str] Type of dataset, options include ``OCCURRENCE``, etc.\n\t:param uuid: [str] UUID of the data node provider. This must be specified if data\n\t\t is anything other than ``all``.\n\t:param query: [str] Query term(s). Only used when ``data = 'all'``\n\t:param id: [int] A metadata document id.\n\n\tReferences http://www.gbif.org/developer/registry#datasets\n\n\tUsage::\n\n\t\t\tfrom pygbif import registry\n\t\t\tregistry.datasets(limit=5)\n\t\t\tregistry.datasets(type=\"OCCURRENCE\")\n\t\t\tregistry.datasets(uuid=\"a6998220-7e3a-485d-9cd6-73076bd85657\")\n\t\t\tregistry.datasets(data='contact', uuid=\"a6998220-7e3a-485d-9cd6-73076bd85657\")\n\t\t\tregistry.datasets(data='metadata', uuid=\"a6998220-7e3a-485d-9cd6-73076bd85657\")\n\t\t\tregistry.datasets(data='metadata', uuid=\"a6998220-7e3a-485d-9cd6-73076bd85657\", id=598)\n\t\t\tregistry.datasets(data=['deleted','duplicate'])\n\t\t\tregistry.datasets(data=['deleted','duplicate'], limit=1)\n\t'''\n\targs = {'q': query, 'type': type, 'limit': limit, 'offset': offset}\n\tdata_choices = ['all', 'organization', 'contact', 'endpoint',\n\t\t\t\t\t\t\t\t\t'identifier', 'tag', 'machinetag', 'comment',\n\t\t\t\t\t\t\t\t\t'constituents', 'document', 'metadata', 'deleted',\n\t\t\t\t\t\t\t\t\t'duplicate', 'subDataset', 'withNoEndpoint']\n\tcheck_data(data, data_choices)\n\tif len2(data) == 1:\n\t\treturn datasets_fetch(data, uuid, args, **kwargs)\n\telse:\n\t\treturn [datasets_fetch(x, uuid, args, **kwargs) for x in data]"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef dataset_suggest(q=None, type=None, keyword=None, owningOrg=None,\n\tpublishingOrg=None, hostingOrg=None, publishingCountry=None, decade=None,\n\tlimit = 100, offset = None, **kwargs):\n\t'''\n\tSearch that returns up to 20 matching datasets. 
Results are ordered by relevance.\n\n\n\t:param q: [str] Query term(s) for full text search. The value for this parameter can be a simple word or a phrase. Wildcards can be added to the simple word parameters only, e.g. ``q=*puma*``\n\t:param type: [str] Type of dataset, options include OCCURRENCE, etc.\n\t:param keyword: [str] Keyword to search by. Datasets can be tagged by keywords, which you can search on. The search is done on the merged collection of tags, the dataset keywordCollections and temporalCoverages. SEEMS TO NOT BE WORKING ANYMORE AS OF 2016-09-02.\n\t:param owningOrg: [str] Owning organization. A uuid string. See :func:`~pygbif.registry.organizations`\n\t:param publishingOrg: [str] Publishing organization. A uuid string. See :func:`~pygbif.registry.organizations`\n\t:param hostingOrg: [str] Hosting organization. A uuid string. See :func:`~pygbif.registry.organizations`\n\t:param publishingCountry: [str] Publishing country.\n\t:param decade: [str] Decade, e.g., 1980. Filters datasets by their temporal coverage broken down to decades. Decades are given as a full year, e.g. 1880, 1960, 2000, etc, and will return datasets wholly contained in the decade as well as those that cover the entire decade or more. Facet by decade to get the break down, e.g. ``/search?facet=DECADE&facet_only=true`` (see example below)\n\t:param limit: [int] Number of results to return. Default: ``300``\n\t:param offset: [int] Record to start at. Default: ``0``\n\n\t:return: A dictionary\n\n\tReferences: http://www.gbif.org/developer/registry#datasetSearch\n\n\tUsage::\n\n\t\t\tfrom pygbif import registry\n\t\t\tregistry.dataset_suggest(q=\"Amazon\", type=\"OCCURRENCE\")\n\n\t\t\t# Suggest datasets tagged with keyword \"france\".\n\t\t\tregistry.dataset_suggest(keyword=\"france\")\n\n\t\t\t# Suggest datasets owned by the organization with key\n\t\t\t# \"07f617d0-c688-11d8-bf62-b8a03c50a862\" (UK NBN).\n\t\t\tregistry.dataset_suggest(owningOrg=\"07f617d0-c688-11d8-bf62-b8a03c50a862\")\n\n\t\t\t# Fulltext search for all datasets having the word \"amsterdam\" somewhere in\n\t\t\t# its metadata (title, description, etc).\n\t\t\tregistry.dataset_suggest(q=\"amsterdam\")\n\n\t\t\t# Limited search\n\t\t\tregistry.dataset_suggest(type=\"OCCURRENCE\", limit=2)\n\t\t\tregistry.dataset_suggest(type=\"OCCURRENCE\", limit=2, offset=10)\n\n\t\t\t# Return just descriptions\n\t\t\tregistry.dataset_suggest(type=\"OCCURRENCE\", limit = 5, description=True)\n\n\t\t\t# Search by decade\n\t\t\tregistry.dataset_suggest(decade=1980, limit = 30)\n\t'''\n\turl = gbif_baseurl + 'dataset/suggest'\n\targs = {'q': q, 'type': type, 'keyword': keyword,\n\t\t\t\t'publishingOrg': publishingOrg, 'hostingOrg': hostingOrg,\n\t\t\t\t'owningOrg': owningOrg, 'decade': decade,\n\t\t\t\t'publishingCountry': publishingCountry,\n\t\t\t\t'limit': limit, 'offset': offset}\n\tout = gbif_GET(url, args, **kwargs)\n\treturn out", "response": "A simple text search that returns up to 20 matching datasets."} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef dataset_search(q=None, type=None, keyword=None,\n\towningOrg=None, publishingOrg=None, hostingOrg=None, decade=None,\n\tpublishingCountry = None, facet = None, facetMincount=None,\n\tfacetMultiselect = None, hl = False, limit = 100, offset = None,\n\t**kwargs):\n\t'''\n\tFull text search across all datasets. Results are ordered by relevance.\n\n\t:param q: [str] Query term(s) for full text search. 
The value for this parameter\n\t\t can be a simple word or a phrase. Wildcards can be added to the simple word\n\t\t parameters only, e.g. ``q=*puma*``\n\t:param type: [str] Type of dataset, options include OCCURRENCE, etc.\n\t:param keyword: [str] Keyword to search by. Datasets can be tagged by keywords, which\n\t\t you can search on. The search is done on the merged collection of tags, the\n\t\t dataset keywordCollections and temporalCoverages. SEEMS TO NOT BE WORKING\n\t\t ANYMORE AS OF 2016-09-02.\n\t:param owningOrg: [str] Owning organization. A uuid string. See :func:`~pygbif.registry.organizations`\n\t:param publishingOrg: [str] Publishing organization. A uuid string. See :func:`~pygbif.registry.organizations`\n\t:param hostingOrg: [str] Hosting organization. A uuid string. See :func:`~pygbif.registry.organizations`\n\t:param publishingCountry: [str] Publishing country.\n\t:param decade: [str] Decade, e.g., 1980. Filters datasets by their temporal coverage\n\t\t broken down to decades. Decades are given as a full year, e.g. 1880, 1960, 2000,\n\t\t etc, and will return datasets wholly contained in the decade as well as those\n\t\t that cover the entire decade or more. Facet by decade to get the break down,\n\t\t e.g. ``/search?facet=DECADE&facet_only=true`` (see example below)\n\t:param facet: [str] A list of facet names used to retrieve the 100 most frequent values\n\t\t\tfor a field. Allowed facets are: type, keyword, publishingOrg, hostingOrg, decade,\n\t\t\tand publishingCountry. Additionally subtype and country are legal values but not\n\t\t\tyet implemented, so data will not yet be returned for them.\n\t:param facetMincount: [str] Used in combination with the facet parameter. Set\n\t\t\tfacetMincount={#} to exclude facets with a count less than {#}, e.g.\n\t\t\thttp://api.gbif.org/v1/dataset/search?facet=type&limit=0&facetMincount=10000\n\t\t\tonly shows the type value 'OCCURRENCE' because 'CHECKLIST' and 'METADATA' have\n\t\t\tcounts less than 10000.\n\t:param facetMultiselect: [bool] Used in combination with the facet parameter. Set\n\t\t\tfacetMultiselect=True to still return counts for values that are not currently\n\t\t\tfiltered, e.g.\n\t\t\thttp://api.gbif.org/v1/dataset/search?facet=type&limit=0&type=CHECKLIST&facetMultiselect=true\n\t\t\tstill shows type values 'OCCURRENCE' and 'METADATA' even though type is being\n\t\t\tfiltered by type=CHECKLIST\n\t:param hl: [bool] Set ``hl=True`` to highlight terms matching the query when in fulltext\n\t\t\tsearch fields. The highlight will be an emphasis tag of class 'gbifH1' e.g.\n\t\t\thttp://api.gbif.org/v1/dataset/search?q=plant&hl=true\n\t\t\tFulltext search fields include: title, keyword, country, publishing country,\n\t\t\tpublishing organization title, hosting organization title, and description. One\n\t\t\tadditional full text field is searched which includes information from metadata\n\t\t\tdocuments, but the text of this field is not returned in the response.\n\t:param limit: [int] Number of results to return. Default: ``300``\n\t:param offset: [int] Record to start at. Default: ``0``\n\n\t:note: Note that you can pass in additional faceting parameters on a per field basis.\n\t\t\tFor example, if you want to limit the numbef of facets returned from a field ``foo`` to\n\t\t\t3 results, pass in ``foo_facetLimit = 3``. GBIF does not allow all per field parameters,\n\t\t\tbut does allow some. 
See also examples.\n\n\t:return: A dictionary\n\n\tReferences: http://www.gbif.org/developer/registry#datasetSearch\n\n\tUsage::\n\n\t\t\tfrom pygbif import registry\n\t\t\t# Gets all datasets of type \"OCCURRENCE\".\n\t\t\tregistry.dataset_search(type=\"OCCURRENCE\", limit = 10)\n\n\t\t\t# Fulltext search for all datasets having the word \"amsterdam\" somewhere in\n\t\t\t# its metadata (title, description, etc).\n\t\t\tregistry.dataset_search(q=\"amsterdam\", limit = 10)\n\n\t\t\t# Limited search\n\t\t\tregistry.dataset_search(type=\"OCCURRENCE\", limit=2)\n\t\t\tregistry.dataset_search(type=\"OCCURRENCE\", limit=2, offset=10)\n\n\t\t\t# Search by decade\n\t\t\tregistry.dataset_search(decade=1980, limit = 10)\n\n\t\t\t# Faceting\n\t\t\t## just facets\n\t\t\tregistry.dataset_search(facet=\"decade\", facetMincount=10, limit=0)\n\n\t\t\t## data and facets\n\t\t\tregistry.dataset_search(facet=\"decade\", facetMincount=10, limit=2)\n\n\t\t\t## many facet variables\n\t\t\tregistry.dataset_search(facet=[\"decade\", \"type\"], facetMincount=10, limit=0)\n\n\t\t\t## facet vars\n\t\t\t### per variable paging\n\t\t\tx = registry.dataset_search(\n\t\t\t\tfacet = [\"decade\", \"type\"],\n\t\t\t\tdecade_facetLimit = 3,\n\t\t\t\ttype_facetLimit = 3,\n\t\t\t\tlimit = 0\n\t\t\t)\n\n\t\t\t## highlight\n\t\t\tx = registry.dataset_search(q=\"plant\", hl=True, limit = 10)\n\t\t\t[ z['description'] for z in x['results'] ]\n\t'''\n\turl = gbif_baseurl + 'dataset/search'\n\targs = {'q': q, 'type': type, 'keyword': keyword,\n\t\t\t\t'owningOrg': owningOrg, 'publishingOrg': publishingOrg,\n\t\t\t\t'hostingOrg': hostingOrg, 'decade': decade,\n\t\t\t\t'publishingCountry': publishingCountry, 'facet': facet,\n\t\t\t\t'facetMincount': facetMincount, 'facetMultiselect': facetMultiselect,\n\t\t\t\t'hl': hl, 'limit': limit, 'offset': offset}\n\tgbif_kwargs = {key: kwargs[key] for key in kwargs if key not in requests_argset}\n\tif gbif_kwargs is not None:\n\t\txx = dict(zip( [ re.sub('_', '.', x) for x in gbif_kwargs.keys() ], gbif_kwargs.values() ))\n\t\targs.update(xx)\n\tkwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}\n\tout = gbif_GET(url, args, **kwargs)\n\treturn out", "response": "Full-text search across all GBIF datasets, with results ordered by relevance."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef wkt_rewind(x, digits = None):\n '''\n reverse WKT winding order\n\n :param x: [str] WKT string\n :param digits: [int] number of digits after decimal to use for the return string. 
\n by default, we use the mean number of digits in your string.\n\n :return: a string\n\n Usage::\n \n from pygbif import wkt_rewind\n x = 'POLYGON((144.6 13.2, 144.6 13.6, 144.9 13.6, 144.9 13.2, 144.6 13.2))'\n wkt_rewind(x)\n wkt_rewind(x, digits = 0)\n wkt_rewind(x, digits = 3)\n wkt_rewind(x, digits = 7)\n '''\n z = wkt.loads(x)\n if digits is None:\n coords = z['coordinates']\n nums = __flatten(coords)\n dec_n = [ decimal.Decimal(str(w)).as_tuple().exponent for w in nums ]\n digits = abs(statistics.mean(dec_n))\n else:\n if not isinstance(digits, int):\n raise TypeError(\"'digits' must be an int\")\n wound = rewind(z)\n back_to_wkt = wkt.dumps(wound, decimals = digits)\n return back_to_wkt", "response": "Reverse the winding order of a WKT geometry, rounding coordinates to the given (or inferred) number of digits."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef occ_issues_lookup(issue=None, code=None):\n '''\n Lookup occurrence issue definitions and short codes\n\n :param issue: Full name of issue, e.g., CONTINENT_COUNTRY_MISMATCH\n :param code: an issue short code, e.g. ccm\n\n Usage\n pygbif.occ_issues_lookup(issue = 'CONTINENT_COUNTRY_MISMATCH')\n pygbif.occ_issues_lookup(issue = 'MULTIMEDIA_DATE_INVALID')\n pygbif.occ_issues_lookup(issue = 'ZERO_COORDINATE')\n pygbif.occ_issues_lookup(code = 'cdiv')\n '''\n if code is None:\n bb = [trymatch(issue, x) for x in gbifissues['issue'] ]\n tmp = filter(None, bb)\n else:\n bb = [trymatch(code, x) for x in gbifissues['code'] ]\n tmp = filter(None, bb)\n return tmp", "response": "Look up GBIF occurrence issue definitions by full issue name or by short code."} {"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\nsearch GBIF occurrences.", "response": "def search(taxonKey=None, repatriated=None,\n kingdomKey=None, phylumKey=None, classKey=None, orderKey=None,\n familyKey=None, genusKey=None, subgenusKey=None, scientificName=None,\n country=None, publishingCountry=None, hasCoordinate=None, typeStatus=None,\n recordNumber=None, lastInterpreted=None, continent=None,\n geometry=None, recordedBy=None, basisOfRecord=None, datasetKey=None,\n eventDate=None, catalogNumber=None, year=None, month=None,\n decimalLatitude=None, decimalLongitude=None, elevation=None,\n depth=None, institutionCode=None, collectionCode=None,\n hasGeospatialIssue=None, issue=None, q=None, spellCheck=None, mediatype=None,\n limit=300, offset=0, establishmentMeans=None,\n facet=None, facetMincount=None, facetMultiselect=None, **kwargs):\n '''\n Search GBIF occurrences\n\n :param taxonKey: [int] A GBIF taxon key\n :param q: [str] Simple search parameter. The value for this parameter can be a simple word or a phrase.\n :param spellCheck: [bool] If ``True`` ask GBIF to check your spelling of the value passed to the ``search`` parameter.\n IMPORTANT: This only checks the input to the ``search`` parameter, and no others. Default: ``False``\n :param repatriated: [str] Searches for records whose publishing country is different to the country where the record was recorded in\n :param kingdomKey: [int] Kingdom classification key\n :param phylumKey: [int] Phylum classification key\n :param classKey: [int] Class classification key\n :param orderKey: [int] Order classification key\n :param familyKey: [int] Family classification key\n :param genusKey: [int] Genus classification key\n :param subgenusKey: [int] Subgenus classification key\n :param scientificName: [str] A scientific name from the GBIF backbone. 
All included and synonym taxa are included in the search.\n :param datasetKey: [str] The occurrence dataset key (a uuid)\n :param catalogNumber: [str] An identifier of any form assigned by the source within a physical collection or digital dataset for the record, which may not be unique but should be fairly unique in combination with the institution and collection code.\n :param recordedBy: [str] The person who recorded the occurrence.\n :param collectionCode: [str] An identifier of any form assigned by the source to identify the physical collection or digital dataset uniquely within the context of an institution.\n :param institutionCode: [str] An identifier of any form assigned by the source to identify the institution the record belongs to. Not guaranteed to be unique.\n :param country: [str] The 2-letter country code (as per ISO-3166-1) of the country in which the occurrence was recorded. See here http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2\n :param basisOfRecord: [str] Basis of record, as defined in our BasisOfRecord enum here http://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/BasisOfRecord.html Acceptable values are:\n\n - ``FOSSIL_SPECIMEN`` An occurrence record describing a fossilized specimen.\n - ``HUMAN_OBSERVATION`` An occurrence record describing an observation made by one or more people.\n - ``LITERATURE`` An occurrence record based on literature alone.\n - ``LIVING_SPECIMEN`` An occurrence record describing a living specimen, e.g.\n - ``MACHINE_OBSERVATION`` An occurrence record describing an observation made by a machine.\n - ``OBSERVATION`` An occurrence record describing an observation.\n - ``PRESERVED_SPECIMEN`` An occurrence record describing a preserved specimen.\n - ``UNKNOWN`` Unknown basis for the record.\n\n :param eventDate: [date] Occurrence date in ISO 8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or\n MM-dd. Supports range queries, smaller,larger (e.g., ``1990,1991``, whereas ``1991,1990``\n wouldn't work)\n :param year: [int] The 4 digit year. A year of 98 will be interpreted as AD 98. Supports range queries,\n smaller,larger (e.g., ``1990,1991``, whereas ``1991,1990`` wouldn't work)\n :param month: [int] The month of the year, starting with 1 for January. Supports range queries,\n smaller,larger (e.g., ``1,2``, whereas ``2,1`` wouldn't work)\n :param q: [str] Query terms. The value for this parameter can be a simple word or a phrase.\n :param decimalLatitude: [float] Latitude in decimals between -90 and 90 based on WGS 84.\n Supports range queries, smaller,larger (e.g., ``25,30``, whereas ``30,25`` wouldn't work)\n :param decimalLongitude: [float] Longitude in decimals between -180 and 180 based on WGS 84.\n Supports range queries (e.g., ``-0.4,-0.2``, whereas ``-0.2,-0.4`` wouldn't work).\n :param publishingCountry: [str] The 2-letter country code (as per ISO-3166-1) of the\n country in which the occurrence was recorded.\n :param elevation: [int/str] Elevation in meters above sea level. Supports range queries, smaller,larger\n (e.g., ``5,30``, whereas ``30,5`` wouldn't work)\n :param depth: [int/str] Depth in meters relative to elevation. For example 10 meters below a\n lake surface with given elevation. Supports range queries, smaller,larger (e.g., ``5,30``,\n whereas ``30,5`` wouldn't work)\n :param geometry: [str] Searches for occurrences inside a polygon described in Well Known\n Text (WKT) format. A WKT shape written as either POINT, LINESTRING, LINEARRING\n POLYGON, or MULTIPOLYGON. 
Example of a polygon: ``((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))`` would be queried as http://bit.ly/1BzNwDq.\n Polygons must have counter-clockwise ordering of points.\n :param hasGeospatialIssue: [bool] Includes/excludes occurrence records which contain spatial\n issues (as determined in our record interpretation), i.e. ``hasGeospatialIssue=TRUE``\n returns only those records with spatial issues while ``hasGeospatialIssue=FALSE`` includes\n only records without spatial issues. The absence of this parameter returns any\n record with or without spatial issues.\n :param issue: [str] One or more of many possible issues with each occurrence record. See\n Details. Issues passed to this parameter filter results by the issue.\n :param hasCoordinate: [bool] Return only occurrence records with lat/long data (``true``) or\n all records (``false``, default).\n :param typeStatus: [str] Type status of the specimen. One of many options. See ?typestatus\n :param recordNumber: [int] Number recorded by collector of the data, different from GBIF record\n number. See http://rs.tdwg.org/dwc/terms/#recordNumber for more info\n :param lastInterpreted: [date] Date the record was last modified in GBIF, in ISO 8601 format:\n yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, smaller,larger (e.g.,\n ``1990,1991``, whereas ``1991,1990`` wouldn't work)\n :param continent: [str] Continent. One of ``africa``, ``antarctica``, ``asia``, ``europe``, ``north_america``\n (North America includes the Caribbean and reaches down to and includes Panama), ``oceania``,\n or ``south_america``\n :param fields: [str] Default (``all``) returns all fields. ``minimal`` returns just taxon name,\n key, latitude, and longitude. Or specify each field you want returned by name, e.g.\n ``fields = c('name','latitude','elevation')``.\n :param mediatype: [str] Media type. Default is ``NULL``, so no filtering on mediatype. Options:\n ``NULL``, ``MovingImage``, ``Sound``, and ``StillImage``\n :param limit: [int] Number of results to return. Default: ``300``\n :param offset: [int] Record to start at. Default: ``0``\n :param facet: [str] a character vector of length 1 or greater\n :param establishmentMeans: [str] EstablishmentMeans, possible values include: INTRODUCED,\n INVASIVE, MANAGED, NATIVE, NATURALISED, UNCERTAIN\n :param facetMincount: [int] minimum number of records to be included in the faceting results\n :param facetMultiselect: [bool] Set to ``true`` to still return counts for values that are not currently\n filtered. See examples. Default: ``false``\n\n :return: A dictionary\n\n Usage::\n\n from pygbif import occurrences\n occurrences.search(taxonKey = 3329049)\n\n # Return 2 results, this is the default by the way\n occurrences.search(taxonKey=3329049, limit=2)\n\n # Instead of getting a taxon key first, you can search for a name directly\n # However, note that using this approach (with `scientificName=\"...\"`)\n # you are getting synonyms too. 
The results for using `scientifcName` and\n # `taxonKey` parameters are the same in this case, but I wouldn't be surprised if for some\n # names they return different results\n occurrences.search(scientificName = 'Ursus americanus')\n from pygbif import species\n key = species.name_backbone(name = 'Ursus americanus', rank='species')['usageKey']\n occurrences.search(taxonKey = key)\n\n # Search by dataset key\n occurrences.search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', limit=20)\n\n # Search by catalog number\n occurrences.search(catalogNumber=\"49366\", limit=20)\n # occurrences.search(catalogNumber=[\"49366\",\"Bird.27847588\"], limit=20)\n\n # Use paging parameters (limit and offset) to page. Note the different results\n # for the two queries below.\n occurrences.search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', offset=10, limit=5)\n occurrences.search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', offset=20, limit=5)\n\n # Many dataset keys\n # occurrences.search(datasetKey=[\"50c9509d-22c7-4a22-a47d-8c48425ef4a7\", \"7b5d6a48-f762-11e1-a439-00145eb45e9a\"], limit=20)\n\n # Search by collector name\n res = occurrences.search(recordedBy=\"smith\", limit=20)\n [ x['recordedBy'] for x in res['results'] ]\n\n # Many collector names\n # occurrences.search(recordedBy=[\"smith\",\"BJ Stacey\"], limit=20)\n\n # Search for many species\n splist = ['Cyanocitta stelleri', 'Junco hyemalis', 'Aix sponsa']\n keys = [ species.name_suggest(x)[0]['key'] for x in splist ]\n out = [ occurrences.search(taxonKey = x, limit=1) for x in keys ]\n [ x['results'][0]['speciesKey'] for x in out ]\n\n # Search - q parameter\n occurrences.search(q = \"kingfisher\", limit=20)\n ## spell check - only works with the `search` parameter\n ### spelled correctly - same result as above call\n occurrences.search(q = \"kingfisher\", limit=20, spellCheck = True)\n ### spelled incorrectly - stops with suggested spelling\n occurrences.search(q = \"kajsdkla\", limit=20, spellCheck = True)\n ### spelled incorrectly - stops with many suggested spellings\n ### and number of results for each\n occurrences.search(q = \"helir\", limit=20, spellCheck = True)\n\n # Search on latitidue and longitude\n occurrences.search(decimalLatitude=50, decimalLongitude=10, limit=2)\n\n # Search on a bounding box\n ## in well known text format\n occurrences.search(geometry='POLYGON((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))', limit=20)\n from pygbif import species\n key = species.name_suggest(q='Aesculus hippocastanum')[0]['key']\n occurrences.search(taxonKey=key, geometry='POLYGON((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))', limit=20)\n ## multipolygon\n wkt = 'MULTIPOLYGON(((-123 38, -123 43, -116 43, -116 38, -123 38)),((-97 41, -97 45, -93 45, -93 41, -97 41)))'\n occurrences.search(geometry = wkt, limit = 20)\n\n # Search on country\n occurrences.search(country='US', limit=20)\n occurrences.search(country='FR', limit=20)\n occurrences.search(country='DE', limit=20)\n\n # Get only occurrences with lat/long data\n occurrences.search(taxonKey=key, hasCoordinate=True, limit=20)\n\n # Get only occurrences that were recorded as living specimens\n occurrences.search(taxonKey=key, basisOfRecord=\"LIVING_SPECIMEN\", hasCoordinate=True, limit=20)\n\n # Get occurrences for a particular eventDate\n occurrences.search(taxonKey=key, eventDate=\"2013\", limit=20)\n occurrences.search(taxonKey=key, year=\"2013\", limit=20)\n occurrences.search(taxonKey=key, month=\"6\", limit=20)\n\n # Get occurrences based on depth\n key = 
species.name_backbone(name='Salmo salar', kingdom='animals')['usageKey']\n occurrences.search(taxonKey=key, depth=\"5\", limit=20)\n\n # Get occurrences based on elevation\n key = species.name_backbone(name='Puma concolor', kingdom='animals')['usageKey']\n occurrences.search(taxonKey=key, elevation=50, hasCoordinate=True, limit=20)\n\n # Get occurrences based on institutionCode\n occurrences.search(institutionCode=\"TLMF\", limit=20)\n\n # Get occurrences based on collectionCode\n occurrences.search(collectionCode=\"Floristic Databases MV - Higher Plants\", limit=20)\n\n # Get only those occurrences with spatial issues\n occurrences.search(taxonKey=key, hasGeospatialIssue=True, limit=20)\n\n # Search using a query string\n occurrences.search(q=\"kingfisher\", limit=20)\n\n # Range queries\n ## See Detail for parameters that support range queries\n ### this is a range depth, with lower/upper limits in character string\n occurrences.search(depth='50,100')\n\n ## Range search with year\n occurrences.search(year='1999,2000', limit=20)\n\n ## Range search with latitude\n occurrences.search(decimalLatitude='29.59,29.6')\n\n # Search by specimen type status\n ## Look for possible values of the typeStatus parameter looking at the typestatus dataset\n occurrences.search(typeStatus = 'allotype')\n\n # Search by specimen record number\n ## This is the record number of the person/group that submitted the data, not GBIF's numbers\n ## You can see that many different groups have record number 1, so not super helpful\n occurrences.search(recordNumber = 1)\n\n # Search by last time interpreted: Date the record was last modified in GBIF\n ## The lastInterpreted parameter accepts ISO 8601 format dates, including\n ## yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Range queries are accepted for lastInterpreted\n occurrences.search(lastInterpreted = '2014-04-01')\n\n # Search by continent\n ## One of africa, antarctica, asia, europe, north_america, oceania, or south_america\n occurrences.search(continent = 'south_america')\n occurrences.search(continent = 'africa')\n occurrences.search(continent = 'oceania')\n occurrences.search(continent = 'antarctica')\n\n # Search for occurrences with images\n occurrences.search(mediatype = 'StillImage')\n occurrences.search(mediatype = 'MovingImage')\n x = occurrences.search(mediatype = 'Sound')\n [z['media'] for z in x['results']]\n\n # Query based on issues\n occurrences.search(taxonKey=1, issue='DEPTH_UNLIKELY')\n occurrences.search(taxonKey=1, issue=['DEPTH_UNLIKELY','COORDINATE_ROUNDED'])\n # Show all records in the Arizona State Lichen Collection that cant be matched to the GBIF\n # backbone properly:\n occurrences.search(datasetKey='84c0e1a0-f762-11e1-a439-00145eb45e9a', issue=['TAXON_MATCH_NONE','TAXON_MATCH_HIGHERRANK'])\n\n # If you pass in an invalid polygon you get hopefully informative errors\n ### the WKT string is fine, but GBIF says bad polygon\n wkt = 'POLYGON((-178.59375 64.83258989321493,-165.9375 59.24622380205539,\n -147.3046875 59.065977905449806,-130.78125 51.04484764446178,-125.859375 36.70806354647625,\n -112.1484375 23.367471303759686,-105.1171875 16.093320185359257,-86.8359375 9.23767076398516,\n -82.96875 2.9485268155066175,-82.6171875 -14.812060061226388,-74.8828125 -18.849111862023985,\n -77.34375 -47.661687803329166,-84.375 -49.975955187343295,174.7265625 -50.649460483096114,\n 179.296875 -42.19189902447192,-176.8359375 -35.634976650677295,176.8359375 -31.835565983656227,\n 163.4765625 -6.528187613695323,152.578125 1.894796132058301,135.703125 
4.702353722559447,\n 127.96875 15.077427674847987,127.96875 23.689804541429606,139.921875 32.06861069132688,\n 149.4140625 42.65416193033991,159.2578125 48.3160811030533,168.3984375 57.019804336633165,\n 178.2421875 59.95776046458139,-179.6484375 61.16708631440347,-178.59375 64.83258989321493))'\n occurrences.search(geometry = wkt)\n\n # Faceting\n ## return no occurrence records with limit=0\n x = occurrences.search(facet = \"country\", limit = 0)\n x['facets']\n\n ## also return occurrence records\n x = occurrences.search(facet = \"establishmentMeans\", limit = 10)\n x['facets']\n x['results']\n\n ## multiple facet variables\n x = occurrences.search(facet = [\"country\", \"basisOfRecord\"], limit = 10)\n x['results']\n x['facets']\n x['facets']['country']\n x['facets']['basisOfRecord']\n x['facets']['basisOfRecord']['count']\n\n ## set a minimum facet count\n x = occurrences.search(facet = \"country\", facetMincount = 30000000L, limit = 0)\n x['facets']\n\n ## paging per each faceted variable\n ### do so by passing in variables like \"country\" + \"_facetLimit\" = \"country_facetLimit\"\n ### or \"country\" + \"_facetOffset\" = \"country_facetOffset\"\n x = occurrences.search(\n facet = [\"country\", \"basisOfRecord\", \"hasCoordinate\"],\n country_facetLimit = 3,\n basisOfRecord_facetLimit = 6,\n limit = 0\n )\n x['facets']\n\n # requests package options\n ## There's an acceptable set of requests options (['timeout', 'cookies', 'auth',\n ## 'allow_redirects', 'proxies', 'verify', 'stream', 'cert']) you can pass\n ## in via **kwargs, e.g., set a timeout\n x = occurrences.search(timeout = 1)\n '''\n url = gbif_baseurl + 'occurrence/search'\n args = {'taxonKey': taxonKey, 'repatriated': repatriated,\n 'kingdomKey': kingdomKey, 'phylumKey': phylumKey, 'classKey': classKey,\n 'orderKey': orderKey, 'familyKey': familyKey, 'genusKey': genusKey,\n 'subgenusKey': subgenusKey, 'scientificName': scientificName,\n 'country': country, 'publishingCountry': publishingCountry,\n 'hasCoordinate': hasCoordinate, 'typeStatus': typeStatus,\n 'recordNumber': recordNumber, 'lastInterpreted': lastInterpreted,\n 'continent': continent, 'geometry': geometry, 'recordedBy': recordedBy,\n 'basisOfRecord': basisOfRecord, 'datasetKey': datasetKey, 'eventDate': eventDate,\n 'catalogNumber': catalogNumber, 'year': year, 'month': month,\n 'decimalLatitude': decimalLatitude, 'decimalLongitude': decimalLongitude,\n 'elevation': elevation, 'depth': depth, 'institutionCode': institutionCode,\n 'collectionCode': collectionCode, 'hasGeospatialIssue': hasGeospatialIssue,\n 'issue': issue, 'q': q, 'spellCheck': spellCheck, 'mediatype': mediatype,\n 'limit': limit, 'offset': offset, 'establishmentMeans': establishmentMeans,\n 'facetMincount': facetMincount, 'facet': facet,\n 'facetMultiselect': facetMultiselect}\n gbif_kwargs = {key: kwargs[key] for key in kwargs if key not in requests_argset}\n if gbif_kwargs is not None:\n xx = dict(zip( [ re.sub('_', '.', x) for x in gbif_kwargs.keys() ], gbif_kwargs.values() ))\n args.update(xx)\n kwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}\n out = gbif_GET(url, args, **kwargs)\n return out"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef networks(data = 'all', uuid = None, q = None, identifier = None,\n identifierType = None, limit = 100, offset = None, **kwargs):\n '''\n Networks metadata.\n\n Note: there's only 1 network now, so there's not a lot you can do with this method.\n\n :param data: [str] The type of 
data to get. Default: ``all``\n :param uuid: [str] UUID of the data network provider. This must be specified if data\n is anything other than ``all``.\n :param q: [str] Query networks. Only used when ``data = 'all'``. Ignored otherwise.\n :param identifier: [fixnum] The value for this parameter can be a simple string or integer,\n e.g. identifier=120\n :param identifierType: [str] Used in combination with the identifier parameter to filter\n identifiers by identifier type: ``DOI``, ``FTP``, ``GBIF_NODE``, ``GBIF_PARTICIPANT``,\n ``GBIF_PORTAL``, ``HANDLER``, ``LSID``, ``UNKNOWN``, ``URI``, ``URL``, ``UUID``\n :param limit: [int] Number of results to return. Default: ``100``\n :param offset: [int] Record to start at. Default: ``0``\n\n :return: A dictionary\n\n References: http://www.gbif.org/developer/registry#networks\n\n Usage::\n\n from pygbif import registry\n registry.networks(limit=1)\n registry.networks(uuid='2b7c7b4f-4d4f-40d3-94de-c28b6fa054a6')\n '''\n args = {'q': q, 'limit': limit, 'offset': offset, 'identifier': identifier,\n 'identifierType': identifierType}\n data_choices = ['all', 'contact', 'endpoint', 'identifier',\n 'tag', 'machineTag', 'comment', 'constituents']\n check_data(data, data_choices)\n\n def getdata(x, uuid, args, **kwargs):\n if x != 'all' and uuid is None:\n stop('You must specify a uuid if data does not equal \"all\"')\n\n if uuid is None:\n url = gbif_baseurl + 'network'\n else:\n if x == 'all':\n url = gbif_baseurl + 'network/' + uuid\n else:\n url = gbif_baseurl + 'network/' + uuid + '/' + x\n\n res = gbif_GET(url, args, **kwargs)\n return {'meta': get_meta(res), 'data': parse_results(res, uuid)}\n\n if len2(data) == 1:\n return getdata(data, uuid, args, **kwargs)\n else:\n return [getdata(x, uuid, args, **kwargs) for x in data]", "response": "Get a dictionary of network metadata."} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\nreturns a GBIF map for the specified source.", "response": "def map(source = 'density', z = 0, x = 0, y = 0, format = '@1x.png', \n srs='EPSG:4326', bin=None, hexPerTile=None, style='classic.point', \n taxonKey=None, country=None, publishingCountry=None, publisher=None,\n datasetKey=None, year=None, basisOfRecord=None, **kwargs):\n '''\n GBIF maps API\n\n :param source: [str] Either ``density`` for fast, precalculated tiles,\n or ``adhoc`` for any search\n :param z: [str] zoom level\n :param x: [str] longitude\n :param y: [str] latitude\n :param format: [str] format of returned data. One of:\n\n - ``.mvt`` - vector tile\n - ``@Hx.png`` - 256px raster tile (for legacy clients)\n - ``@1x.png`` - 512px raster tile\n - ``@2x.png`` - 1024px raster tile\n - ``@3x.png`` - 2048px raster tile\n - ``@4x.png`` - 4096px raster tile\n\n :param srs: [str] Spatial reference system. One of:\n\n - ``EPSG:3857`` (Web Mercator)\n - ``EPSG:4326`` (WGS84 plate caree)\n - ``EPSG:3575`` (Arctic LAEA)\n - ``EPSG:3031`` (Antarctic stereographic)\n\n :param bin: [str] square or hex to aggregate occurrence counts into\n squares or hexagons. Points by default.\n :param hexPerTile: [str] sets the size of the hexagons (the number \n horizontally across a tile)\n :param squareSize: [str] sets the size of the squares. 
Choose a factor \n of 4096 so they tessellate correctly: probably from 8, 16, 32, 64, \n 128, 256, 512.\n :param style: [str] for raster tiles, choose from the available styles.\n Defaults to classic.point.\n :param taxonKey: [int] A GBIF taxon key\n :param datasetKey: [str] The occurrence dataset key (a uuid)\n :param country: [str] The 2-letter country code (as per ISO-3166-1) of \n the country in which the occurrence was recorded. See here\n http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2\n :param basisOfRecord: [str] Basis of record, as defined in the BasisOfRecord enum \n http://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/BasisOfRecord.html\n Acceptable values are\n\n - ``FOSSIL_SPECIMEN`` An occurrence record describing a fossilized specimen.\n - ``HUMAN_OBSERVATION`` An occurrence record describing an observation made by one or more people.\n - ``LITERATURE`` An occurrence record based on literature alone.\n - ``LIVING_SPECIMEN`` An occurrence record describing a living specimen.\n - ``MACHINE_OBSERVATION`` An occurrence record describing an observation made by a machine.\n - ``OBSERVATION`` An occurrence record describing an observation.\n - ``PRESERVED_SPECIMEN`` An occurrence record describing a preserved specimen.\n - ``UNKNOWN`` Unknown basis for the record.\n\n :param year: [int] The 4 digit year. A year of 98 will be interpreted as \n AD 98. Supports range queries with the smaller year first (e.g., ``1990,1991``, \n whereas ``1991,1990`` wouldn't work)\n :param publishingCountry: [str] The 2-letter country code (as per \n ISO-3166-1) of the country in which the occurrence was recorded.\n\n :return: An object of class GbifMap\n\n For mvt format, see https://github.com/tilezen/mapbox-vector-tile to \n decode, and example below\n\n Usage::\n\n from pygbif import maps\n out = maps.map(taxonKey = 2435098)\n out.response\n out.path\n out.img\n out.plot()\n\n out = maps.map(taxonKey = 2480498, year = range(2008, 2011+1))\n out.response\n out.path\n out.img\n out.plot()\n \n # srs\n maps.map(taxonKey = 2480498, year = 2010, srs = \"EPSG:3857\")\n # bin\n maps.map(taxonKey = 212, year = 1998, bin = \"hex\",\n hexPerTile = 30, style = \"classic-noborder.poly\")\n # style\n maps.map(taxonKey = 2480498, style = \"purpleYellow.point\").plot()\n # basisOfRecord\n maps.map(taxonKey = 2480498, year = 2010,\n basisOfRecord = \"HUMAN_OBSERVATION\", bin = \"hex\", \n hexPerTile = 500).plot()\n maps.map(taxonKey = 2480498, year = 2010, \n basisOfRecord = [\"HUMAN_OBSERVATION\", \"LIVING_SPECIMEN\"],\n hexPerTile = 500, bin = \"hex\").plot()\n\n # map vector tiles, gives back raw bytes\n from pygbif import maps\n x = maps.map(taxonKey = 2480498, year = 2010,\n format = \".mvt\")\n x.response\n x.path\n x.img # None\n import mapbox_vector_tile\n mapbox_vector_tile.decode(x.response.content)\n '''\n if format not in ['.mvt', '@Hx.png', '@1x.png', '@2x.png', '@3x.png', '@4x.png']:\n raise ValueError(\"'format' not in allowed set, see docs\")\n if source not in ['density', 'adhoc']:\n raise ValueError(\"'source' not in allowed set, see docs\")\n if srs not in ['EPSG:3857', 'EPSG:4326', 'EPSG:3575', 'EPSG:3031']:\n raise ValueError(\"'srs' not in allowed set, see docs\")\n if bin is not None:\n if bin not in ['square', 'hex']:\n raise ValueError(\"'bin' not in allowed set, see docs\")\n if style is not None:\n if style not in map_styles:\n raise ValueError(\"'style' not in allowed set, see docs\")\n\n maps_baseurl = 'https://api.gbif.org'\n url = maps_baseurl + 
'/v2/map/occurrence/%s/%s/%s/%s%s'\n url = url % ( source, z, x, y, format )\n year = __handle_year(year)\n basisOfRecord = __handle_bor(basisOfRecord)\n args = {'srs': srs, 'bin': bin, 'hexPerTile': hexPerTile, 'style': style,\n 'taxonKey': taxonKey, 'country': country,\n 'publishingCountry': publishingCountry, 'publisher': publisher,\n 'datasetKey': datasetKey, 'year': year,\n 'basisOfRecord': basisOfRecord}\n kw = {key: kwargs[key] for key in kwargs if key not in requests_argset}\n if kw is not None:\n xx = dict(zip( [ re.sub('_', '.', x) for x in kw.keys() ], kw.values() ))\n args.update(xx)\n kwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}\n ctype = 'image/png' if has(format, \"png\") else 'application/x-protobuf'\n out = gbif_GET_map(url, args, ctype, **kwargs)\n # return out\n return GbifMap(out)"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef _check_environ(variable, value):\n if is_not_none(value):\n return value\n else:\n value = os.environ.get(variable)\n if is_none(value):\n stop(''.join([variable,\n \"\"\" not supplied and no entry in environmental\n variables\"\"\"]))\n else:\n return value", "response": "check if a variable is present in the environmental variables"} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef download(queries, user=None, pwd=None,\n email=None, pred_type='and'):\n \"\"\"\n Spin up a download request for GBIF occurrence data.\n\n :param queries: One or more query arguments to kick off a download job.\n See Details.\n :type queries: str or list\n :param pred_type: (character) One of ``equals`` (``=``), ``and`` (``&``),\n ``or`` (``|``), ``lessThan`` (``<``), ``lessThanOrEquals`` (``<=``),\n ``greaterThan`` (``>``), ``greaterThanOrEquals`` (``>=``),\n ``in``, ``within``, ``not`` (``!``), ``like``\n :param user: (character) User name within GBIF's website.\n Required. Set in your env vars with the option ``GBIF_USER``\n :param pwd: (character) User password within GBIF's website. Required.\n Set in your env vars with the option ``GBIF_PWD``\n :param email: (character) Email address to receive the notice when the\n download is done. Required. 
Set in your env vars with the option ``GBIF_EMAIL``\n\n Arguments have to be passed as character strings (e.g., ``country = US``),\n with a space between key (``country``), operator (``=``), and value (``US``).\n See the ``pred_type`` parameter for possible options for the operator.\n This character string is parsed internally.\n\n Acceptable arguments to ``...`` (args) are:\n\n - taxonKey = ``TAXON_KEY``\n - scientificName = ``SCIENTIFIC_NAME``\n - country = ``COUNTRY``\n - publishingCountry = ``PUBLISHING_COUNTRY``\n - hasCoordinate = ``HAS_COORDINATE``\n - hasGeospatialIssue = ``HAS_GEOSPATIAL_ISSUE``\n - typeStatus = ``TYPE_STATUS``\n - recordNumber = ``RECORD_NUMBER``\n - lastInterpreted = ``LAST_INTERPRETED``\n - continent = ``CONTINENT``\n - geometry = ``GEOMETRY``\n - basisOfRecord = ``BASIS_OF_RECORD``\n - datasetKey = ``DATASET_KEY``\n - eventDate = ``EVENT_DATE``\n - catalogNumber = ``CATALOG_NUMBER``\n - year = ``YEAR``\n - month = ``MONTH``\n - decimalLatitude = ``DECIMAL_LATITUDE``\n - decimalLongitude = ``DECIMAL_LONGITUDE``\n - elevation = ``ELEVATION``\n - depth = ``DEPTH``\n - institutionCode = ``INSTITUTION_CODE``\n - collectionCode = ``COLLECTION_CODE``\n - issue = ``ISSUE``\n - mediatype = ``MEDIA_TYPE``\n - recordedBy = ``RECORDED_BY``\n - repatriated = ``REPATRIATED``\n\n See the API docs http://www.gbif.org/developer/occurrence#download\n for more info, and the predicates docs\n http://www.gbif.org/developer/occurrence#predicates\n\n GBIF has a limit of 12,000 characters for download queries - so\n if your download request is really, really long and complex,\n consider breaking it up into multiple requests by one factor or\n another.\n\n :return: A dictionary, of results\n\n Usage::\n\n from pygbif import occurrences as occ\n\n occ.download('basisOfRecord = LITERATURE')\n occ.download('taxonKey = 3119195')\n occ.download('decimalLatitude > 50')\n occ.download('elevation >= 9000')\n occ.download('decimalLatitude >= 65')\n occ.download('country = US')\n occ.download('institutionCode = TLMF')\n occ.download('catalogNumber = Bird.27847588')\n\n res = occ.download(['taxonKey = 7264332', 'hasCoordinate = TRUE'])\n\n # pass output to download_meta for more information\n occ.download_meta(occ.download('decimalLatitude > 75'))\n\n # Multiple queries\n gg = occ.download(['decimalLatitude >= 65',\n 'decimalLatitude <= -65'], pred_type='or')\n gg = occ.download(['depth = 80', 'taxonKey = 2343454'],\n pred_type='or')\n\n # Repatriated data for Costa Rica\n occ.download(['country = CR', 'repatriated = true'])\n \"\"\"\n\n user = _check_environ('GBIF_USER', user)\n pwd = _check_environ('GBIF_PWD', pwd)\n email = _check_environ('GBIF_EMAIL', email)\n\n if isinstance(queries, str):\n queries = [queries]\n\n keyval = [_parse_args(z) for z in queries]\n\n # USE GBIFDownload class to set up the predicates\n req = GbifDownload(user, email)\n req.main_pred_type = pred_type\n for predicate in keyval:\n req.add_predicate(predicate['key'],\n predicate['value'],\n predicate['type'])\n\n out = req.post_download(user, pwd)\n return out, req.payload", "response": "This function is used to kick off a download job for GBIF occurrence data."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef download_list(user=None, pwd=None, limit=20, offset=0):\n\n user = _check_environ('GBIF_USER', user)\n pwd = _check_environ('GBIF_PWD', pwd)\n\n url = 'http://api.gbif.org/v1/occurrence/download/user/' + user\n args = {'limit': limit, 'offset': offset}\n res = gbif_GET(url, 
args, auth=(user, pwd))\n return {'meta': {'offset': res['offset'],\n 'limit': res['limit'],\n 'endofrecords': res['endOfRecords'],\n 'count': res['count']},\n 'results': res['results']}", "response": "Lists the downloads created by a user."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef download_get(key, path=\".\", **kwargs):\n meta = pygbif.occurrences.download_meta(key)\n if meta['status'] != 'SUCCEEDED':\n raise Exception('download \"%s\" not of status SUCCEEDED' % key)\n else:\n print('Download file size: %s bytes' % meta['size'])\n url = 'http://api.gbif.org/v1/occurrence/download/request/' + key\n path = \"%s/%s.zip\" % (path, key)\n gbif_GET_write(url, path, **kwargs)\n print(\"On disk at \" + path)\n return {'path': path, 'size': meta['size'], 'key': key}", "response": "Download a file from GBIF."} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef main_pred_type(self, value):\n if value not in operators:\n value = operator_lkup.get(value)\n if value:\n self._main_pred_type = value\n self.payload['predicate']['type'] = self._main_pred_type\n else:\n raise Exception(\"main predicate combiner not a valid operator\")", "response": "set main predicate combination type"} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\nadds predicate to the list of predicates", "response": "def add_predicate(self, key, value, predicate_type='equals'):\n \"\"\"\n add key, value, type combination of a predicate\n\n :param key: query KEY parameter\n :param value: the value used in the predicate\n :param predicate_type: the type of predicate (e.g. ``equals``)\n \"\"\"\n if predicate_type not in operators:\n predicate_type = operator_lkup.get(predicate_type)\n if predicate_type:\n self.predicates.append({'type': predicate_type,\n 'key': key,\n 'value': value\n })\n else:\n raise Exception(\"predicate type not a valid operator\")"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nextract values from either file or list of values", "response": "def _extract_values(values_list):\n \"\"\"extract values from either file or list\n\n :param values_list: list or file name (str) with list of values\n \"\"\"\n values = []\n # check if file or list of values to iterate\n if isinstance(values_list, str):\n with open(values_list) as ff:\n reading = csv.reader(ff)\n for j in reading:\n values.append(j[0])\n elif isinstance(values_list, list):\n values = values_list\n else:\n raise Exception(\"input datatype not supported.\")\n return values"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nadd an iterative predicate with a key and a set of values which it can be equal to, combined in an ``or`` function.", "response": "def add_iterative_predicate(self, key, values_list):\n \"\"\"add an iterative predicate with a key and a set of values\n which it can be equal to, combined in an ``or`` function.\n The individual predicates are specified with the type ``equals`` and\n combined with a type ``or``.\n\n The main reason for this addition is the inability to use ``in`` as a\n predicate type for multiple taxon_key values\n (cfr. 
http://dev.gbif.org/issues/browse/POR-2753)\n\n :param key: API key to use for the query.\n :param values_list: Filename or list containing the taxon keys to be\n searched.\n\n \"\"\"\n values = self._extract_values(values_list)\n\n predicate = {'type': 'equals', 'key': key, 'value': None}\n predicates = []\n while values:\n predicate['value'] = values.pop()\n predicates.append(predicate.copy())\n self.predicates.append({'type': 'or', 'predicates': predicates})"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\ngetting details for a single interpreted occurrence", "response": "def get(key, **kwargs):\n '''\n Gets details for a single, interpreted occurrence\n\n :param key: [int] A GBIF occurrence key\n\n :return: A dictionary, of results\n\n Usage::\n\n from pygbif import occurrences\n occurrences.get(key = 1258202889)\n occurrences.get(key = 1227768771)\n occurrences.get(key = 1227769518)\n '''\n url = gbif_baseurl + 'occurrence/' + str(key)\n out = gbif_GET(url, {}, **kwargs)\n return out"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\ngetting a verbatim occurrence record without any interpretation", "response": "def get_verbatim(key, **kwargs):\n '''\n Gets a verbatim occurrence record without any interpretation\n\n :param key: [int] A GBIF occurrence key\n\n :return: A dictionary, of results\n\n Usage::\n\n from pygbif import occurrences\n occurrences.get_verbatim(key = 1258202889)\n occurrences.get_verbatim(key = 1227768771)\n occurrences.get_verbatim(key = 1227769518)\n '''\n url = gbif_baseurl + 'occurrence/' + str(key) + '/verbatim'\n out = gbif_GET(url, {}, **kwargs)\n return out"} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef get_fragment(key, **kwargs):\n '''\n Get a single occurrence fragment in its raw form (xml or json)\n\n :param key: [int] A GBIF occurrence key\n\n :return: A dictionary, of results\n\n Usage::\n\n from pygbif import occurrences\n occurrences.get_fragment(key = 1052909293)\n occurrences.get_fragment(key = 1227768771)\n occurrences.get_fragment(key = 1227769518)\n '''\n url = gbif_baseurl + 'occurrence/' + str(key) + '/fragment'\n out = gbif_GET(url, {}, **kwargs)\n return out", "response": "Get a single occurrence fragment in its raw form."} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script for\nparsing taxon names using the GBIF name parser.", "response": "def name_parser(name, **kwargs):\n '''\n Parse taxon names using the GBIF name parser\n\n :param name: [str] A character vector of scientific names. (required)\n\n reference: http://www.gbif.org/developer/species#parser\n\n Usage::\n\n from pygbif import species\n species.name_parser('x Agropogon littoralis')\n species.name_parser(['Arrhenatherum elatius var. elatius',\n 'Secale cereale subsp. cereale', 'Secale cereale ssp. 
cereale',\n 'Vanessa atalanta (Linnaeus, 1758)'])\n '''\n url = gbif_baseurl + 'parser/name'\n if name.__class__ == str:\n name = [name]\n return gbif_POST(url, name, **kwargs)"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef name_lookup(q=None, rank=None, higherTaxonKey=None, status=None, isExtinct=None,\n\thabitat=None, nameType=None, datasetKey=None, nomenclaturalStatus=None,\n\tlimit=100, offset=None, facet=False, facetMincount=None, facetMultiselect=None,\n\ttype=None, hl=False, verbose=False, **kwargs):\n\t'''\n\tLookup names in all taxonomies in GBIF.\n\n\tThis service uses fuzzy lookup so that you can put in partial names and\n\tyou should get back those things that match. See examples below.\n\n\t:param q: [str] Query term(s) for full text search (optional)\n\t:param rank: [str] ``CLASS``, ``CULTIVAR``, ``CULTIVAR_GROUP``, ``DOMAIN``, ``FAMILY``,\n\t\t ``FORM``, ``GENUS``, ``INFORMAL``, ``INFRAGENERIC_NAME``, ``INFRAORDER``, ``INFRASPECIFIC_NAME``,\n\t\t ``INFRASUBSPECIFIC_NAME``, ``KINGDOM``, ``ORDER``, ``PHYLUM``, ``SECTION``, ``SERIES``, ``SPECIES``, ``STRAIN``, ``SUBCLASS``,\n\t\t ``SUBFAMILY``, ``SUBFORM``, ``SUBGENUS``, ``SUBKINGDOM``, ``SUBORDER``, ``SUBPHYLUM``, ``SUBSECTION``, ``SUBSERIES``,\n\t\t ``SUBSPECIES``, ``SUBTRIBE``, ``SUBVARIETY``, ``SUPERCLASS``, ``SUPERFAMILY``, ``SUPERORDER``, ``SUPERPHYLUM``,\n\t\t ``SUPRAGENERIC_NAME``, ``TRIBE``, ``UNRANKED``, ``VARIETY`` (optional)\n\t:param verbose: [bool] If True show alternative matches considered which had been rejected.\n\t:param higherTaxonKey: [str] Filters by any of the higher Linnean rank keys. Note this\n\t\t\tis within the respective checklist and not searching nub keys across all checklists (optional)\n\t:param status: [str] (optional) Filters by the taxonomic status as one of:\n\n\t\t* ``ACCEPTED``\n\t\t* ``DETERMINATION_SYNONYM`` Used for unknown child taxa referred to via spec, ssp, ...\n\t\t* ``DOUBTFUL`` Treated as accepted, but doubtful whether this is correct.\n\t\t* ``HETEROTYPIC_SYNONYM`` More specific subclass of ``SYNONYM``.\n\t\t* ``HOMOTYPIC_SYNONYM`` More specific subclass of ``SYNONYM``.\n\t\t* ``INTERMEDIATE_RANK_SYNONYM`` Used in nub only.\n\t\t* ``MISAPPLIED`` More specific subclass of ``SYNONYM``.\n\t\t* ``PROPARTE_SYNONYM`` More specific subclass of ``SYNONYM``.\n\t\t* ``SYNONYM`` A general synonym, the exact type is unknown.\n\n\t:param isExtinct: [bool] Filters by extinction status (e.g. ``isExtinct=True``)\n\t:param habitat: [str] Filters by habitat. 
One of: ``marine``, ``freshwater``, or\n\t\t\t``terrestrial`` (optional)\n\t:param nameType: [str] (optional) Filters by the name type as one of:\n\n\t\t* ``BLACKLISTED`` surely not a scientific name.\n\t\t* ``CANDIDATUS`` Candidatus is a component of the taxonomic name for a bacterium that cannot be maintained in a Bacteriology Culture Collection.\n\t\t* ``CULTIVAR`` a cultivated plant name.\n\t\t* ``DOUBTFUL`` doubtful whether this is a scientific name at all.\n\t\t* ``HYBRID`` a hybrid formula (not a hybrid name).\n\t\t* ``INFORMAL`` a scientific name with some informal addition like \"cf.\" or indetermined like Abies spec.\n\t\t* ``SCINAME`` a scientific name which is not well formed.\n\t\t* ``VIRUS`` a virus name.\n\t\t* ``WELLFORMED`` a well formed scientific name according to present nomenclatural rules.\n\n\t:param datasetKey: [str] Filters by the dataset's key (a uuid) (optional)\n\t:param nomenclaturalStatus: [str] Not yet implemented, but will eventually allow for\n\t\t\tfiltering by a nomenclatural status enum\n\t:param limit: [fixnum] Number of records to return. Maximum: ``1000``. (optional)\n\t:param offset: [fixnum] Record number to start at. (optional)\n\t:param facet: [str] A list of facet names used to retrieve the 100 most frequent values\n\t\t\tfor a field. Allowed facets are: ``datasetKey``, ``higherTaxonKey``, ``rank``, ``status``,\n\t\t\t``isExtinct``, ``habitat``, and ``nameType``. Additionally ``threat`` and ``nomenclaturalStatus``\n\t\t\tare legal values but not yet implemented, so data will not yet be returned for them. (optional)\n\t:param facetMincount: [str] Used in combination with the facet parameter. Set\n\t\t\t``facetMincount={#}`` to exclude facets with a count less than {#}, e.g.\n\t\t\thttp://bit.ly/1bMdByP only shows the type value ``ACCEPTED`` because the other\n\t\t\tstatuses have counts less than 7,000,000 (optional)\n\t:param facetMultiselect: [bool] Used in combination with the facet parameter. Set\n\t\t\t``facetMultiselect=True`` to still return counts for values that are not currently\n\t\t\tfiltered, e.g. http://bit.ly/19YLXPO still shows all status values even though\n\t\t\tstatus is being filtered by ``status=ACCEPTED`` (optional)\n\t:param type: [str] Type of name. One of ``occurrence``, ``checklist``, or ``metadata``. (optional)\n\t:param hl: [bool] Set ``hl=True`` to highlight terms matching the query when in fulltext\n\t\t\tsearch fields. The highlight will be an emphasis tag of class ``gbifH1`` e.g.\n\t\t\t``q='plant', hl=True``. Fulltext search fields include: ``title``, ``keyword``, ``country``,\n\t\t\t``publishing country``, ``publishing organization title``, ``hosting organization title``, and\n\t\t\t``description``. One additional full text field is searched which includes information from\n\t\t\tmetadata documents, but the text of this field is not returned in the response. 
(optional)\n\n\t:return: A dictionary\n\n\t:references: http://www.gbif.org/developer/species#searching\n\n\tUsage::\n\n\t\t\tfrom pygbif import species\n\n\t\t\t# Look up names like mammalia\n\t\t\tspecies.name_lookup(q='mammalia')\n\n\t\t\t# Paging\n\t\t\tspecies.name_lookup(q='mammalia', limit=1)\n\t\t\tspecies.name_lookup(q='mammalia', limit=1, offset=2)\n\n\t\t\t# large requests, use offset parameter\n\t\t\tfirst = species.name_lookup(q='mammalia', limit=1000)\n\t\t\tsecond = species.name_lookup(q='mammalia', limit=1000, offset=1000)\n\n\t\t\t# Show alternative matches that were considered but rejected\n\t\t\tspecies.name_lookup('Helianthus annuus', rank=\"species\", verbose=True)\n\n\t\t\t# Get all data and parse it, removing descriptions field which can be quite long\n\t\t\tout = species.name_lookup('Helianthus annuus', rank=\"species\")\n\t\t\tres = out['results']\n\t\t\t[ z.pop('descriptions', None) for z in res ]\n\t\t\tres\n\n\t\t\t# Fuzzy searching\n\t\t\tspecies.name_lookup(q='Heli', rank=\"genus\")\n\n\t\t\t# Limit records to certain number\n\t\t\tspecies.name_lookup('Helianthus annuus', rank=\"species\", limit=2)\n\n\t\t\t# Query by habitat\n\t\t\tspecies.name_lookup(habitat = \"terrestrial\", limit=2)\n\t\t\tspecies.name_lookup(habitat = \"marine\", limit=2)\n\t\t\tspecies.name_lookup(habitat = \"freshwater\", limit=2)\n\n\t\t\t# Using faceting\n\t\t\tspecies.name_lookup(facet='status', limit=0, facetMincount='70000')\n\t\t\tspecies.name_lookup(facet=['status', 'higherTaxonKey'], limit=0, facetMincount='700000')\n\n\t\t\tspecies.name_lookup(facet='nameType', limit=0)\n\t\t\tspecies.name_lookup(facet='habitat', limit=0)\n\t\t\tspecies.name_lookup(facet='datasetKey', limit=0)\n\t\t\tspecies.name_lookup(facet='rank', limit=0)\n\t\t\tspecies.name_lookup(facet='isExtinct', limit=0)\n\n\t\t\t# text highlighting\n\t\t\tspecies.name_lookup(q='plant', hl=True, limit=30)\n\n\t\t\t# Lookup by datasetKey\n\t\t\tspecies.name_lookup(datasetKey='3f8a1297-3259-4700-91fc-acc4170b27ce')\n\t'''\n\targs = {'q': q, 'rank': rank, 'higherTaxonKey': higherTaxonKey,\n\t\t'status': status, 'isExtinct': isExtinct, 'habitat': habitat,\n\t\t'nameType': nameType, 'datasetKey': datasetKey,\n\t\t'nomenclaturalStatus': nomenclaturalStatus, 'limit': limit, 'offset': offset,\n\t\t'facet': bn(facet), 'facetMincount': facetMincount, 'facetMultiselect': facetMultiselect,\n\t\t'hl': bn(hl), 'verbose': bn(verbose), 'type': type}\n\tgbif_kwargs = {key: kwargs[key] for key in kwargs if key not in requests_argset}\n\tif gbif_kwargs is not None:\n\t\t\txx = dict(zip( [ re.sub('_', '.', x) for x in gbif_kwargs.keys() ], gbif_kwargs.values() ))\n\t\t\targs.update(xx)\n\tkwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}\n\treturn gbif_GET(gbif_baseurl + 'species/search', args, **kwargs)", "response": "This function is used to perform a fuzzy name lookup on a set of terms."} {"SOURCE": "codesearchnet", "instruction": "Can you implement a function in Python 3 that\nreturns occurrence counts for a predefined set of dimensions.", "response": "def count(taxonKey=None, basisOfRecord=None, country=None, isGeoreferenced=None,\n datasetKey=None, publishingCountry=None, typeStatus=None,\n issue=None, year=None, **kwargs):\n '''\n Returns occurrence counts for a predefined set of dimensions\n\n :param taxonKey: [int] A GBIF taxon key\n :param basisOfRecord: [str] A basis of record, as defined in the BasisOfRecord enum\n :param country: [str] A 
2-letter country code (as per ISO-3166-1)\n :param isGeoreferenced: [bool] Whether the occurrence is georeferenced\n :param datasetKey: [str] A GBIF dataset key (a uuid)\n :param publishingCountry: [str] A 2-letter country code of the publishing country\n :param typeStatus: [str] A type status\n :param issue: [str] A GBIF occurrence issue\n :param year: [int] A 4-digit year\n\n :return: dict\n\n Usage::\n\n from pygbif import occurrences\n occurrences.count(taxonKey = 3329049)\n occurrences.count(country = 'CA')\n occurrences.count(isGeoreferenced = True)\n occurrences.count(basisOfRecord = 'OBSERVATION')\n '''\n url = gbif_baseurl + 'occurrence/count'\n out = gbif_GET(url, {'taxonKey': taxonKey, 'basisOfRecord': basisOfRecord, 'country': country,\n 'isGeoreferenced': isGeoreferenced, 'datasetKey': datasetKey,\n 'publishingCountry': publishingCountry, 'typeStatus': typeStatus,\n 'issue': issue, 'year': year}, **kwargs)\n return out"} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef count_year(year, **kwargs):\n '''\n Lists occurrence counts by year\n\n :param year: [int] year range, e.g., ``1990,2000``. Does not support ranges like ``asterisk,2010``\n\n :return: dict\n\n Usage::\n\n from pygbif import occurrences\n occurrences.count_year(year = '1990,2000')\n '''\n url = gbif_baseurl + 'occurrence/counts/year'\n out = gbif_GET(url, {'year': year}, **kwargs)\n return out", "response": "Lists occurrence counts by year"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nreturn a dictionary of occurrence counts for datasets that cover a given taxon or country.", "response": "def count_datasets(taxonKey = None, country = None, **kwargs):\n '''\n Lists occurrence counts for datasets that cover a given taxon or country\n\n :param taxonKey: [int] Taxon key\n :param country: [str] A country, two letter code\n\n :return: dict\n\n Usage::\n\n from pygbif import occurrences\n occurrences.count_datasets(country = \"DE\")\n '''\n url = gbif_baseurl + 'occurrence/counts/datasets'\n out = gbif_GET(url, {'taxonKey': taxonKey, 'country': country}, **kwargs)\n return out"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\nreturning a dictionary containing the occurrence counts for all countries covered by the data published by the given country.", "response": "def count_countries(publishingCountry, **kwargs):\n '''\n Lists occurrence counts for all countries covered by the data published by the given country\n\n :param publishingCountry: [str] A two letter country code\n\n :return: dict\n\n Usage::\n\n from pygbif import occurrences\n occurrences.count_countries(publishingCountry = \"DE\")\n '''\n url = gbif_baseurl + 'occurrence/counts/countries'\n out = gbif_GET(url, {'publishingCountry': publishingCountry}, **kwargs)\n return out"} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef count_publishingcountries(country, **kwargs):\n '''\n Lists occurrence counts for all countries that publish data about the given country\n\n :param country: [str] A country, two letter code\n\n :return: dict\n\n Usage::\n\n from pygbif import occurrences\n occurrences.count_publishingcountries(country = \"DE\")\n '''\n url = gbif_baseurl + 'occurrence/counts/publishingCountries'\n out = gbif_GET(url, {\"country\": country}, **kwargs)\n return out", "response": "Returns a dictionary of occurrence counts for all countries that publish data about the given 
country."} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef _detect_notebook() -> bool:\n try:\n from IPython import get_ipython\n from ipykernel import zmqshell\n except ImportError:\n return False\n kernel = get_ipython()\n try:\n from spyder.utils.ipython.spyder_kernel import SpyderKernel\n\n if isinstance(kernel.kernel, SpyderKernel):\n return False\n except (ImportError, AttributeError):\n pass\n return isinstance(kernel, zmqshell.ZMQInteractiveShell)", "response": "Detect if code is running in a Jupyter Notebook."} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef _merge_layout(x: go.Layout, y: go.Layout) -> go.Layout:\n xjson = x.to_plotly_json()\n yjson = y.to_plotly_json()\n if 'shapes' in yjson and 'shapes' in xjson:\n xjson['shapes'] += yjson['shapes']\n yjson.update(xjson)\n return go.Layout(yjson)", "response": "Merge attributes from two layouts."} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\ntries to convert to pandas objects to datetimes.", "response": "def _try_pydatetime(x):\n \"\"\"Try to convert to pandas objects to datetimes.\n\n Plotly doesn't know how to handle them.\n \"\"\"\n try:\n # for datetimeindex\n x = [y.isoformat() for y in x.to_pydatetime()]\n except AttributeError:\n pass\n try:\n # for generic series\n x = [y.isoformat() for y in x.dt.to_pydatetime()]\n except AttributeError:\n pass\n return x"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\nreturning a spark chart with the specified shape.", "response": "def spark_shape(points, shapes, fill=None, color='blue', width=5, yindex=0, heights=None):\n \"\"\"TODO: Docstring for spark.\n\n Parameters\n ----------\n points : array-like\n shapes : array-like\n fill : array-like, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n assert len(points) == len(shapes) + 1\n data = [{'marker': {'color': 'white'}, 'x': [points[0], points[-1]], 'y': [yindex, yindex]}]\n\n if fill is None:\n fill = [False] * len(shapes)\n\n if heights is None:\n heights = [0.4] * len(shapes)\n\n lays = []\n for i, (shape, height) in enumerate(zip(shapes, heights)):\n if shape is None:\n continue\n if fill[i]:\n fillcolor = color\n else:\n fillcolor = 'white'\n lays.append(\n dict(\n type=shape,\n x0=points[i],\n x1=points[i + 1],\n y0=yindex - height,\n y1=yindex + height,\n xref='x',\n yref='y',\n fillcolor=fillcolor,\n line=dict(color=color, width=width),\n )\n )\n\n layout = dict(shapes=lays)\n\n return Chart(data=data, layout=layout)"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\ndraw a vertical line from xmin xmax color dash opacity opacity", "response": "def vertical(x, ymin=0, ymax=1, color=None, width=None, dash=None, opacity=None):\n \"\"\"Draws a vertical line from `ymin` to `ymax`.\n\n Parameters\n ----------\n xmin : int, optional\n xmax : int, optional\n color : str, optional\n width : number, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n lineattr = {}\n if color:\n lineattr['color'] = color\n if width:\n lineattr['width'] = width\n if dash:\n lineattr['dash'] = dash\n\n layout = dict(\n shapes=[dict(type='line', x0=x, x1=x, y0=ymin, y1=ymax, opacity=opacity, line=lineattr)]\n )\n return Chart(layout=layout)"} {"SOURCE": "codesearchnet", "instruction": "Can you implement a function in Python 3 that\ndraws a horizontal line from xmin to xmax.", "response": "def horizontal(y, xmin=0, xmax=1, 
color=None, width=None, dash=None, opacity=None):\n \"\"\"Draws a horizontal line from `xmin` to `xmax`.\n\n Parameters\n ----------\n xmin : int, optional\n xmax : int, optional\n color : str, optional\n width : number, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n lineattr = {}\n if color:\n lineattr['color'] = color\n if width:\n lineattr['width'] = width\n if dash:\n lineattr['dash'] = dash\n\n layout = dict(\n shapes=[dict(type='line', x0=xmin, x1=xmax, y0=y, y1=y, opacity=opacity, line=lineattr)]\n )\n return Chart(layout=layout)"} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef line(\n x=None,\n y=None,\n label=None,\n color=None,\n width=None,\n dash=None,\n opacity=None,\n mode='lines+markers',\n yaxis=1,\n fill=None,\n text=\"\",\n markersize=6,\n):\n \"\"\"Draws connected dots.\n\n Parameters\n ----------\n x : array-like, optional\n y : array-like, optional\n label : array-like, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n assert x is not None or y is not None, \"x or y must be something\"\n yn = 'y' + str(yaxis)\n lineattr = {}\n if color:\n lineattr['color'] = color\n if width:\n lineattr['width'] = width\n if dash:\n lineattr['dash'] = dash\n if y is None:\n y = x\n x = None\n if x is None:\n x = np.arange(len(y))\n else:\n x = _try_pydatetime(x)\n x = np.atleast_1d(x)\n y = np.atleast_1d(y)\n assert x.shape[0] == y.shape[0]\n if y.ndim == 2:\n if not hasattr(label, '__iter__'):\n if label is None:\n label = _labels()\n else:\n label = _labels(label)\n data = [\n go.Scatter(\n x=x,\n y=yy,\n name=ll,\n line=lineattr,\n mode=mode,\n text=text,\n fill=fill,\n opacity=opacity,\n yaxis=yn,\n marker=dict(size=markersize, opacity=opacity),\n )\n for ll, yy in zip(label, y.T)\n ]\n else:\n data = [\n go.Scatter(\n x=x,\n y=y,\n name=label,\n line=lineattr,\n mode=mode,\n text=text,\n fill=fill,\n opacity=opacity,\n yaxis=yn,\n marker=dict(size=markersize, opacity=opacity),\n )\n ]\n if yaxis == 1:\n return Chart(data=data)\n\n return Chart(data=data, layout={'yaxis' + str(yaxis): dict(overlaying='y')})", "response": "Draws connected dots.\n\n Parameters\n ----------\n x : array-like, optional\n y : array-like, optional\n label : array-like, optional\n\n Returns\n -------\n Chart"} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function for\ncreating a 3d line chart.", "response": "def line3d(\n x, y, z, label=None, color=None, width=None, dash=None, opacity=None, mode='lines+markers'\n):\n \"\"\"Create a 3d line chart.\"\"\"\n x = np.atleast_1d(x)\n y = np.atleast_1d(y)\n z = np.atleast_1d(z)\n assert x.shape == y.shape\n assert y.shape == z.shape\n lineattr = {}\n if color:\n lineattr['color'] = color\n if width:\n lineattr['width'] = width\n if dash:\n lineattr['dash'] = dash\n if y.ndim == 2:\n if not hasattr(label, '__iter__'):\n if label is None:\n label = _labels()\n else:\n label = _labels(label)\n data = [\n go.Scatter3d(x=xx, y=yy, z=zz, name=ll, line=lineattr, mode=mode, opacity=opacity)\n for ll, xx, yy, zz in zip(label, x.T, y.T, z.T)\n ]\n else:\n data = [go.Scatter3d(x=x, y=y, z=z, name=label, line=lineattr, mode=mode, opacity=opacity)]\n return Chart(data=data)"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\ndraw dots. 
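A minimal usage sketch for the `line` helper above. The import path is hypothetical, since this excerpt never names the module that defines `line` and `Chart`:

    import numpy as np
    # hypothetical import; the module defining line() above is not named in this excerpt
    # from chartlib import line

    x = np.arange(10)
    chart = line(x, x ** 2, label='squares', color='blue', width=2)  # connected dots
    chart.show()  # Chart.show() renders via plotly, as defined later in this module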
Parameters ---------- x : array-like, optional y : array-like, optional label : array-like, optional Returns ------- Chart", "response": "def scatter(\n x=None,\n y=None,\n label=None,\n color=None,\n width=None,\n dash=None,\n opacity=None,\n markersize=6,\n yaxis=1,\n fill=None,\n text=\"\",\n mode='markers',\n):\n \"\"\"Draws dots.\n\n Parameters\n ----------\n x : array-like, optional\n y : array-like, optional\n label : array-like, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n return line(\n x=x,\n y=y,\n label=label,\n color=color,\n width=width,\n dash=dash,\n opacity=opacity,\n mode=mode,\n yaxis=yaxis,\n fill=fill,\n text=text,\n markersize=markersize,\n )"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function that can\ncreate a bar chart.", "response": "def bar(x=None, y=None, label=None, mode='group', yaxis=1, opacity=None):\n \"\"\"Create a bar chart.\n\n Parameters\n ----------\n x : array-like, optional\n y : TODO, optional\n label : TODO, optional\n mode : 'group' or 'stack', default 'group'\n opacity : TODO, optional\n\n Returns\n -------\n Chart\n A Chart with bar graph data.\n\n \"\"\"\n assert x is not None or y is not None, \"x or y must be something\"\n yn = 'y' + str(yaxis)\n if y is None:\n y = x\n x = None\n if x is None:\n x = np.arange(len(y))\n else:\n x = _try_pydatetime(x)\n x = np.atleast_1d(x)\n y = np.atleast_1d(y)\n if y.ndim == 2:\n if not hasattr(label, '__iter__'):\n if label is None:\n label = _labels()\n else:\n label = _labels(label)\n data = [go.Bar(x=x, y=yy, name=ll, yaxis=yn, opacity=opacity) for ll, yy in zip(label, y.T)]\n else:\n data = [go.Bar(x=x, y=y, name=label, yaxis=yn, opacity=opacity)]\n if yaxis == 1:\n return Chart(data=data, layout={'barmode': mode})\n\n return Chart(data=data, layout={'barmode': mode, 'yaxis' + str(yaxis): dict(overlaying='y')})"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\ncreating a heatmap. 
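A minimal usage sketch for the `bar` helper defined just above (same hypothetical import caveat as before); passing a 2-D `y` yields one bar trace per column:

    import numpy as np

    y = np.array([[1, 4], [2, 5], [3, 6]])  # three categories, two series (one per column)
    chart = bar(x=['a', 'b', 'c'], y=y, label=['first', 'second'], mode='group')
    chart.show()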
Parameters ---------- z : TODO x : TODO, optional y : TODO, optional colorscale : TODO, optional Returns ------- Chart", "response": "def heatmap(z, x=None, y=None, colorscale='Viridis'):\n \"\"\"Create a heatmap.\n\n Parameters\n ----------\n z : TODO\n x : TODO, optional\n y : TODO, optional\n colorscale : TODO, optional\n\n Returns\n -------\n Chart\n\n\n \"\"\"\n z = np.atleast_1d(z)\n data = [go.Heatmap(z=z, x=x, y=y, colorscale=colorscale)]\n return Chart(data=data)"} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef fill_zero(\n x=None,\n y=None,\n label=None,\n color=None,\n width=None,\n dash=None,\n opacity=None,\n mode='lines+markers',\n **kargs\n):\n \"\"\"Fill to zero.\n\n Parameters\n ----------\n x : array-like, optional\n y : TODO, optional\n label : TODO, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n return line(\n x=x,\n y=y,\n label=label,\n color=color,\n width=width,\n dash=dash,\n opacity=opacity,\n mode=mode,\n fill='tozeroy',\n **kargs\n )", "response": "Fill to zero.\n\n Parameters\n ----------\n x : array-like, optional\n y : TODO, optional\n label : TODO, optional\n\n Returns\n -------\n Chart"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef fill_between(\n x=None,\n ylow=None,\n yhigh=None,\n label=None,\n color=None,\n width=None,\n dash=None,\n opacity=None,\n mode='lines+markers',\n **kargs\n):\n \"\"\"Fill between `ylow` and `yhigh`.\n\n Parameters\n ----------\n x : array-like, optional\n ylow : TODO, optional\n yhigh : TODO, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n plot = line(\n x=x,\n y=ylow,\n label=label,\n color=color,\n width=width,\n dash=dash,\n opacity=opacity,\n mode=mode,\n fill=None,\n **kargs\n )\n plot += line(\n x=x,\n y=yhigh,\n label=label,\n color=color,\n width=width,\n dash=dash,\n opacity=opacity,\n mode=mode,\n fill='tonexty',\n **kargs\n )\n return plot", "response": "Fill between ylow and yhigh."} {"SOURCE": "codesearchnet", "instruction": "Create a Python 3 function to\nreturn a chart that shows the rug of the object.", "response": "def rug(x, label=None, opacity=None):\n \"\"\"Rug chart.\n\n Parameters\n ----------\n x : array-like, optional\n label : TODO, optional\n opacity : TODO, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n x = _try_pydatetime(x)\n x = np.atleast_1d(x)\n data = [\n go.Scatter(\n x=x,\n y=np.ones_like(x),\n name=label,\n opacity=opacity,\n mode='markers',\n marker=dict(symbol='line-ns-open'),\n )\n ]\n layout = dict(\n barmode='overlay',\n hovermode='closest',\n legend=dict(traceorder='reversed'),\n xaxis1=dict(zeroline=False),\n yaxis1=dict(\n domain=[0.85, 1],\n showline=False,\n showgrid=False,\n zeroline=False,\n anchor='free',\n position=0.0,\n showticklabels=False,\n ),\n )\n return Chart(data=data, layout=layout)"} {"SOURCE": "codesearchnet", "instruction": "Implement a Python 3 function for\nsurfacing plot. 
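A minimal sketch composing the helpers above: `fill_between` draws a band, and a center line is overlaid with `+=`, the same chart-merging operator the `fill_between` implementation itself relies on:

    import numpy as np

    x = np.linspace(0, 2 * np.pi, 50)
    band = fill_between(x, ylow=np.sin(x) - 0.2, yhigh=np.sin(x) + 0.2, opacity=0.3)
    band += line(x, np.sin(x), color='black', mode='lines')  # Chart objects merge with +=
    band.show()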
Parameters ---------- x : array-like, optional y : array-like, optional z : array-like, optional Returns ------- Chart", "response": "def surface(x, y, z):\n \"\"\"Surface plot.\n\n Parameters\n ----------\n x : array-like, optional\n y : array-like, optional\n z : array-like, optional\n\n Returns\n -------\n Chart\n\n \"\"\"\n data = [go.Surface(x=x, y=y, z=z)]\n return Chart(data=data)"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\nreturning a chart that contains a histogram of the data points x.", "response": "def hist(x, mode='overlay', label=None, opacity=None, horz=False, histnorm=None):\n \"\"\"Histogram.\n\n Parameters\n ----------\n x : array-like\n mode : str, optional\n label : TODO, optional\n opacity : float, optional\n horz : bool, optional\n histnorm : None, \"percent\", \"probability\", \"density\", \"probability density\", optional\n Specifies the type of normalization used for this histogram trace.\n If ``None``, the span of each bar corresponds to the number of occurrences\n (i.e. the number of data points lying inside the bins). If \"percent\",\n the span of each bar corresponds to the percentage of occurrences with\n respect to the total number of sample points (here, the sum of all bin\n area equals 100%). If \"density\", the span of each bar corresponds to the\n number of occurrences in a bin divided by the size of the bin interval\n (here, the sum of all bin area equals the total number of sample\n points). If \"probability density\", the span of each bar corresponds to\n the probability that an event will fall into the corresponding bin\n (here, the sum of all bin area equals 1).\n\n Returns\n -------\n Chart\n\n \"\"\"\n x = np.atleast_1d(x)\n if horz:\n kargs = dict(y=x)\n else:\n kargs = dict(x=x)\n layout = dict(barmode=mode)\n data = [go.Histogram(opacity=opacity, name=label, histnorm=histnorm, **kargs)]\n return Chart(data=data, layout=layout)"} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef ytickangle(self, angle, index=1):\n self.layout['yaxis' + str(index)]['tickangle'] = angle\n return self", "response": "Set the angle of the y - axis tick labels."} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\nsetting the size of the label.", "response": "def ylabelsize(self, size, index=1):\n \"\"\"Set the size of the label.\n\n Parameters\n ----------\n size : int\n\n Returns\n -------\n Chart\n\n \"\"\"\n self.layout['yaxis' + str(index)]['titlefont']['size'] = size\n return self"} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef yticksize(self, size, index=1):\n self.layout['yaxis' + str(index)]['tickfont']['size'] = size\n return self", "response": "Set the tick font size."} {"SOURCE": "codesearchnet", "instruction": "Can you create a Python 3 function that\nsets the tick values.", "response": "def ytickvals(self, values, index=1):\n \"\"\"Set the tick values.\n\n Parameters\n ----------\n values : array-like\n\n Returns\n -------\n Chart\n\n \"\"\"\n self.layout['yaxis' + str(index)]['tickvals'] = values\n return self"} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef yticktext(self, labels, index=1):\n self.layout['yaxis' + str(index)]['ticktext'] = labels\n return self", "response": "Set the tick labels."} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef ylim(self, 
low, high, index=1):\n self.layout['yaxis' + str(index)]['range'] = [low, high]\n return self", "response": "Set yaxis limits.\n\n Parameters\n ----------\n low : number\n high : number\n index : int, optional\n\n Returns\n -------\n Chart"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\nsetting the tick distance.", "response": "def ydtick(self, dtick, index=1):\n \"\"\"Set the tick distance.\"\"\"\n self.layout['yaxis' + str(index)]['dtick'] = dtick\n return self"} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef ynticks(self, nticks, index=1):\n self.layout['yaxis' + str(index)]['nticks'] = nticks\n return self", "response": "Set the number of ticks."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef show(\n self,\n filename: Optional[str] = None,\n show_link: bool = True,\n auto_open: bool = True,\n detect_notebook: bool = True,\n ) -> None:\n \"\"\"Display the chart.\n\n Parameters\n ----------\n filename : str, optional\n Save plot to this filename, otherwise it's saved to a temporary file.\n show_link : bool, optional\n Show link to plotly.\n auto_open : bool, optional\n Automatically open the plot (in the browser).\n detect_notebook : bool, optional\n Try to detect if we're running in a notebook.\n\n \"\"\"\n kargs = {}\n if detect_notebook and _detect_notebook():\n py.init_notebook_mode()\n plot = py.iplot\n else:\n plot = py.plot\n if filename is None:\n filename = NamedTemporaryFile(prefix='plotly', suffix='.html', delete=False).name\n kargs['filename'] = filename\n kargs['auto_open'] = auto_open\n\n plot(self, show_link=show_link, **kargs)", "response": "Display the chart.\n\n Parameters\n ----------\n filename : str, optional\n Save plot to this filename, otherwise it's saved to a temporary file.\n show_link : bool, optional\n Show link to plotly.\n auto_open : bool, optional\n Automatically open the plot (in the browser).\n detect_notebook : bool, optional\n Try to detect if we're running in a notebook."} {"SOURCE": "codesearchnet", "instruction": "How would you implement a function in Python 3 that\nsaves the chart to an html file.", "response": "def save(\n self,\n filename: Optional[str] = None,\n show_link: bool = True,\n auto_open: bool = False,\n output: str = 'file',\n plotlyjs: bool = True,\n ) -> str:\n \"\"\"Save the chart to an html file.\"\"\"\n if filename is None:\n filename = NamedTemporaryFile(prefix='plotly', suffix='.html', delete=False).name\n # NOTE: this doesn't work for output 'div'\n py.plot(\n self,\n show_link=show_link,\n filename=filename,\n auto_open=auto_open,\n output_type=output,\n include_plotlyjs=plotlyjs,\n )\n return filename"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef RegisterMethod(cls, *args, **kwargs):\n unpack_error = True\n\n try:\n f, library_path = args\n unpack_error = False\n cls._RegisterMethod(f, library_path, **kwargs)\n\n except:\n if not unpack_error:\n raise\n\n def register_decorator(f):\n library_path, = args\n cls._RegisterMethod(f, library_path, **kwargs)\n\n return f\n return register_decorator", "response": "Register a method in the class."} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef RegisterAt(cls, *args, **kwargs):\n unpack_error = True\n\n try:\n n, f, library_path = args\n unpack_error = False\n cls._RegisterAt(n, f, library_path, 
**kwargs)\n\n except:\n if not unpack_error:\n raise\n\n def register_decorator(f):\n n, library_path = args\n cls._RegisterAt(n, f, library_path, **kwargs)\n\n return f\n return register_decorator", "response": "**RegisterAt**\n\n RegisterAt(n, f, library_path, alias=None, original_name=None, doc=None, wrapped=None, explanation=\"\", method_type=utils.identity, explain=True, _return_type=None)\n\nMost of the time you don't want to register a method as such, that is, you don't care about the `self` builder object, instead you want to register a function that transforms the value being piped down the DSL. For this you can use `RegisterAt` so e.g.\n\n def some_fun(obj, arg1, arg2):\n # code\n\n @MyBuilder.RegisterMethod(\"my_lib.\")\n def some_fun_wrapper(self, arg1, arg2):\n return self.ThenAt(1, some_fun, arg1, arg2)\n\ncan be written directly as\n\n @MyBuilder.RegisterAt(1, \"my_lib.\")\n def some_fun(obj, arg1, arg2):\n # code\n\nFor this case you can just use `Register` which is a shortcut for `RegisterAt(1, ...)`\n\n @MyBuilder.Register(\"my_lib.\")\n def some_fun(obj, arg1, arg2):\n # code\n\n**Also See**\n\n* `phi.builder.Builder.RegisterMethod`"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef PatchAt(cls, n, module, method_wrapper=None, module_alias=None, method_name_modifier=utils.identity, blacklist_predicate=_False, whitelist_predicate=_True, return_type_predicate=_None, getmembers_predicate=inspect.isfunction, admit_private=False, explanation=\"\"):\n _rtp = return_type_predicate\n\n return_type_predicate = (lambda x: _rtp) if inspect.isclass(_rtp) and issubclass(_rtp, Builder) else _rtp\n module_name = module_alias if module_alias else module.__name__ + '.'\n patch_members = _get_patch_members(module, blacklist_predicate=blacklist_predicate, whitelist_predicate=whitelist_predicate, getmembers_predicate=getmembers_predicate, admit_private=admit_private)\n\n for name, f in patch_members:\n wrapped = None\n\n if method_wrapper:\n g = method_wrapper(f)\n wrapped = f\n else:\n g = f\n\n cls.RegisterAt(n, g, module_name, wrapped=wrapped, _return_type=return_type_predicate(name), alias=method_name_modifier(name), explanation=explanation)", "response": "This classmethod allows you to patch all of a module's functions or methods at a specific position."} {"SOURCE": "codesearchnet", "instruction": "Can you create a Python 3 function that\ngiven a function, returns a string that pretty much looks how the method signature would be written in Python.", "response": "def get_method_sig(method):\n \"\"\" Given a function, it returns a string that pretty much looks how the\n function signature would be written in python.\n\n :param method: a python method\n :return: A string describing the Python method signature.\n eg: \"my_method(first_arg, second_arg=42, third_arg='something')\"\n \"\"\"\n\n # The return value of ArgSpec is a bit weird, as the list of arguments and\n # list of defaults are returned in separate arrays.\n # eg: ArgSpec(args=['first_arg', 'second_arg', 'third_arg'],\n # varargs=None, keywords=None, defaults=(42, 'something'))\n argspec = inspect.getargspec(method)\n arg_index=0\n args = []\n\n # Use the args and defaults array returned by argspec and find out\n # which arguments have defaults\n for arg in argspec.args:\n default_arg = _get_default_arg(argspec.args, argspec.defaults, arg_index)\n if default_arg.has_default:\n args.append(\"%s=%s\" % (arg, default_arg.default_value))\n else:\n 
args.append(arg)\n arg_index += 1\n return \"%s(%s)\" % (method.__name__, \", \".join(args))"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef ThenAt(self, n, f, *_args, **kwargs):\n _return_type = None\n n_args = n - 1\n\n if '_return_type' in kwargs:\n _return_type = kwargs['_return_type']\n del kwargs['_return_type']\n\n @utils.lift\n def g(x):\n\n new_args = _args[0:n_args] + (x,) + _args[n_args:] if n_args >= 0 else _args\n return f(*new_args, **kwargs)\n\n return self.__then__(g, _return_type=_return_type)", "response": "A function that takes a single argument and returns a partial that expects a single argument at the given position."} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef Then(self, f, *args, **kwargs):\n return self.ThenAt(1, f, *args, **kwargs)", "response": "A second order then function."} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef Then5(self, f, arg1, arg2, arg3, arg4, *args, **kwargs):\n args = (arg1, arg2, arg3, arg4) + args\n return self.ThenAt(5, f, *args, **kwargs)", "response": "A second order then 5 function."} {"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\nwhile `Seq` is sequential, `phi.dsl.Expression.List` allows you to split the computation and get back a list with the result of each path. While the list literal should be the most common incarnation of this expression, it can actually be any iterable (implements `__iter__`) that is not a tuple and yields a valid expression. The expression k = List(f, g) is equivalent to k = lambda x: [ f(x), g(x) ] In general, the following rules apply after compilation: **General Branching** List(f0, f1, ..., fn) is equivalent to lambda x: [ f0(x), f1(x), ..., fn(x) ] **Composing & Branching** It is interesting to see how branching interacts with composing. The expression Seq(f, List(g, h)) is *almost* equivalent to List( Seq(f, g), Seq(f, h) ) As you see it's as if `f` were distributed over the List. We say *almost* because their implementation is different def _lambda(x): x = f(x) return [ g(x), h(x) ] vs lambda x: [ g(f(x)), h(f(x)) ] As you see `f` is only executed once in the first one. Both should yield the same result if `f` is a pure function. ### Examples from phi import P, List avg_word_length = P.Pipe( \"1 22 333\", lambda s: s.split(' '), # ['1', '22', '333'] lambda l: map(len, l), # [1, 2, 3] List( sum # 1 + 2 + 3 == 6 , len # len([1, 2, 3]) == 3 ), lambda l: l[0] / l[1] # sum / len == 6 / 3 == 2 ) assert avg_word_length == 2 The previous could also be done more briefly like this from phi import P, Obj, List avg_word_length = P.Pipe( \"1 22 333\", Obj .split(' ') # ['1', '22', '333'] .map(len) # [1, 2, 3] .List( sum #sum([1, 2, 3]) == 6 , len #len([1, 2, 3]) == 3 ), P[0] / P[1] #6 / 3 == 2 ) assert avg_word_length == 2 In the example above the last expression P[0] / P[1] works for a couple of reasons 1. The previous expression returns a list 2. In general the expression `P[x]` compiles to a function with the form `lambda obj: obj[x]` 3. The class `Expression` (the class from which the object `P` inherits) overrides most operators to create functions easily. 
For example, the expression (P * 2) / (P + 1) compiles to a function of the form lambda x: (x * 2) / (x + 1) Check out the documentation for Phi [lambdas](https://cgarciae.github.io/phi/lambdas.m.html).", "response": "def List(self, *branches, **kwargs):\n \"\"\"\nWhile `Seq` is sequential, `phi.dsl.Expression.List` allows you to split the computation and get back a list with the result of each path. While the list literal should be the most common incarnation of this expression, it can actually be any iterable (implements `__iter__`) that is not a tuple and yields a valid expression.\n\nThe expression\n\n k = List(f, g)\n\nis equivalent to\n\n k = lambda x: [ f(x), g(x) ]\n\n\nIn general, the following rules apply after compilation:\n\n**General Branching**\n\n List(f0, f1, ..., fn)\n\nis equivalent to\n\n lambda x: [ f0(x), f1(x), ..., fn(x) ]\n\n\n**Composing & Branching**\n\nIt is interesting to see how branching interacts with composing. The expression\n\n Seq(f, List(g, h))\n\nis *almost* equivalent to\n\n List( Seq(f, g), Seq(f, h) )\n\nAs you see it's as if `f` were distributed over the List. We say *almost* because their implementation is different\n\n def _lambda(x):\n x = f(x)\n return [ g(x), h(x) ]\n\nvs\n\n lambda x: [ g(f(x)), h(f(x)) ]\n\nAs you see `f` is only executed once in the first one. Both should yield the same result if `f` is a pure function.\n\n### Examples\n\n from phi import P, List\n\n avg_word_length = P.Pipe(\n \"1 22 333\",\n lambda s: s.split(' '), # ['1', '22', '333']\n lambda l: map(len, l), # [1, 2, 3]\n List(\n sum # 1 + 2 + 3 == 6\n ,\n len # len([1, 2, 3]) == 3\n ),\n lambda l: l[0] / l[1] # sum / len == 6 / 3 == 2\n )\n\n assert avg_word_length == 2\n\nThe previous could also be done more briefly like this\n\n from phi import P, Obj, List\n\n avg_word_length = P.Pipe(\n \"1 22 333\", Obj\n .split(' ') # ['1', '22', '333']\n .map(len) # [1, 2, 3]\n .List(\n sum #sum([1, 2, 3]) == 6\n ,\n len #len([1, 2, 3]) == 3\n ),\n P[0] / P[1] #6 / 3 == 2\n )\n\n assert avg_word_length == 2\n\nIn the example above the last expression\n\n P[0] / P[1]\n\nworks for a couple of reasons\n\n1. The previous expression returns a list\n2. In general the expression `P[x]` compiles to a function with the form `lambda obj: obj[x]`\n3. The class `Expression` (the class from which the object `P` inherits) overrides most operators to create functions easily. For example, the expression\n\n (P * 2) / (P + 1)\n\ncompiles to a function of the form\n\n lambda x: (x * 2) / (x + 1)\n\nCheck out the documentation for Phi [lambdas](https://cgarciae.github.io/phi/lambdas.m.html).\n\n \"\"\"\n gs = [ _parse(code)._f for code in branches ]\n\n def h(x, state):\n ys = []\n for g in gs:\n y, state = g(x, state)\n ys.append(y)\n\n return (ys, state)\n\n return self.__then__(h, **kwargs)"} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\nreturns a new sequence that is the sequence of the given elements.", "response": "def Seq(self, *sequence, **kwargs):\n \"\"\"\n`Seq` is used to express function composition. The expression\n\n Seq(f, g)\n\nis equivalent to\n\n lambda x: g(f(x))\n\nAs you see, it's a little different from the mathematical definition. Execution order flows from left to right, which makes reading and reasoning about code much easier. This behaviour is based upon the `|>` (pipe) operator found in languages like F#, Elixir and Elm. 
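To make the left-to-right rule concrete, a minimal sketch with plain callables (assuming the phi package is installed, as the examples further below also do):

    from phi import Seq

    double = lambda x: x * 2
    inc = lambda x: x + 1

    f = Seq(double, inc)  # flows left to right: double first, then inc
    assert f(3) == inc(double(3)) == 7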
{"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\ncomposes the given expressions so they are applied in order, from left to right.", "response": "def Seq(self, *sequence, **kwargs):\n    \"\"\"\n`Seq` is used to express function composition. The expression\n\n    Seq(f, g)\n\nis equivalent to\n\n    lambda x: g(f(x))\n\nAs you see, it's a little different from the mathematical definition. Execution order flows from left to right; this makes reading and reasoning about code much easier. This behaviour is based upon the `|>` (pipe) operator found in languages like F#, Elixir and Elm. You can pack as many expressions as you like and they will be applied in order to the data that is passed through them when compiled and executed.\n\nIn general, the following rules apply for Seq:\n\n**General Sequence**\n\n    Seq(f0, f1, ..., fn-1, fn)\n\nis equivalent to\n\n    lambda x: fn(fn-1(...(f1(f0(x)))))\n\n**Single Function**\n\n    Seq(f)\n\nis equivalent to\n\n    f\n\n**Identity**\n\nThe empty Seq\n\n    Seq()\n\nis equivalent to\n\n    lambda x: x\n\n### Examples\n\n    from phi import P, Seq\n\n    f = Seq(\n        P * 2,\n        P + 1,\n        P ** 2\n    )\n\n    assert f(1) == 9 # ((1 * 2) + 1) ** 2\n\nThe previous example using `P.Pipe`\n\n    from phi import P\n\n    assert 9 == P.Pipe(\n        1,\n        P * 2,  #1 * 2 == 2\n        P + 1,  #2 + 1 == 3\n        P ** 2  #3 ** 2 == 9\n    )\n    \"\"\"\n    fs = [ _parse(elem)._f for elem in sequence ]\n\n    def g(x, state):\n        return functools.reduce(lambda args, f: f(*args), fs, (x, state))\n\n    return self.__then__(g, **kwargs)"} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef With(self, context_manager, *body, **kwargs):\n    context_f = _parse(context_manager)._f\n    body_f = E.Seq(*body)._f\n\n    def g(x, state):\n        context, state = context_f(x, state)\n        with context as scope:\n            with _WithContextManager(scope):\n                return body_f(x, state)\n\n    return self.__then__(g, **kwargs)", "response": "Evaluates the body expressions inside the context manager produced by the first expression."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef ReadList(self, *branches, **kwargs):\n    branches = map(lambda x: E.Read(x) if isinstance(x, str) else x, branches)\n\n    return self.List(*branches, **kwargs)", "response": "A version of phi.dsl.Expression.List where any string argument x is translated to Read(x)."} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef Write(self, *state_args, **state_dict):\n    if len(state_dict) + len(state_args) < 1:\n        raise Exception(\"Please include at-least 1 state variable, got {0} and {1}\".format(state_args, state_dict))\n\n    if len(state_dict) > 1:\n        raise Exception(\"Please include at-most 1 keyword argument expression, got {0}\".format(state_dict))\n\n    if len(state_dict) > 0:\n        state_key = next(iter(state_dict.keys()))\n        write_expr = state_dict[state_key]\n\n        state_args += (state_key,)\n\n        expr = self >> write_expr\n\n    else:\n        expr = self\n\n\n\n    def g(x, state):\n        update = { key: x for key in state_args }\n        state = utils.merge(state, update)\n\n        #side effect for convenience\n        _StateContextManager.REFS.update(state)\n\n        return x, state\n\n\n    return expr.__then__(g)", "response": "See phi.dsl.Expression.Write"}
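The `(x, state)` threading that `Write` relies on can be sketched in isolation; this is an illustrative model of the documented behaviour, not phi's internals:

    def write(key):
        # Store the current value in the state under `key`, pass the value through.
        def g(x, state):
            return x, dict(state, **{key: x})
        return g

    def read(key):
        # Replace the current value with the one previously written under `key`.
        def g(x, state):
            return state[key], state
        return g

    steps = [lambda x, s: (x * 2, s), write("doubled"),
             lambda x, s: (x + 1, s), read("doubled")]
    x, state = 3, {}
    for step in steps:
        x, state = step(x, state)
    assert (x, state["doubled"]) == (6, 6)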
{"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script to\nreturn a constant expression that always yields the given value.", "response": "def Val(self, val, **kwargs):\n    \"\"\"\nThe expression\n\n    Val(a)\n\nis equivalent to the constant function\n\n    lambda x: a\n\nAll expressions in this module interpret values that are not functions as constant functions using `Val`, for example\n\n    Seq(1, P + 1)\n\nis equivalent to\n\n    Seq(Val(1), P + 1)\n\nThe previous expression as a whole is a constant function since it will return `2` no matter what input you give it.\n    \"\"\"\n    f = utils.lift(lambda z: val)\n\n    return self.__then__(f, **kwargs)"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef Else(self, *Else, **kwargs):\n    root = self._root\n    ast = self._ast\n\n    next_else = E.Seq(*Else)._f\n    ast = _add_else(ast, next_else)\n\n    g = _compile_if(ast)\n\n    return root.__then__(g, **kwargs)", "response": "See phi.dsl.Expression.If"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef length(string, until=None):\n    if until is None:\n        return sum(1 for _ in GraphemeIterator(string))\n\n    iterator = graphemes(string)\n    count = 0\n    while True:\n        try:\n            if count >= until:\n                break\n            next(iterator)\n        except StopIteration:\n            break\n        else:\n            count += 1\n\n    return count", "response": "Returns the number of graphemes in the string, counting at most `until` of them if given."} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef slice(string, start=None, end=None):\n\n    if start is None:\n        start = 0\n    if end is not None and start >= end:\n        return \"\"\n\n    if start < 0:\n        raise NotImplementedError(\"Negative indexing is currently not supported.\")\n\n    sum_ = 0\n    start_index = None\n    for grapheme_index, grapheme_length in enumerate(grapheme_lengths(string)):\n        if grapheme_index == start:\n            start_index = sum_\n        elif grapheme_index == end:\n            return string[start_index:sum_]\n        sum_ += grapheme_length\n\n    if start_index is not None:\n        return string[start_index:]\n\n    return \"\"", "response": "Returns a substring of the given string counting graphemes instead of codepoints."}
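A short usage sketch of the two grapheme-aware helpers above, assuming the `grapheme` package is installed; each regional-indicator flag below is a single grapheme built from two codepoints:

    import grapheme

    flags = "🇸🇪🇸🇪"                             # two flag graphemes
    assert len(flags) == 4                       # codepoint count
    assert grapheme.length(flags) == 2           # user-perceived characters
    assert grapheme.slice(flags, 0, 1) == "🇸🇪"  # never splits a cluster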
{"SOURCE": "codesearchnet", "instruction": "Implement a function in Python 3 to\nreturn True if the sequence of graphemes in substring is also present in string.", "response": "def contains(string, substring):\n    \"\"\"\n    Returns true if the sequence of graphemes in substring is also present in string.\n\n    This differs from the normal python `in` operator, since the python operator will return\n    true if the sequence of codepoints is within the other string without considering\n    grapheme boundaries.\n\n    Performance notes: Very fast if `substring not in string`, since that also means that\n    the same graphemes can not be in the two strings. Otherwise this function has linear time\n    complexity in relation to the string length. It will traverse the sequence of graphemes until\n    a match is found, so it will generally perform better for grapheme sequences that match early.\n\n    >>> \"\ud83c\uddf8\ud83c\uddea\" in \"\ud83c\uddea\ud83c\uddf8\ud83c\uddea\ud83c\uddea\"\n    True\n    >>> grapheme.contains(\"\ud83c\uddea\ud83c\uddf8\ud83c\uddea\ud83c\uddea\", \"\ud83c\uddf8\ud83c\uddea\")\n    False\n    \"\"\"\n    if substring not in string:\n        return False\n\n    substr_graphemes = list(graphemes(substring))\n\n    if len(substr_graphemes) == 0:\n        return True\n    elif len(substr_graphemes) == 1:\n        return substr_graphemes[0] in graphemes(string)\n    else:\n        str_iter = graphemes(string)\n        str_sub_part = []\n        for _ in range(len(substr_graphemes)):\n            try:\n                str_sub_part.append(next(str_iter))\n            except StopIteration:\n                return False\n\n        for g in str_iter:\n            if str_sub_part == substr_graphemes:\n                return True\n\n            str_sub_part.append(g)\n            str_sub_part.pop(0)\n        return str_sub_part == substr_graphemes"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nlike str.startswith, but check that the given string starts with the given prefix's sequence of graphemes.", "response": "def startswith(string, prefix):\n    \"\"\"\n    Like str.startswith, but also checks that the string starts with the given prefix's sequence of graphemes.\n\n    str.startswith may return true for a prefix that is not visually represented as a prefix if a grapheme cluster\n    is continued after the prefix ends.\n\n    >>> grapheme.startswith(\"\u270a\ud83c\udffe\", \"\u270a\")\n    False\n    >>> \"\u270a\ud83c\udffe\".startswith(\"\u270a\")\n    True\n    \"\"\"\n    return string.startswith(prefix) and safe_split_index(string, len(prefix)) == len(prefix)"} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef endswith(string, suffix):\n    expected_index = len(string) - len(suffix)\n    return string.endswith(suffix) and safe_split_index(string, expected_index) == expected_index", "response": "Like str.endswith, but also checks that the string ends with the given suffix's sequence of graphemes."} {"SOURCE": "codesearchnet", "instruction": "Can you create a Python 3 function that\nreturns the highest index up to `max_len` at which the given string can be sliced, without breaking a grapheme. This is useful for when you want to split or take a substring from a string, and don't really care about the exact grapheme length, but don't want to risk breaking existing graphemes. This function does not normally traverse the full grapheme sequence up to the given length, so it can be used for arbitrarily long strings and high `max_len`s. However, some grapheme boundaries depend on the previous state, so the worst case performance is O(n). In practice, it's only very long non-broken sequences of country flags (represented as Regional Indicators) that will perform badly. The return value will always be between `0` and `len(string)`. 
>>> string = \"tamil \u0ba8\u0bbf (ni)\" >>> i = grapheme.safe_split_index(string, 7) >>> i 6 >>> string[:i] 'tamil ' >>> string[i:] '\u0ba8\u0bbf (ni)'", "response": "def safe_split_index(string, max_len):\n \"\"\"\n Returns the highest index up to `max_len` at which the given string can be sliced, without breaking a grapheme.\n\n This is useful for when you want to split or take a substring from a string, and don't really care about\n the exact grapheme length, but don't want to risk breaking existing graphemes.\n\n This function does normally not traverse the full grapheme sequence up to the given length, so it can be used\n for arbitrarily long strings and high `max_len`s. However, some grapheme boundaries depend on the previous state,\n so the worst case performance is O(n). In practice, it's only very long non-broken sequences of country flags\n (represented as Regional Indicators) that will perform badly.\n\n The return value will always be between `0` and `len(string)`.\n\n >>> string = \"tamil \u0ba8\u0bbf (ni)\"\n >>> i = grapheme.safe_split_index(string, 7)\n >>> i\n 6\n >>> string[:i]\n 'tamil '\n >>> string[i:]\n '\u0ba8\u0bbf (ni)'\n \"\"\"\n last_index = get_last_certain_break_index(string, max_len)\n for l in grapheme_lengths(string[last_index:]):\n if last_index + l > max_len:\n break\n last_index += l\n return last_index"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef readB1logfile(filename):\n dic = dict()\n # try to open. If this fails, an exception is raised\n with open(filename, 'rt', encoding='utf-8') as f:\n for l in f:\n l = l.strip()\n if l[0] in '#!%\\'':\n continue # treat this line as a comment\n try:\n # find the first tuple in _logfile_data where the first element of the\n # tuple is the starting of the line.\n ld = [ld_ for ld_ in _logfile_data if l.split(\n ':', 1)[0].strip() == ld_[0]][0]\n except IndexError:\n # line is not recognized. We can still try to load it: find the first\n # semicolon. If found, the part of the line before it is stripped\n # from whitespaces and will be the key. The part after it is stripped\n # from whitespaces and parsed with misc.parse_number(). If no\n if ':' in l:\n key, val = [x.strip() for x in l.split(':', 1)]\n val = misc.parse_number(val)\n dic[key] = val\n try:\n # fix the character encoding in files written by a\n # previous version of this software.\n dic[key] = dic[key].encode('latin2').decode('utf-8')\n except (UnicodeDecodeError, UnicodeEncodeError, AttributeError):\n pass\n else:\n dic[l.strip()] = True\n continue\n try:\n reader = ld[3]\n except IndexError:\n reader = str\n rhs = l.split(':', 1)[1].strip()\n try:\n vals = reader(rhs)\n except ValueError:\n if rhs.lower() == 'none':\n vals = None\n else:\n raise\n if isinstance(ld[1], tuple):\n # more than one field names. The reader function should return a\n # tuple here, a value for each field.\n if len(vals) != len(ld[1]):\n raise ValueError(\n 'Cannot read %d values from line %s in file!' 
% (len(ld[1]), l))\n dic.update(dict(list(zip(ld[1], vals))))\n else:\n dic[ld[1]] = vals\n dic['__Origin__'] = 'B1 log'\n dic['__particle__'] = 'photon'\n return dic", "response": "Read the B1 logfile and return a dictionary."} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef writeB1logfile(filename, data):\n allkeys = list(data.keys())\n f = open(filename, 'wt', encoding='utf-8')\n for ld in _logfile_data: # process each line\n linebegin = ld[0]\n fieldnames = ld[1]\n # set the default formatter if it is not given\n if len(ld) < 3:\n formatter = str\n elif ld[2] is None:\n formatter = str\n else:\n formatter = ld[2]\n # this will contain the formatted values.\n formatted = ''\n if isinstance(fieldnames, str):\n # scalar field name, just one field. Formatter should be a\n # callable.\n if fieldnames not in allkeys:\n # this field has already been processed\n continue\n try:\n formatted = formatter(data[fieldnames])\n except KeyError:\n # field not found in param structure\n continue\n elif isinstance(fieldnames, tuple):\n # more than one field names in a tuple. In this case, formatter can\n # be a tuple of callables...\n if all([(fn not in allkeys) for fn in fieldnames]):\n # if all the fields have been processed:\n continue\n if isinstance(formatter, tuple) and len(formatter) == len(fieldnames):\n formatted = ' '.join([ft(data[fn])\n for ft, fn in zip(formatter, fieldnames)])\n # ...or a single callable...\n elif not isinstance(formatter, tuple):\n formatted = formatter([data[fn] for fn in fieldnames])\n # ...otherwise raise an exception.\n else:\n raise SyntaxError('Programming error: formatter should be a scalar or a tuple\\\nof the same length as the field names in logfile_data.')\n else: # fieldnames is neither a string, nor a tuple.\n raise SyntaxError(\n 'Invalid syntax (programming error) in logfile_data in writeparamfile().')\n # try to get the values\n linetowrite = linebegin + ':\\t' + formatted + '\\n'\n f.write(linetowrite)\n if isinstance(fieldnames, tuple):\n for fn in fieldnames: # remove the params treated.\n if fn in allkeys:\n allkeys.remove(fn)\n else:\n if fieldnames in allkeys:\n allkeys.remove(fieldnames)\n # write untreated params\n for k in allkeys:\n linetowrite = k + ':\\t' + str(data[k]) + '\\n'\n f.write(linetowrite)\n\n f.close()", "response": "Write a header structure into a B1 logfile."} {"SOURCE": "codesearchnet", "instruction": "Can you implement a function in Python 3 that\nreads beamline B1 header data from file.", "response": "def readB1header(filename):\n \"\"\"Read beamline B1 (HASYLAB, Hamburg) header data\n\n Input\n -----\n filename: string\n the file name. 
If ends with ``.gz``, it is fed through a ``gunzip``\n filter\n\n Output\n ------\n A header dictionary.\n\n Examples\n --------\n read header data from 'ORG000123.DAT'::\n\n header=readB1header('ORG00123.DAT')\n \"\"\"\n # Planck's constant times speed of light: incorrect\n # constant in the old program on hasjusi1, which was\n # taken over by the measurement program, to keep\n # compatibility with that.\n hed = {}\n if libconfig.LENGTH_UNIT == 'A':\n jusifaHC = 12396.4\n elif libconfig.LENGTH_UNIT == 'nm':\n jusifaHC = 1239.64\n else:\n raise NotImplementedError(\n 'Invalid length unit: ' + str(libconfig.LENGTH_UNIT))\n\n if filename.upper().endswith('.GZ'):\n fid = gzip.GzipFile(filename, 'r')\n else:\n fid = open(filename, 'rt')\n lines = fid.readlines()\n fid.close()\n hed['FSN'] = int(lines[0].strip())\n hed['Hour'] = int(lines[17].strip())\n hed['Minutes'] = int(lines[18].strip())\n hed['Month'] = int(lines[19].strip())\n hed['Day'] = int(lines[20].strip())\n hed['Year'] = int(lines[21].strip()) + 2000\n hed['FSNref1'] = int(lines[23].strip())\n hed['FSNdc'] = int(lines[24].strip())\n hed['FSNsensitivity'] = int(lines[25].strip())\n hed['FSNempty'] = int(lines[26].strip())\n hed['FSNref2'] = int(lines[27].strip())\n hed['Monitor'] = float(lines[31].strip())\n hed['Anode'] = float(lines[32].strip())\n hed['MeasTime'] = float(lines[33].strip())\n hed['Temperature'] = float(lines[34].strip())\n hed['BeamPosX'] = float(lines[36].strip())\n hed['BeamPosY'] = float(lines[37].strip())\n hed['Transm'] = float(lines[41].strip())\n hed['Wavelength'] = float(lines[43].strip())\n hed['Energy'] = jusifaHC / hed['Wavelength']\n hed['Dist'] = float(lines[46].strip())\n hed['XPixel'] = 1 / float(lines[49].strip())\n hed['YPixel'] = 1 / float(lines[50].strip())\n hed['Title'] = lines[53].strip().replace(' ', '_').replace('-', '_')\n hed['MonitorDORIS'] = float(lines[56].strip()) # aka. DORIS counter\n hed['Owner'] = lines[57].strip()\n hed['RotXSample'] = float(lines[59].strip())\n hed['RotYSample'] = float(lines[60].strip())\n hed['PosSample'] = float(lines[61].strip())\n hed['DetPosX'] = float(lines[62].strip())\n hed['DetPosY'] = float(lines[63].strip())\n hed['MonitorPIEZO'] = float(lines[64].strip()) # aka. 
PIEZO counter\n hed['BeamsizeX'] = float(lines[66].strip())\n hed['BeamsizeY'] = float(lines[67].strip())\n hed['PosRef'] = float(lines[70].strip())\n hed['Monochromator1Rot'] = float(lines[77].strip())\n hed['Monochromator2Rot'] = float(lines[78].strip())\n hed['Heidenhain1'] = float(lines[79].strip())\n hed['Heidenhain2'] = float(lines[80].strip())\n hed['Current1'] = float(lines[81].strip())\n hed['Current2'] = float(lines[82].strip())\n hed['Detector'] = 'Unknown'\n hed['PixelSize'] = (hed['XPixel'] + hed['YPixel']) / 2.0\n\n hed['AnodeError'] = math.sqrt(hed['Anode'])\n hed['TransmError'] = 0\n hed['MonitorError'] = math.sqrt(hed['Monitor'])\n hed['MonitorPIEZOError'] = math.sqrt(hed['MonitorPIEZO'])\n hed['MonitorDORISError'] = math.sqrt(hed['MonitorDORIS'])\n hed['Date'] = datetime.datetime(\n hed['Year'], hed['Month'], hed['Day'], hed['Hour'], hed['Minutes'])\n hed['__Origin__'] = 'B1 original'\n hed['__particle__'] = 'photon'\n return hed"} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef readehf(filename):\n f = open(filename, 'r')\n edf = {}\n if not f.readline().strip().startswith('{'):\n raise ValueError('Invalid file format.')\n for l in f:\n l = l.strip()\n if not l:\n continue\n if l.endswith('}'):\n break # last line of header\n try:\n left, right = l.split('=', 1)\n except ValueError:\n raise ValueError('Invalid line: ' + l)\n left = left.strip()\n right = right.strip()\n if not right.endswith(';'):\n raise ValueError(\n 'Invalid line (does not end with a semicolon): ' + l)\n right = right[:-1].strip()\n m = re.match('^(?P.*)~(?P\\d+)$', left)\n if m is not None:\n edf[m.group('left')] = edf[m.group('left')] + right\n else:\n edf[left] = _readedf_extractline(left, right)\n f.close()\n edf['FileName'] = filename\n edf['__Origin__'] = 'EDF ID02'\n edf['__particle__'] = 'photon'\n return edf", "response": "Read the EDF header from a file and return a dictionary of the EDF header structure."} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script for\nreading the header data from a Bessy Data Format v1 file.", "response": "def readbhfv1(filename, load_data=False, bdfext='.bdf', bhfext='.bhf'):\n \"\"\"Read header data from bdf/bhf file (Bessy Data Format v1)\n\n Input:\n filename: the name of the file\n load_data: if the matrices are to be loaded\n\n Output:\n bdf: the BDF header structure\n\n Adapted the bdf_read.m macro from Sylvio Haas.\n \"\"\"\n # strip the bhf or bdf extension if there.\n if filename.endswith(bdfext):\n basename = filename[:-len(bdfext)]\n elif filename.endswith(bhfext):\n basename = filename[:-len(bhfext)]\n else: # assume a single file of header and data.\n basename, bhfext = os.path.splitext(filename)\n bdfext = bhfext\n headername = basename + bhfext\n dataname = basename + bdfext\n bdf = {}\n bdf['his'] = [] # empty list for history\n bdf['C'] = {} # empty list for bdf file descriptions\n namelists = {}\n valuelists = {}\n with open(headername, 'rb') as fid: # if fails, an exception is raised\n for line in fid:\n if not line.strip():\n continue # empty line\n mat = line.split(None, 1)\n prefix = mat[0]\n if prefix == '#C':\n left, right = mat[1].split('=', 1)\n left = left.strip()\n right = right.strip()\n if left in ['xdim', 'ydim']:\n bdf[left] = int(right)\n elif left in ['type', 'bdf']:\n bdf[left] = right\n if left in ['Sendtime']:\n bdf['C'][left] = float(right)\n elif left in ['xdim', 'ydim']:\n bdf['C'][left] = int(right)\n else:\n bdf['C'][left] = 
misc.parse_number(right)\n elif prefix.startswith(\"#H\"):\n bdf['his'].append(mat[1])\n# elif prefix.startswith(\"#DATA\"):\n# if not load_data:\n# break\n# darray = np.fromfile(fid, dtype = bdf['type'], count = int(bdf['xdim'] * bdf['ydim']))\n# bdf['data'] = np.rot90((darray.reshape(bdf['xdim'], bdf['ydim'])).astype('double').T, 1).copy() # this weird transformation is needed to get the matrix in the same form as bdf_read.m gets it.\n# elif prefix.startswith('#ERROR'):\n# if not load_data:\n# break\n# darray = np.fromfile(fid, dtype = bdf['type'], count = int(bdf['xdim'] * bdf['ydim']))\n# bdf['error'] = np.rot90((darray.reshape(bdf['xdim'], bdf['ydim'])).astype('double').T, 1).copy()\n else:\n for prf in ['M', 'G', 'S', 'T']:\n if prefix.startswith('#C%sL' % prf):\n if prf not in namelists:\n namelists[prf] = []\n namelists[prf].extend(mat[1].split())\n elif prefix.startswith('#C%sV' % prf):\n if prf not in valuelists:\n valuelists[prf] = []\n valuelists[prf].extend([float(x)\n for x in mat[1].split()])\n else:\n continue\n for dictname, prfname in zip(['M', 'CG', 'CS', 'CT'], ['M', 'G', 'S', 'T']):\n bdf[dictname] = dict(\n list(zip(namelists[prfname], valuelists[prfname])))\n bdf['__Origin__'] = 'BDFv1'\n bdf['__particle__'] = 'photon'\n\n if load_data:\n f = open(dataname, 'r')\n try:\n s = f.read()\n except IOError as ioe:\n # an ugly bug (M$ KB899149) in W!nd0w$ causes an error if loading too\n # large a file from a network drive and opening it read-only.\n if ioe.errno == 22:\n f.close()\n try:\n # one work-around is to open it read-write.\n f = open(dataname, 'r+b')\n s = f.read()\n except IOError:\n # if this does not work, inform the user to either obtain\n # write permission for that file or copy it to a local\n # drive\n f.close()\n raise IOError(22, \"\"\"\nYou were probably trying to open a read-only file from a network drive on\nWindows, weren\\'t you? There is a bug in Windows causing this error\n(see http://support.microsoft.com/default.aspx?scid=kb;en-us;899149).\nTo work around this, please either obtain write permission for that file\n(I won't write anything to it, I promise!!!) or copy it to a local drive.\nSorry for the inconvenience.\"\"\", ioe.filename)\n datasets = re.findall(\n '#\\s*(?P\\w+)\\[(?P\\d+):(?P\\d+)\\]', s)\n names = [d[0] for d in datasets]\n xsize = [int(d[1]) for d in datasets]\n ysize = [int(d[2]) for d in datasets]\n dt = np.dtype(bdf['type'])\n for i in range(len(datasets)):\n start = s.find('#%s' % names[i])\n if i < len(datasets) - 1:\n end = s.find('#%s' % (names[i + 1]))\n else:\n end = len(s)\n s1 = s[start:end]\n datasize = xsize[i] * ysize[i] * dt.itemsize\n if datasize > len(s1):\n # assume we are dealing with a BOOL matrix\n bdf[names[i]] = np.fromstring(\n s1[-xsize[i] * ysize[i]:], dtype=np.uint8)\n else:\n bdf[names[i]] = np.fromstring(\n s1[-xsize[i] * ysize[i] * dt.itemsize:], dtype=dt)\n # conversion: Matlab saves the array in Fortran-style ordering (columns first).\n # Python however loads in C-style: rows first. 
We need to take care:\n # 1) reshape from linear to (ysize,xsize) and not (xsize,ysize)\n # 2) transpose (swaps columns and rows)\n # After these operations, we only have to rotate this counter-clockwise by 90\n # degrees because bdf2_write rotates by +270 degrees before saving.\n bdf[names[i]] = np.rot90(\n bdf[names[i]].reshape((ysize[i], xsize[i]), order='F'), 1)\n\n return bdf"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef readmarheader(filename):\n with open(filename, 'rb') as f:\n intheader = np.fromstring(f.read(10 * 4), np.int32)\n floatheader = np.fromstring(f.read(15 * 4), '>> find_subdirs('dir',0) # returns just ['dir']\n\n >>> find_subdirs('dir',1) # returns all direct (first-level) subdirs\n # of 'dir'.\n \"\"\"\n startdir = os.path.expanduser(startdir)\n direct_subdirs = [os.path.join(startdir, x) for x in os.listdir(\n startdir) if os.path.isdir(os.path.join(startdir, x))]\n if recursion_depth is None:\n next_recursion_depth = None\n else:\n next_recursion_depth = recursion_depth - 1\n if (recursion_depth is not None) and (recursion_depth <= 1):\n return [startdir] + direct_subdirs\n else:\n subdirs = []\n for d in direct_subdirs:\n subdirs.extend(find_subdirs(d, next_recursion_depth))\n return [startdir] + subdirs"} {"SOURCE": "codesearchnet", "instruction": "How would you explain what the following Python 3 function does\ndef findpeak(x, y, dy=None, position=None, hwhm=None, baseline=None, amplitude=None, curve='Lorentz'):\n warnings.warn('Function findpeak() is deprecated, please use findpeak_single() instead.', DeprecationWarning)\n pos, hwhm, baseline, ampl = findpeak_single(x, y, dy, position, hwhm, baseline, amplitude, curve)\n return pos.val, pos.err, hwhm.val, hwhm.err, baseline.val, baseline.err, ampl.val, ampl.err", "response": "This function finds a positive peak in the dataset."} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef findpeak_single(x, y, dy=None, position=None, hwhm=None, baseline=None, amplitude=None, curve='Lorentz',\n return_stat=False, signs=(-1, 1), return_x=None):\n \"\"\"Find a (positive or negative) peak in the dataset.\n\n Inputs:\n x, y, dy: abscissa, ordinate and the error of the ordinate (can be None)\n position, hwhm, baseline, amplitude: first guesses for the named parameters\n curve: 'Gauss' or 'Lorentz' (default)\n return_stat: return fitting statistics from easylsq.nlsq_fit()\n signs: a tuple, can be (1,), (-1,), (1,-1). Will try these signs for the peak amplitude\n return_x: abscissa on which the fitted function form has to be evaluated\n\n Outputs:\n peak position, hwhm, baseline, amplitude[, stat][, peakfunction]\n\n where:\n peak position, hwhm, baseline, amplitude are ErrorValue instances.\n stat is the statistics dictionary, returned only if return_stat is True\n peakfunction is the fitted peak evaluated at return_x if it is not None.\n\n Notes:\n A Gauss or a Lorentz curve is fitted, depending on the value of 'curve'. 
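(As an illustrative aside, not part of this library: the Lorentzian model fitted here can be reproduced on synthetic data with a plain scipy.optimize.curve_fit sketch.)

    import numpy as np
    from scipy.optimize import curve_fit

    def lorentz(x, amplitude, position, hwhm, baseline):
        return amplitude * hwhm ** 2 / (hwhm ** 2 + (position - x) ** 2) + baseline

    x = np.linspace(-5, 5, 201)
    y = lorentz(x, 2.0, 0.5, 1.0, 0.1) + np.random.default_rng(0).normal(0, 0.02, x.size)
    # same initial guesses as the routine above: amplitude, position, hwhm, baseline
    p0 = [y.max() - y.min(), x[np.argmax(y)], 0.5 * (x.max() - x.min()), y.min()]
    popt, pcov = curve_fit(lorentz, x, y, p0=p0)
    print(popt, np.sqrt(np.diag(pcov)))  # best-fit parameters and 1-sigma errors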
The abscissa\n should be sorted, ascending.\n \"\"\"\n y_orig=y\n if dy is None: dy = np.ones_like(x)\n if curve.upper().startswith('GAUSS'):\n def fitfunc(x_, amplitude_, position_, hwhm_, baseline_):\n return amplitude_ * np.exp(-0.5 * (x_ - position_) ** 2 / hwhm_ ** 2) + baseline_\n elif curve.upper().startswith('LORENTZ'):\n def fitfunc(x_, amplitude_, position_, hwhm_, baseline_):\n return amplitude_ * hwhm_ ** 2 / (hwhm_ ** 2 + (position_ - x_) ** 2) + baseline_\n else:\n raise ValueError('Invalid curve type: {}'.format(curve))\n results=[]\n # we try fitting a positive and a negative peak and return the better fit (where R2 is larger)\n for sign in signs:\n init_params={'position':position,'hwhm':hwhm,'baseline':baseline,'amplitude':amplitude}\n y = y_orig * sign\n if init_params['position'] is None: init_params['position'] = x[y == y.max()][0]\n if init_params['hwhm'] is None: init_params['hwhm'] = 0.5 * (x.max() - x.min())\n if init_params['baseline'] is None: init_params['baseline'] = y.min()\n if init_params['amplitude'] is None: init_params['amplitude'] = y.max() - init_params['baseline']\n results.append(nlsq_fit(x, y, dy, fitfunc, (init_params['amplitude'],\n init_params['position'],\n init_params['hwhm'],\n init_params['baseline']))+(sign,))\n max_R2=max([r[2]['R2'] for r in results])\n p,dp,stat,sign=[r for r in results if r[2]['R2']==max_R2][0]\n retval = [ErrorValue(p[1], dp[1]), ErrorValue(abs(p[2]), dp[2]), sign * ErrorValue(p[3], dp[3]),\n sign * ErrorValue(p[0], dp[0])]\n if return_stat:\n stat['func_value'] = stat['func_value'] * sign\n retval.append(stat)\n if return_x is not None:\n retval.append(sign * fitfunc(return_x, p[0], p[1], p[2], p[3]))\n return tuple(retval)", "response": "Find a single peak in the dataset."} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef findpeak_multi(x, y, dy, N, Ntolerance, Nfit=None, curve='Lorentz', return_xfit=False, return_stat=False):\n if Nfit is None:\n Nfit = N\n # find points where the curve grows for N points before them and\n # decreases for N points after them. To accomplish this, we create\n # an indicator array of the sign of the first derivative.\n sgndiff = np.sign(np.diff(y))\n xdiff = x[:-1] # associate difference values to the lower 'x' value.\n pix = np.arange(len(x) - 1) # pixel coordinates create an indicator\n # array as the sum of sgndiff shifted left and right. whenever an\n # element of this is 2*N, it fulfills the criteria above.\n indicator = np.zeros(len(sgndiff) - 2 * N)\n for i in range(2 * N):\n indicator += np.sign(N - i) * sgndiff[i:-2 * N + i]\n # add the last one, since the indexing is different (would be\n # [2*N:0], which is not what we want)\n indicator += -sgndiff[2 * N:]\n # find the positions (indices) of the peak. The strict criteria is\n # relaxed somewhat by using the Ntolerance value. Note the use of\n # 2*Ntolerance, since each outlier point creates two outliers in\n # sgndiff (-1 insted of +1 and vice versa).\n peakpospix = pix[N:-N][indicator >= 2 * N - 2 * Ntolerance]\n ypeak = y[peakpospix]\n # Now refine the found positions by least-squares fitting. But\n # first we have to sort out other non-peaks, i.e. 
found points\n # which have other found points with higher values in their [-N,N]\n # neighbourhood.\n pos = []; ampl = []; hwhm = []; baseline = []; xfit = []; stat = []\n dy1 = None\n for i in range(len(ypeak)):\n if not [j for j in list(range(i + 1, len(ypeak))) + list(range(0, i)) if abs(peakpospix[j] - peakpospix[i]) <= N and ypeak[i] < ypeak[j]]:\n # only leave maxima.\n idx = peakpospix[i]\n if dy is not None:\n dy1 = dy[(idx - Nfit):(idx + Nfit + 1)]\n xfit_ = x[(idx - Nfit):(idx + Nfit + 1)]\n pos_, hwhm_, baseline_, ampl_, stat_ = findpeak_single(xfit_, y[(idx - Nfit):(idx + Nfit + 1)], dy1, position=x[idx], return_stat=True)\n \n stat.append(stat_)\n xfit.append(xfit_)\n pos.append(pos_)\n ampl.append(ampl_)\n hwhm.append(hwhm_)\n baseline.append(baseline_)\n results = [pos, hwhm, baseline, ampl]\n if return_xfit:\n results.append(xfit)\n if return_stat:\n results.append(stat)\n return tuple(results)", "response": "Find multiple peaks in the dataset given by vectors x and y."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef findpeak_asymmetric(x, y, dy=None, curve='Lorentz', return_x=None, init_parameters=None):\n idx = np.logical_and(np.isfinite(x), np.isfinite(y))\n if dy is not None:\n idx = np.logical_and(idx, np.isfinite(dy))\n x=x[idx]\n y=y[idx]\n if dy is not None:\n dy=dy[idx]\n if curve.lower().startswith('loren'):\n lorentzian = True\n elif curve.lower().startswith('gauss'):\n lorentzian = False\n else:\n raise ValueError('Unknown peak type {}'.format(curve))\n\n def peakfunc(pars, x, lorentzian=True):\n x0, sigma1, sigma2, C, A = pars\n result = np.empty_like(x)\n if lorentzian:\n result[x < x0] = A * sigma1 ** 2 / (sigma1 ** 2 + (x0 - x[x < x0]) ** 2) + C\n result[x >= x0] = A * sigma2 ** 2 / (sigma2 ** 2 + (x0 - x[x >= x0]) ** 2) + C\n else:\n result[x < x0] = A * np.exp(-(x[x < x0] - x0) ** 2 / (2 * sigma1 ** 2))\n result[x >= x0] = A * np.exp(-(x[x >= x0] - x0) ** 2 / (2 * sigma1 ** 2))\n return result\n\n def fitfunc(pars, x, y, dy, lorentzian=True):\n yfit = peakfunc(pars, x, lorentzian)\n if dy is None:\n return yfit - y\n else:\n return (yfit - y) / dy\n if init_parameters is not None:\n pos, hwhmleft, hwhmright, baseline, amplitude = [float(x) for x in init_parameters]\n else:\n baseline = y.min()\n amplitude = y.max() - baseline\n hwhmleft = hwhmright = (x.max() - x.min()) * 0.5\n pos = x[np.argmax(y)]\n #print([pos,hwhm,hwhm,baseline,amplitude])\n result = scipy.optimize.least_squares(fitfunc, [pos, hwhmleft, hwhmright, baseline, amplitude],\n args=(x, y, dy, lorentzian),\n bounds=([x.min(), 0, 0, -np.inf, 0],\n [x.max(), np.inf, np.inf, np.inf, np.inf]))\n# print(result.x[0], result.x[1], result.x[2], result.x[3], result.x[4], result.message, result.success)\n if not result.success:\n raise RuntimeError('Error while peak fitting: {}'.format(result.message))\n if dy is None:\n ret = (result.x[0], result.x[1], result.x[2], result.x[3], result.x[4])\n else:\n # noinspection PyTupleAssignmentBalance\n _, s, VT = svd(result.jac, full_matrices=False)\n threshold = np.finfo(float).eps * max(result.jac.shape) * s[0]\n s = s[s > threshold]\n VT = VT[:s.size]\n pcov = np.dot(VT.T / s ** 2, VT)\n ret = tuple([ErrorValue(result.x[i], pcov[i, i] ** 0.5) for i in range(5)])\n if return_x is not None:\n ret = ret + (peakfunc([float(x) for x in ret], return_x, lorentzian),)\n return ret", "response": "This function finds an asymmetric peak in the dataset."} {"SOURCE": "codesearchnet", "instruction": "Can you implement a 
function in Python 3 that\nreads the next spec scan in the file.", "response": "def readspecscan(f, number=None):\n \"\"\"Read the next spec scan in the file, which starts at the current position.\"\"\"\n scan = None\n scannumber = None\n while True:\n l = f.readline()\n if l.startswith('#S'):\n scannumber = int(l[2:].split()[0])\n if not ((number is None) or (number == scannumber)):\n # break the loop, will skip to the next empty line after this\n # loop\n break\n if scan is None:\n scan = {}\n scan['number'] = scannumber\n scan['command'] = l[2:].split(None, 1)[1].strip()\n scan['data'] = []\n elif l.startswith('#C'):\n scan['comment'] = l[2:].strip()\n elif l.startswith('#D'):\n scan['datestring'] = l[2:].strip()\n elif l.startswith('#T'):\n scan['countingtime'] = float(l[2:].split()[0])\n scan['scantimeunits'] = l[2:].split()[1].strip()\n elif l.startswith('#M'):\n scan['countingcounts'] = float(l[2:].split()[0])\n elif l.startswith('#G'):\n if 'G' not in scan:\n scan['G'] = []\n scan['G'].extend([float(x) for x in l.split()[1:]])\n elif l.startswith('#P'):\n if 'positions' not in scan:\n scan['positions'] = []\n scan['positions'].extend([float(x) for x in l.split()[1:]])\n elif l.startswith('#Q'):\n pass\n elif l.startswith('#N'):\n n = [float(x) for x in l[2:].strip().split()]\n if len(n) == 1:\n scan['N'] = n[0]\n else:\n scan['N'] = n\n elif l.startswith('#L'):\n scan['Columns'] = [x.strip() for x in l[3:].split(' ')]\n elif not l:\n # end of file\n if scan is None:\n raise SpecFileEOF\n else:\n break\n elif not l.strip():\n break # empty line, end of scan in file.\n elif l.startswith('#'):\n # ignore other lines starting with a hashmark.\n continue\n else:\n scan['data'].append(tuple(float(x) for x in l.split()))\n while l.strip():\n l = f.readline()\n if scan is not None:\n scan['data'] = np.array(\n scan['data'], dtype=list(zip(scan['Columns'], itertools.repeat(np.float))))\n return scan\n else:\n return scannumber"} {"SOURCE": "codesearchnet", "instruction": "Can you write a function in Python 3 where it\nopens a SPEC file and read its content into a dict.", "response": "def readspec(filename, read_scan=None):\n \"\"\"Open a SPEC file and read its content\n\n Inputs:\n\n filename: string\n the file to open\n\n read_scan: None, 'all' or integer\n the index of scan to be read from the file. If None, no scan should be read. If\n 'all', all scans should be read. If a number, just the scan with that number\n should be read.\n\n Output:\n the data in the spec file in a dict.\n \"\"\"\n with open(filename, 'rt') as f:\n sf = {'motors': [], 'maxscannumber': 0}\n sf['originalfilename'] = filename\n lastscannumber = None\n while True:\n l = f.readline()\n if l.startswith('#F'):\n sf['filename'] = l[2:].strip()\n elif l.startswith('#E'):\n sf['epoch'] = int(l[2:].strip())\n sf['datetime'] = datetime.datetime.fromtimestamp(sf['epoch'])\n elif l.startswith('#D'):\n sf['datestring'] = l[2:].strip()\n elif l.startswith('#C'):\n sf['comment'] = l[2:].strip()\n elif l.startswith('#O'):\n try:\n l = l.split(None, 1)[1]\n except IndexError:\n continue\n if 'motors' not in list(sf.keys()):\n sf['motors'] = []\n sf['motors'].extend([x.strip() for x in l.split(' ')])\n elif not l.strip():\n # empty line, signifies the end of the header part. 
The next\n # line will be a scan.\n break\n sf['scans'] = {}\n if read_scan is not None:\n if read_scan == 'all':\n nr = None\n else:\n nr = read_scan\n try:\n while True:\n s = readspecscan(f, nr)\n if isinstance(s, dict):\n sf['scans'][s['number']] = s\n if nr is not None:\n break\n sf['maxscannumber'] = max(\n sf['maxscannumber'], s['number'])\n elif s is not None:\n sf['maxscannumber'] = max(sf['maxscannumber'], s)\n except SpecFileEOF:\n pass\n else:\n while True:\n l = f.readline()\n if not l:\n break\n if l.startswith('#S'):\n n = int(l[2:].split()[0])\n sf['maxscannumber'] = max(sf['maxscannumber'], n)\n for n in sf['scans']:\n s = sf['scans'][n]\n s['motors'] = sf['motors']\n if 'comment' not in s:\n s['comment'] = sf['comment']\n if 'positions' not in s:\n s['positions'] = [None] * len(sf['motors'])\n return sf"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function for\nreading a file from beamline B1 HASYLAB and return a dictionary. The keys are self - planatory. The values are self - planatory.", "response": "def readabt(filename, dirs='.'):\n \"\"\"Read abt_*.fio type files from beamline B1, HASYLAB.\n\n Input:\n filename: the name of the file.\n dirs: directories to search for files in\n\n Output:\n A dictionary. The fields are self-explanatory.\n \"\"\"\n # resolve filename\n filename = misc.findfileindirs(filename, dirs)\n f = open(filename, 'rt')\n abt = {'offsetcorrected': False, 'params': {}, 'columns': [], 'data': [], 'title': '',\n 'offsets': {}, 'filename': filename}\n readingmode = ''\n for l in f:\n l = l.strip()\n if l.startswith('!') or len(l) == 0:\n continue\n elif l.startswith('%c'):\n readingmode = 'comments'\n elif l.startswith('%p'):\n readingmode = 'params'\n elif l.startswith('%d'):\n readingmode = 'data'\n elif readingmode == 'comments':\n m = re.match(\n r'(?P\\w+)-Scan started at (?P\\d+-\\w+-\\d+) (?P\\d+:\\d+:\\d+), ended (?P\\d+:\\d+:\\d+)', l)\n if m:\n abt.update(m.groupdict())\n continue\n else:\n m = re.match(r'Name: (?P\\w+)', l)\n if m:\n abt.update(m.groupdict())\n m1 = re.search(r'from (?P\\d+(?:.\\d+)?)', l)\n if m1:\n abt.update(m1.groupdict())\n m1 = re.search(r'to (?P\\d+(?:.\\d+)?)', l)\n if m1:\n abt.update(m1.groupdict())\n m1 = re.search(r'by (?P\\d+(?:.\\d+)?)', l)\n if m1:\n abt.update(m1.groupdict())\n m1 = re.search(r'sampling (?P\\d+(?:.\\d+)?)', l)\n if m1:\n abt.update(m1.groupdict())\n continue\n if l.find('Counter readings are offset corrected') >= 0:\n abt['offsetcorrected'] = True\n readingmode = 'offsets'\n continue\n # if we reach here in 'comments' mode, this is the title line\n abt['title'] = l\n continue\n elif readingmode == 'offsets':\n m = re.findall(r'(\\w+)\\s(\\d+(?:.\\d+)?)', l)\n if m:\n abt['offsets'].update(dict(m))\n for k in abt['offsets']:\n abt['offsets'][k] = float(abt['offsets'][k])\n elif readingmode == 'params':\n abt['params'][l.split('=')[0].strip()] = float(\n l.split('=')[1].strip())\n elif readingmode == 'data':\n if l.startswith('Col'):\n abt['columns'].append(l.split()[2])\n else:\n abt['data'].append([float(x) for x in l.split()])\n f.close()\n # some post-processing\n # remove common prefix from column names\n maxcolnamelen = max(len(c) for c in abt['columns'])\n l = 1\n for l in range(1, maxcolnamelen):\n if len(set([c[:l] for c in abt['columns']])) > 1:\n break\n abt['columns'] = [c[l - 1:] for c in abt['columns']]\n # represent data as a structured array\n dt = np.dtype(list(zip(abt['columns'], itertools.repeat(np.double))))\n abt['data'] = np.array(abt['data'], 
dtype=np.double).view(dt)\n # dates and times in datetime formats\n monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May',\n 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']\n for m, i in zip(monthnames, itertools.count(1)):\n abt['startdate'] = abt['startdate'].replace(m, str(i))\n abt['startdate'] = datetime.date(\n *reversed([int(x) for x in abt['startdate'].split('-')]))\n abt['starttime'] = datetime.time(\n *[int(x) for x in abt['starttime'].split(':')])\n abt['endtime'] = datetime.time(\n *[int(x) for x in abt['endtime'].split(':')])\n abt['start'] = datetime.datetime.combine(\n abt['startdate'], abt['starttime'])\n if abt['endtime'] <= abt['starttime']:\n abt['end'] = datetime.datetime.combine(\n abt['startdate'] + datetime.timedelta(1), abt['endtime'])\n else:\n abt['end'] = datetime.datetime.combine(\n abt['startdate'], abt['endtime'])\n del abt['starttime']\n del abt['startdate']\n del abt['endtime']\n # convert some fields to float\n for k in ['from', 'to', 'by', 'sampling']:\n if k in abt:\n abt[k] = float(abt[k])\n # change space and dash in title to underscore\n abt['title'] = abt['title'].replace('-', '_').replace(' ', '_')\n return abt"} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef energy(self) -> ErrorValue:\n return (ErrorValue(*(scipy.constants.physical_constants['speed of light in vacuum'][0::2])) *\n ErrorValue(*(scipy.constants.physical_constants['Planck constant in eV s'][0::2])) /\n scipy.constants.nano /\n self.wavelength)", "response": "X - ray energy"} {"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef maskname(self) -> Optional[str]:\n try:\n maskid = self._data['maskname']\n if not maskid.endswith('.mat'):\n maskid = maskid + '.mat'\n return maskid\n except KeyError:\n return None", "response": "Name of the mask matrix file."} {"SOURCE": "codesearchnet", "instruction": "Can you create a Python 3 function that\nfinds the beam center with the gravity method.", "response": "def findbeam_gravity(data, mask):\n \"\"\"Find beam center with the \"gravity\" method\n\n Inputs:\n data: scattering image\n mask: mask matrix\n\n Output:\n a vector of length 2 with the x (row) and y (column) coordinates\n of the origin, starting from 1\n \"\"\"\n # for each row and column find the center of gravity\n data1 = data.copy() # take a copy, because elements will be tampered with\n data1[mask == 0] = 0 # set masked elements to zero\n # vector of x (row) coordinates\n x = np.arange(data1.shape[0])\n # vector of y (column) coordinates\n y = np.arange(data1.shape[1])\n # two column vectors, both containing ones. The length of onex and\n # oney corresponds to length of x and y, respectively.\n onex = np.ones_like(x)\n oney = np.ones_like(y)\n # Multiply the matrix with x. Each element of the resulting column\n # vector will contain the center of gravity of the corresponding row\n # in the matrix, multiplied by the \"weight\". Thus: nix_i=sum_j( A_ij\n # * x_j). If we divide this by spamx_i=sum_j(A_ij), then we get the\n # center of gravity. 
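    # --- Illustrative aside, not part of findbeam_gravity: the same
    # centre-of-gravity step on a toy 3x4 image (numpy already imported). ---
    toy = np.arange(12.0).reshape(3, 4)
    xcoord = np.arange(toy.shape[0])
    nix_demo = np.dot(xcoord, toy)        # sum_i A_ij * x_i for each column j
    spamx_demo = np.dot(np.ones(3), toy)  # sum_i A_ij for each column j
    print(nix_demo / spamx_demo)          # centre of gravity of each column
    # --- end aside ---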
The length of this column vector is len(y).\n nix = np.dot(x, data1)\n spamx = np.dot(onex, data1)\n # indices where both nix and spamx is nonzero.\n goodx = ((nix != 0) & (spamx != 0))\n # trim y, nix and spamx by goodx, eliminate invalid points.\n nix = nix[goodx]\n spamx = spamx[goodx]\n\n # now do the same for the column direction.\n niy = np.dot(data1, y)\n spamy = np.dot(data1, oney)\n goody = ((niy != 0) & (spamy != 0))\n niy = niy[goody]\n spamy = spamy[goody]\n # column coordinate of the center in each row will be contained in\n # ycent, the row coordinate of the center in each column will be\n # in xcent.\n ycent = nix / spamx\n xcent = niy / spamy\n # return the mean values as the centers.\n return [xcent.mean(), ycent.mean()]"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nfind beam center with the slices method.", "response": "def findbeam_slices(data, orig_initial, mask=None, maxiter=0, epsfcn=0.001,\n dmin=0, dmax=np.inf, sector_width=np.pi / 9.0, extent=10, callback=None):\n \"\"\"Find beam center with the \"slices\" method\n\n Inputs:\n data: scattering matrix\n orig_initial: estimated value for x (row) and y (column)\n coordinates of the beam center, starting from 1.\n mask: mask matrix. If None, nothing will be masked. Otherwise it\n should be of the same size as data. Nonzero means non-masked.\n maxiter: maximum number of iterations for scipy.optimize.leastsq\n epsfcn: input for scipy.optimize.leastsq\n dmin: disregard pixels nearer to the origin than this\n dmax: disregard pixels farther from the origin than this\n sector_width: width of sectors in radians\n extent: approximate distance of the current and the real origin in pixels.\n Too high a value makes the fitting procedure unstable. Too low a value\n does not permit to move away the current origin.\n callback: callback function (expects no arguments)\n\n Output:\n a vector of length 2 with the x (row) and y (column) coordinates\n of the origin.\n \"\"\"\n if mask is None:\n mask = np.ones(data.shape)\n data = data.astype(np.double)\n\n def targetfunc(orig, data, mask, orig_orig, callback):\n # integrate four sectors\n I = [None] * 4\n p, Ints, A = radint_nsector(data, None, -1, -1, -1, orig[0] + orig_orig[0], orig[1] + orig_orig[1], mask=mask,\n phi0=np.pi / 4 - 0.5 * sector_width, dphi=sector_width,\n Nsector=4)\n minpix = max(max(p.min(0).tolist()), dmin)\n maxpix = min(min(p.max(0).tolist()), dmax)\n if (maxpix < minpix):\n raise ValueError('The four slices do not overlap! Please give a\\\n better approximation for the origin or use another centering method.')\n for i in range(4):\n I[i] = Ints[:, i][(p[:, i] >= minpix) & (p[:, i] <= maxpix)]\n ret = ((I[0] - I[2]) ** 2 + (I[1] - I[3]) ** 2) / (maxpix - minpix)\n if callback is not None:\n callback()\n return ret\n orig = scipy.optimize.leastsq(targetfunc, np.array([extent, extent]),\n args=(data, 1 - mask.astype(np.uint8),\n np.array(orig_initial) - extent, callback),\n maxfev=maxiter, epsfcn=0.01)\n return orig[0] + np.array(orig_initial) - extent"} {"SOURCE": "codesearchnet", "instruction": "Here you have a function in Python 3, explain what it does\ndef findbeam_azimuthal(data, orig_initial, mask=None, maxiter=100, Ntheta=50,\n dmin=0, dmax=np.inf, extent=10, callback=None):\n \"\"\"Find beam center using azimuthal integration\n\n Inputs:\n data: scattering matrix\n orig_initial: estimated value for x (row) and y (column)\n coordinates of the beam center, starting from 1.\n mask: mask matrix. 
If None, nothing will be masked. Otherwise it\n should be of the same size as data. Nonzero means non-masked.\n maxiter: maximum number of iterations for scipy.optimize.fmin\n Ntheta: the number of theta points for the azimuthal integration\n dmin: pixels nearer to the origin than this will be excluded from\n the azimuthal integration\n dmax: pixels farther from the origin than this will be excluded from\n the azimuthal integration\n extent: approximate distance of the current and the real origin in pixels.\n Too high a value makes the fitting procedure unstable. Too low a value\n does not permit to move away the current origin.\n callback: callback function (expects no arguments)\n Output:\n a vector of length 2 with the x and y coordinates of the origin,\n starting from 1\n \"\"\"\n if mask is None:\n mask = np.ones(data.shape)\n data = data.astype(np.double)\n\n def targetfunc(orig, data, mask, orig_orig, callback):\n def sinfun(p, x, y):\n return (y - np.sin(x + p[1]) * p[0] - p[2]) / np.sqrt(len(x))\n t, I, a = azimintpix(data, None, orig[\n 0] + orig_orig[0], orig[1] + orig_orig[1], mask.astype('uint8'), Ntheta, dmin, dmax)\n if len(a) > (a > 0).sum():\n raise ValueError('findbeam_azimuthal: non-complete azimuthal average, please consider changing dmin, dmax and/or orig_initial!')\n p = ((I.max() - I.min()) / 2.0, t[I == I.max()][0], I.mean())\n p = scipy.optimize.leastsq(sinfun, p, (t, I))[0]\n # print \"findbeam_azimuthal: orig=\",orig,\"amplitude=\",abs(p[0])\n if callback is not None:\n callback()\n return abs(p[0])\n orig1 = scipy.optimize.fmin(targetfunc, np.array([extent, extent]),\n args=(data, 1 - mask, np.array(orig_initial) - extent,\n callback), maxiter=maxiter, disp=0)\n return orig1 + np.array(orig_initial) - extent", "response": "This function finds the beam center using azimuthal integration."} {"SOURCE": "codesearchnet", "instruction": "Make a summary of the following Python 3 code\ndef findbeam_azimuthal_fold(data, orig_initial, mask=None, maxiter=100,\n Ntheta=50, dmin=0, dmax=np.inf, extent=10, callback=None):\n \"\"\"Find beam center using azimuthal integration and folding\n\n Inputs:\n data: scattering matrix\n orig_initial: estimated value for x (row) and y (column)\n coordinates of the beam center, starting from 1.\n mask: mask matrix. If None, nothing will be masked. Otherwise it\n should be of the same size as data. Nonzero means non-masked.\n maxiter: maximum number of iterations for scipy.optimize.fmin\n Ntheta: the number of theta points for the azimuthal integration.\n Should be even!\n dmin: pixels nearer to the origin than this will be excluded from\n the azimuthal integration\n dmax: pixels farther from the origin than this will be excluded from\n the azimuthal integration\n extent: approximate distance of the current and the real origin in pixels.\n Too high a value makes the fitting procedure unstable. 
Too low a value\n does not permit to move away the current origin.\n callback: callback function (expects no arguments)\n Output:\n a vector of length 2 with the x and y coordinates of the origin,\n starting from 1\n \"\"\"\n if Ntheta % 2:\n raise ValueError('Ntheta should be even!')\n if mask is None:\n mask = np.ones_like(data).astype(np.uint8)\n data = data.astype(np.double)\n # the function to minimize is the sum of squared difference of two halves of\n # the azimuthal integral.\n\n def targetfunc(orig, data, mask, orig_orig, callback):\n I = azimintpix(data, None, orig[\n 0] + orig_orig[0], orig[1] + orig_orig[1], mask, Ntheta, dmin, dmax)[1]\n if callback is not None:\n callback()\n return np.sum((I[:Ntheta / 2] - I[Ntheta / 2:]) ** 2) / Ntheta\n orig1 = scipy.optimize.fmin(targetfunc, np.array([extent, extent]),\n args=(data, 1 - mask, np.array(orig_initial) - extent, callback), maxiter=maxiter, disp=0)\n return orig1 + np.array(orig_initial) - extent", "response": "This function finds the beam center using azimuthal integration and folding."} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script for\nfinding beam with 2D weighting of semitransparent beamstop area.", "response": "def findbeam_semitransparent(data, pri, threshold=0.05):\n \"\"\"Find beam with 2D weighting of semitransparent beamstop area\n\n Inputs:\n data: scattering matrix\n pri: list of four: [xmin,xmax,ymin,ymax] for the borders of the beam\n area under the semitransparent beamstop. X corresponds to the column\n index (ie. A[Y,X] is the element of A from the Xth column and the\n Yth row). You can get these by zooming on the figure and retrieving\n the result of axis() (like in Matlab)\n threshold: do not count pixels if their intensity falls below\n max_intensity*threshold. max_intensity is the highest count rate\n in the current row or column, respectively. 
Set None to disable\n this feature.\n\n Outputs: bcx,bcy\n the x and y coordinates of the primary beam\n \"\"\"\n rowmin = np.floor(min(pri[2:]))\n rowmax = np.ceil(max(pri[2:]))\n colmin = np.floor(min(pri[:2]))\n colmax = np.ceil(max(pri[:2]))\n\n if threshold is not None:\n # beam area on the scattering image\n B = data[rowmin:rowmax, colmin:colmax]\n # print B.shape\n # row and column indices\n Ri = np.arange(rowmin, rowmax)\n Ci = np.arange(colmin, colmax)\n # print len(Ri)\n # print len(Ci)\n Ravg = B.mean(1) # average over column index, will be a concave curve\n Cavg = B.mean(0) # average over row index, will be a concave curve\n # find the maxima im both directions and their positions\n maxR = Ravg.max()\n maxRpos = Ravg.argmax()\n maxC = Cavg.max()\n maxCpos = Cavg.argmax()\n # cut off pixels which are smaller than threshold*peak_height\n Rmin = Ri[\n ((Ravg - Ravg[0]) >= ((maxR - Ravg[0]) * threshold)) & (Ri < maxRpos)][0]\n Rmax = Ri[\n ((Ravg - Ravg[-1]) >= ((maxR - Ravg[-1]) * threshold)) & (Ri > maxRpos)][-1]\n Cmin = Ci[\n ((Cavg - Cavg[0]) >= ((maxC - Cavg[0]) * threshold)) & (Ci < maxCpos)][0]\n Cmax = Ci[\n ((Cavg - Cavg[-1]) >= ((maxC - Cavg[-1]) * threshold)) & (Ci > maxCpos)][-1]\n else:\n Rmin = rowmin\n Rmax = rowmax\n Cmin = colmin\n Cmax = colmax\n d = data[Rmin:Rmax + 1, Cmin:Cmax + 1]\n x = np.arange(Rmin, Rmax + 1)\n y = np.arange(Cmin, Cmax + 1)\n bcx = (d.sum(1) * x).sum() / d.sum()\n bcy = (d.sum(0) * y).sum() / d.sum()\n return bcx, bcy"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 script to\nfind the beam in the radial average.", "response": "def findbeam_radialpeak(data, orig_initial, mask, rmin, rmax, maxiter=100,\n drive_by='amplitude', extent=10, callback=None):\n \"\"\"Find the beam by minimizing the width of a peak in the radial average.\n\n Inputs:\n data: scattering matrix\n orig_initial: first guess for the origin\n mask: mask matrix. Nonzero is non-masked.\n rmin,rmax: distance from the origin (in pixels) of the peak range.\n drive_by: 'hwhm' to minimize the hwhm of the peak or 'amplitude' to\n maximize the peak amplitude\n extent: approximate distance of the current and the real origin in pixels.\n Too high a value makes the fitting procedure unstable. 
Too low a value\n does not permit to move away the current origin.\n callback: callback function (expects no arguments)\n Outputs:\n the beam coordinates\n\n Notes:\n A Gaussian will be fitted.\n \"\"\"\n orig_initial = np.array(orig_initial)\n mask = 1 - mask.astype(np.uint8)\n data = data.astype(np.double)\n pix = np.arange(rmin * 1.0, rmax * 1.0, 1)\n if drive_by.lower() == 'hwhm':\n def targetfunc(orig, data, mask, orig_orig, callback):\n I = radintpix(\n data, None, orig[0] + orig_orig[0], orig[1] + orig_orig[1], mask, pix)[1]\n hwhm = float(misc.findpeak_single(pix, I)[1])\n # print orig[0] + orig_orig[0], orig[1] + orig_orig[1], p\n if callback is not None:\n callback()\n return abs(hwhm)\n elif drive_by.lower() == 'amplitude':\n def targetfunc(orig, data, mask, orig_orig, callback):\n I = radintpix(\n data, None, orig[0] + orig_orig[0], orig[1] + orig_orig[1], mask, pix)[1]\n fp = misc.findpeak_single(pix, I)\n height = -float(fp[2] + fp[3])\n # print orig[0] + orig_orig[0], orig[1] + orig_orig[1], p\n if callback is not None:\n callback()\n return height\n else:\n raise ValueError('Invalid argument for drive_by %s' % drive_by)\n orig1 = scipy.optimize.fmin(targetfunc, np.array([extent, extent]),\n args=(\n data, mask, orig_initial - extent, callback),\n maxiter=maxiter, disp=0)\n return np.array(orig_initial) - extent + orig1"} {"SOURCE": "codesearchnet", "instruction": "How would you code a function in Python 3 to\nfind the beam of a Guinier scattering.", "response": "def findbeam_Guinier(data, orig_initial, mask, rmin, rmax, maxiter=100,\n extent=10, callback=None):\n \"\"\"Find the beam by minimizing the width of a Gaussian centered at the\n origin (i.e. maximizing the radius of gyration in a Guinier scattering).\n\n Inputs:\n data: scattering matrix\n orig_initial: first guess for the origin\n mask: mask matrix. Nonzero is non-masked.\n rmin,rmax: distance from the origin (in pixels) of the Guinier range.\n extent: approximate distance of the current and the real origin in pixels.\n Too high a value makes the fitting procedure unstable. Too low a value\n does not permit to move away the current origin.\n callback: callback function (expects no arguments)\n Outputs:\n the beam coordinates\n\n Notes:\n A Gaussian with its will be fitted.\n \"\"\"\n orig_initial = np.array(orig_initial)\n mask = 1 - mask.astype(np.uint8)\n data = data.astype(np.double)\n pix = np.arange(rmin * 1.0, rmax * 1.0, 1)\n pix2 = pix ** 2\n\n def targetfunc(orig, data, mask, orig_orig, callback):\n I = radintpix(\n data, None, orig[0] + orig_orig[0], orig[1] + orig_orig[1], mask, pix)[1]\n p = np.polyfit(pix2, np.log(I), 1)[0]\n if callback is not None:\n callback()\n return p\n orig1 = scipy.optimize.fmin(targetfunc, np.array([extent, extent]),\n args=(\n data, mask, orig_initial - extent, callback),\n maxiter=maxiter, disp=0)\n return np.array(orig_initial) - extent + orig1"} {"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function that can\nfind the beam of a Guinier scattering.", "response": "def findbeam_powerlaw(data, orig_initial, mask, rmin, rmax, maxiter=100,\n drive_by='R2', extent=10, callback=None):\n \"\"\"Find the beam by minimizing the width of a Gaussian centered at the\n origin (i.e. maximizing the radius of gyration in a Guinier scattering).\n\n Inputs:\n data: scattering matrix\n orig_initial: first guess for the origin\n mask: mask matrix. 
{"SOURCE": "codesearchnet", "instruction": "Write a Python 3 function that can\nfind the beam by fitting a power-law to the radial average.", "response": "def findbeam_powerlaw(data, orig_initial, mask, rmin, rmax, maxiter=100,\n drive_by='R2', extent=10, callback=None):\n \"\"\"Find the beam by fitting a power-law to the radial average and\n optimizing the goodness-of-fit measure selected by drive_by.\n\n Inputs:\n data: scattering matrix\n orig_initial: first guess for the origin\n mask: mask matrix. Nonzero is non-masked.\n rmin,rmax: distance from the origin (in pixels) of the fitting range\n drive_by: 'R2' or 'Chi2'\n extent: approximate distance of the current and the real origin in pixels.\n Too high a value makes the fitting procedure unstable; too low a value\n does not allow the origin to move far enough from the initial guess.\n callback: callback function (expects no arguments)\n Outputs:\n the beam coordinates\n\n Notes:\n A power-law will be fitted.\n \"\"\"\n orig_initial = np.array(orig_initial)\n mask = 1 - mask.astype(np.uint8)\n data = data.astype(np.double)\n pix = np.arange(rmin * 1.0, rmax * 1.0, 1)\n\n def targetfunc(orig, data, mask, orig_orig, callback):\n I, E = radintpix(\n data, None, orig[0] + orig_orig[0], orig[1] + orig_orig[1], mask, pix)[1:3]\n p, dp, stat = misc.easylsq.nlsq_fit(\n pix, I, E, lambda q, A, alpha: A * q ** alpha, [1.0, -3.0])\n if callback is not None:\n callback()\n if drive_by == 'R2':\n return 1 - stat['R2']\n elif drive_by.startswith('Chi2'):\n return stat[drive_by]\n else:\n raise ValueError('Invalid argument for drive_by: %s' % drive_by)\n orig1 = scipy.optimize.fmin(targetfunc, np.array([extent, extent]),\n args=(\n data, mask, orig_initial - extent, callback),\n maxiter=maxiter, disp=False)\n return np.array(orig_initial) - extent + orig1"} {"SOURCE": "codesearchnet", "instruction": "Can you generate the documentation for the following Python 3 function\ndef maskname(self) -> Optional[str]:\n mask = self._data['geometry']['mask']\n # os.path.isabs() is the predicate needed here; os.path.abspath() is always truthy\n if os.path.isabs(mask):\n mask = os.path.split(mask)[-1]\n return mask", "response": "Name of the mask matrix file."} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef fit(self, fitfunction, parinit, unfittableparameters=(), *args, **kwargs):\n kwargs['otherparameters'] = unfittableparameters\n fitter = Fitter(fitfunction, parinit, self.q, self.Intensity, self.qError, self.Error, *args, **kwargs)\n # keep FixedParameter instances, mark free parameters with None\n fixedvalues = [p if isinstance(p, FixedParameter) else None for p in parinit]\n fitter.fixparameters(fixedvalues)\n fitter.fit()\n pars = fitter.parameters()\n uncs = fitter.uncertainties()\n stats = fitter.stats()\n results = [ErrorValue(p, u) for p, u in zip(pars, uncs)] + [stats, type(self)(self.q, stats['func_value'])]\n return results", "response": "Perform a nonlinear least-squares fit using sastool.misc.fitter.Fitter.fit"}
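A note on the path test in maskname above: os.path.abspath() returns a non-empty path string for any argument, so using it as a condition is always true; os.path.isabs() is the function that actually distinguishes absolute from relative paths. A quick demonstration (the file names are made up):

    import os.path

    print(bool(os.path.abspath("mask.mat")))  # True -- abspath always yields a path string
    print(os.path.isabs("mask.mat"))          # False -- relative path
    print(os.path.isabs("/data/mask.mat"))    # True -- absolute path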
fit"} {"SOURCE": "codesearchnet", "instruction": "Can you tell what is the following Python 3 function doing\ndef momentum(self, exponent=1, errorrequested=True):\n y = self.Intensity * self.q ** exponent\n m = np.trapz(y, self.q)\n if errorrequested:\n err = self.Error * self.q ** exponent\n dm = errtrapz(self.q, err)\n return ErrorValue(m, dm)\n else:\n return m", "response": "Calculate the momentum of the object."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef scalefactor(self, other, qmin=None, qmax=None, Npoints=None):\n if qmin is None:\n qmin = max(self.q.min(), other.q.min())\n if qmax is None:\n xmax = min(self.q.max(), other.q.max())\n data1 = self.trim(qmin, qmax)\n data2 = other.trim(qmin, qmax)\n if Npoints is None:\n Npoints = min(len(data1), len(data2))\n commonx = np.linspace(\n max(data1.q.min(), data2.q.min()), min(data2.q.max(), data1.q.max()), Npoints)\n data1 = data1.interpolate(commonx)\n data2 = data2.interpolate(commonx)\n return nonlinear_odr(data1.Intensity, data2.Intensity, data1.Error, data2.Error, lambda x, a: a * x, [1])[0]", "response": "Calculates a scaling factor by which this curve is to be multiplied to best fit the other one."} {"SOURCE": "codesearchnet", "instruction": "Explain what the following Python 3 code does\ndef _substitute_fixed_parameters_covar(self, covar):\n covar_resolved = np.empty((len(self._fixed_parameters), len(self._fixed_parameters)))\n indices_of_fixed_parameters = [i for i in range(len(self.parameters())) if\n self._fixed_parameters[i] is not None]\n indices_of_free_parameters = [i for i in range(len(self.parameters())) if self._fixed_parameters[i] is None]\n for i in range(covar_resolved.shape[0]):\n if i in indices_of_fixed_parameters:\n # the i-eth argument was fixed. 
{"SOURCE": "codesearchnet", "instruction": "Given the following Python 3 function, write the documentation\ndef loadmask(self, filename: str) -> np.ndarray:\n mask = scipy.io.loadmat(self.find_file(filename, what='mask'))\n maskkey = [k for k in mask.keys() if not (k.startswith('_') or k.endswith('_'))][0]\n # use the builtin bool: np.bool has been removed from recent NumPy releases\n return mask[maskkey].astype(bool)", "response": "Load a mask file."} {"SOURCE": "codesearchnet", "instruction": "How would you implement a function in Python 3 that\nloads a radial scattering curve", "response": "def loadcurve(self, fsn: int) -> classes2.Curve:\n \"\"\"Load a radial scattering curve\"\"\"\n return classes2.Curve.new_from_file(self.find_file(self._exposureclass + '_%05d.txt' % fsn))"} {"SOURCE": "codesearchnet", "instruction": "Can you generate a brief explanation for the following Python 3 code\ndef readcbf(name, load_header=False, load_data=True, for_nexus=False):\n with open(name, 'rb') as f:\n cbfbin = f.read()\n datastart = cbfbin.find(b'\\x0c\\x1a\\x04\\xd5') + 4\n hed = [x.strip() for x in cbfbin[:datastart].split(b'\\n')]\n header = {}\n readingmode = None\n for i in range(len(hed)):\n if not hed[i]:\n # skip empty header lines\n continue\n elif hed[i] == b';':\n continue\n elif hed[i].startswith(b'_array_data.header_convention'):\n header['CBF_header_convention'] = str(hed[i][\n len(b'_array_data.header_convention'):].strip().replace(b'\"', b''), encoding='utf-8')\n elif hed[i].startswith(b'_array_data.header_contents'):\n readingmode = 'PilatusHeader'\n elif hed[i].startswith(b'_array_data.data'):\n readingmode = 'CIFHeader'\n elif readingmode == 'PilatusHeader':\n if not hed[i].startswith(b'#'):\n continue\n line = hed[i].strip()[1:].strip()\n try:\n # try to interpret the line as the date.\n header['CBF_Date'] = dateutil.parser.parse(line)\n header['Date'] = header['CBF_Date']\n continue\n except (ValueError, TypeError):\n # eat exception: if we cannot parse this line as a date, try\n # another format.\n pass\n treated = False\n for sep in (b':', b'='):\n if treated:\n continue\n if line.count(sep) == 1:\n name, value = tuple(x.strip() for x in line.split(sep, 1))\n try:\n m = re.match(\n b'^(?P<number>-?(\d+(\.\d+)?(e-?\d+)?))\s+(?P<unit>m|s|counts|eV)$', value).groupdict()\n value = float(m['number'])\n m['unit'] = str(m['unit'], encoding='utf-8')\n except AttributeError:\n # the regex did not match the string, thus re.match()\n # returned None.\n pass\n header[str(name, 'utf-8')] = value\n treated = True\n if treated:\n continue\n if line.startswith(b'Pixel_size'):\n header['XPixel'], header['YPixel'] = tuple(\n [float(a.strip().split(b' ')[0]) * 1000 for a in line[len(b'Pixel_size'):].split(b'x')])\n else:\n try:\n m = re.match(\n b'^(?P