Spaces:
Runtime error
Runtime error
| import hashlib | |
| import re | |
| import shutil | |
| from pathlib import Path | |
| from typing import Dict, List, Optional, Sequence, TypedDict | |
| from git import Repo | |
| from langchain_cli.constants import ( | |
| DEFAULT_GIT_REF, | |
| DEFAULT_GIT_REPO, | |
| DEFAULT_GIT_SUBDIRECTORY, | |
| ) | |
| class DependencySource(TypedDict): | |
| git: str | |
| ref: Optional[str] | |
| subdirectory: Optional[str] | |
| api_path: Optional[str] | |
| event_metadata: Dict | |
| # use poetry dependency string format | |
| def parse_dependency_string( | |
| dep: Optional[str], | |
| repo: Optional[str], | |
| branch: Optional[str], | |
| api_path: Optional[str], | |
| ) -> DependencySource: | |
| if dep is not None and dep.startswith("git+"): | |
| if repo is not None or branch is not None: | |
| raise ValueError( | |
| "If a dependency starts with git+, you cannot manually specify " | |
| "a repo or branch." | |
| ) | |
| # remove git+ | |
| gitstring = dep[4:] | |
| subdirectory = None | |
| ref = None | |
| # first check for #subdirectory= on the end | |
| if "#subdirectory=" in gitstring: | |
| gitstring, subdirectory = gitstring.split("#subdirectory=") | |
| if "#" in subdirectory or "@" in subdirectory: | |
| raise ValueError( | |
| "#subdirectory must be the last part of the dependency string" | |
| ) | |
| # find first slash after :// | |
| # find @ or # after that slash | |
| # remainder is ref | |
| # if no @ or #, then ref is None | |
| # find first slash after :// | |
| if "://" not in gitstring: | |
| raise ValueError( | |
| "git+ dependencies must start with git+https:// or git+ssh://" | |
| ) | |
| _, find_slash = gitstring.split("://", 1) | |
| if "/" not in find_slash: | |
| post_slash = find_slash | |
| ref = None | |
| else: | |
| _, post_slash = find_slash.split("/", 1) | |
| if "@" in post_slash or "#" in post_slash: | |
| _, ref = re.split(r"[@#]", post_slash, 1) | |
| # gitstring is everything before that | |
| gitstring = gitstring[: -len(ref) - 1] if ref is not None else gitstring | |
| return DependencySource( | |
| git=gitstring, | |
| ref=ref, | |
| subdirectory=subdirectory, | |
| api_path=api_path, | |
| event_metadata={"dependency_string": dep}, | |
| ) | |
| elif dep is not None and dep.startswith("https://"): | |
| raise ValueError("Only git dependencies are supported") | |
| else: | |
| # if repo is none, use default, including subdirectory | |
| base_subdir = Path(DEFAULT_GIT_SUBDIRECTORY) if repo is None else Path() | |
| subdir = str(base_subdir / dep) if dep is not None else None | |
| gitstring = ( | |
| DEFAULT_GIT_REPO | |
| if repo is None | |
| else f"https://github.com/{repo.strip('/')}.git" | |
| ) | |
| ref = DEFAULT_GIT_REF if branch is None else branch | |
| # it's a default git repo dependency | |
| return DependencySource( | |
| git=gitstring, | |
| ref=ref, | |
| subdirectory=subdir, | |
| api_path=api_path, | |
| event_metadata={ | |
| "dependency_string": dep, | |
| "used_repo_flag": repo is not None, | |
| "used_branch_flag": branch is not None, | |
| }, | |
| ) | |
| def _list_arg_to_length(arg: Optional[List[str]], num: int) -> Sequence[Optional[str]]: | |
| if not arg: | |
| return [None] * num | |
| elif len(arg) == 1: | |
| return arg * num | |
| elif len(arg) == num: | |
| return arg | |
| else: | |
| raise ValueError(f"Argument must be of length 1 or {num}") | |
| def parse_dependencies( | |
| dependencies: Optional[List[str]], | |
| repo: List[str], | |
| branch: List[str], | |
| api_path: List[str], | |
| ) -> List[DependencySource]: | |
| num_deps = max( | |
| len(dependencies) if dependencies is not None else 0, len(repo), len(branch) | |
| ) | |
| if ( | |
| (dependencies and len(dependencies) != num_deps) | |
| or (api_path and len(api_path) != num_deps) | |
| or (repo and len(repo) not in [1, num_deps]) | |
| or (branch and len(branch) not in [1, num_deps]) | |
| ): | |
| raise ValueError( | |
| "Number of defined repos/branches/api_paths did not match the " | |
| "number of templates." | |
| ) | |
| inner_deps = _list_arg_to_length(dependencies, num_deps) | |
| inner_api_paths = _list_arg_to_length(api_path, num_deps) | |
| inner_repos = _list_arg_to_length(repo, num_deps) | |
| inner_branches = _list_arg_to_length(branch, num_deps) | |
| return [ | |
| parse_dependency_string(iter_dep, iter_repo, iter_branch, iter_api_path) | |
| for iter_dep, iter_repo, iter_branch, iter_api_path in zip( | |
| inner_deps, inner_repos, inner_branches, inner_api_paths | |
| ) | |
| ] | |
| def _get_repo_path(gitstring: str, ref: Optional[str], repo_dir: Path) -> Path: | |
| # only based on git for now | |
| ref_str = ref if ref is not None else "" | |
| hashed = hashlib.sha256((f"{gitstring}:{ref_str}").encode("utf-8")).hexdigest()[:8] | |
| removed_protocol = gitstring.split("://")[-1] | |
| removed_basename = re.split(r"[/:]", removed_protocol, 1)[-1] | |
| removed_extras = removed_basename.split("#")[0] | |
| foldername = re.sub(r"\W", "_", removed_extras) | |
| directory_name = f"{foldername}_{hashed}" | |
| return repo_dir / directory_name | |
| def update_repo(gitstring: str, ref: Optional[str], repo_dir: Path) -> Path: | |
| # see if path already saved | |
| repo_path = _get_repo_path(gitstring, ref, repo_dir) | |
| if repo_path.exists(): | |
| # try pulling | |
| try: | |
| repo = Repo(repo_path) | |
| if repo.active_branch.name != ref: | |
| raise ValueError() | |
| repo.remotes.origin.pull() | |
| except Exception: | |
| # if it fails, delete and clone again | |
| shutil.rmtree(repo_path) | |
| Repo.clone_from(gitstring, repo_path, branch=ref, depth=1) | |
| else: | |
| Repo.clone_from(gitstring, repo_path, branch=ref, depth=1) | |
| return repo_path | |
| def copy_repo( | |
| source: Path, | |
| destination: Path, | |
| ) -> None: | |
| """ | |
| Copies a repo, ignoring git folders. | |
| Raises FileNotFound error if it can't find source | |
| """ | |
| def ignore_func(_, files): | |
| return [f for f in files if f == ".git"] | |
| shutil.copytree(source, destination, ignore=ignore_func) | |