#include #include #include #include #include #include #include #include #include #include #include #include "dive.h" #include "membuffer.h" /* * The libgit2 people are incompetent at making libraries. They randomly change * the interfaces, often just renaming things without any sane way to know which * version you should check for etc etc. It's a disgrace. */ #if !LIBGIT2_VER_MAJOR && LIBGIT2_VER_MINOR < 22 #define git_remote_lookup(res, repo, name) git_remote_load(res, repo, name) #define git_remote_fetch(remote, refspecs, signature, reflog) git_remote_fetch(remote, signature, reflog) #endif static const char *system_git_dir(void) { static char pathname[PATH_MAX]; if (!*pathname) { // This doesn't work on Windows, crap that it is. Somebody needs to fix it. snprintf(pathname, PATH_MAX, "%s/git-caches", system_default_directory()); mkdir(pathname, 0777); } return pathname; } static char *get_local_dir(const char *remote, const char *branch) { SHA_CTX ctx; unsigned char hash[20]; // That zero-byte update is so that we don't get hash // collisions for "repo1 branch" vs "repo 1branch". SHA1_Init(&ctx); SHA1_Update(&ctx, remote, strlen(remote)); SHA1_Update(&ctx, "", 1); SHA1_Update(&ctx, branch, strlen(branch)); SHA1_Final(hash, &ctx); return format_string("%s/%02x%02x%02x%02x%02x%02x%02x%02x", system_git_dir(), hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7]); } static int check_clean(const char *path, unsigned int status, void *payload) { status &= ~GIT_STATUS_CURRENT | GIT_STATUS_IGNORED; if (!status) return 0; report_error("WARNING: Git cache directory modified (path %s)", path); return 1; } static int try_to_update(git_repository *rep, git_reference *local, git_reference *remote) { if (!git_reference_cmp(local, remote)) return 0; return report_error("Local and remote do not match, not updating"); } static git_repository *update_local_repo(const char *localdir, const char *remote, const char *branch) { int error; git_repository *repo = NULL; git_remote *origin; git_reference *local_ref, *remote_ref; error = git_repository_open(&repo, localdir); if (error) { report_error("Unable to open git cache repository at %s: %s", localdir, giterr_last()->message); return NULL; } /* * NOTE! Remote errors are reported, but are nonfatal: * we still successfully return the local repository. */ error = git_remote_lookup(&origin, repo, "origin"); if (error) { report_error("Repository '%s' origin lookup failed (%s)", remote, giterr_last()->message); return repo; } // NOTE! A fetch error is not fatal, we just report it error = git_remote_fetch(origin, NULL, NULL, NULL); git_remote_free(origin); if (error) { report_error("Unable to update cache for remote '%s'", remote); return repo; } // Dirty modified state in the working tree? We're not going // to tru to update if (git_status_foreach(repo, check_clean, NULL)) return repo; if (git_branch_lookup(&local_ref, repo, branch, GIT_BRANCH_LOCAL)) { report_error("Git cache branch %s no longer exists", branch); return repo; } if (git_branch_upstream(&remote_ref, local_ref)) { report_error("Git cache branch %s no longer has an upstream branch", branch); git_reference_free(local_ref); return repo; } try_to_update(repo, local_ref, remote_ref); git_reference_free(local_ref); git_reference_free(remote_ref); return repo; } static git_repository *create_local_repo(const char *localdir, const char *remote, const char *branch) { int error; git_repository *cloned_repo = NULL; git_clone_options opts = GIT_CLONE_OPTIONS_INIT; opts.checkout_branch = branch; error = git_clone(&cloned_repo, remote, localdir, &opts); if (error) { report_error("git clone of %s failed (%s)", remote, giterr_last()->message); return NULL; } return cloned_repo; } static struct git_repository *get_remote_repo(const char *localdir, const char *remote, const char *branch) { struct stat st; /* Do we already have a local cache? */ if (!stat(localdir, &st)) { if (!S_ISDIR(st.st_mode)) { report_error("local git cache at '%s' is corrupt"); return NULL; } return update_local_repo(localdir, remote, branch); } return create_local_repo(localdir, remote, branch); } /* * This turns a remote repository into a local one if possible. * * The recognized formats are * git://host/repo[branch] * ssh://host/repo[branch] * http://host/repo[branch] * https://host/repo[branch] * file://repo[branch] */ static struct git_repository *is_remote_git_repository(const char *remote, const char *branch) { char c, *localdir; const char *p = remote; while ((c = *p++) >= 'a' && c <= 'z') /* nothing */; if (c != ':') return NULL; if (*p++ != '/' || *p++ != '/') return NULL; /* Special-case "file://", since it's already local */ if (!strncmp(remote, "file://", 7)) remote += 7; /* * Ok, we found "[a-z]*://", we've simplified the * local repo case (because libgit2 is insanely slow * for that), and we think we have a real "remote * git" format. * * We now create the SHA1 hash of the whole thing, * including the branch name. That will be our unique * unique local repository name. * * NOTE! We will create a local repository per branch, * because * * (a) libgit2 remote tracking branch support seems to * be a bit lacking * (b) we'll actually check the branch out so that we * can do merges etc too. * * so even if you have a single remote git repo with * multiple branches for different people, the local * caches will sadly force that to split into multiple * individual repositories. */ localdir = get_local_dir(remote, branch); if (!localdir) return NULL; return get_remote_repo(localdir, remote, branch); } /* * If it's not a git repo, return NULL. Be very conservative. */ struct git_repository *is_git_repository(const char *filename, const char **branchp) { int flen, blen, ret; struct stat st; git_repository *repo; char *loc, *branch; flen = strlen(filename); if (!flen || filename[--flen] != ']') return NULL; /* Find the matching '[' */ blen = 0; while (flen && filename[--flen] != '[') blen++; /* Ignore slashes at the end of the repo name */ while (flen && filename[flen-1] == '/') flen--; if (!flen) return NULL; /* * This is the "point of no return": the name matches * the git repository name rules, and we will no longer * return NULL. * * We will either return "dummy_git_repository" and the * branch pointer will have the _whole_ filename in it, * or we will return a real git repository with the * branch pointer being filled in with just the branch * name. * * The actual git reading/writing routines can use this * to generate proper error messages. */ *branchp = filename; loc = format_string("%.*s", flen, filename); if (!loc) return dummy_git_repository; branch = format_string("%.*s", blen, filename+flen+1); if (!branch) { free(loc); return dummy_git_repository; } repo = is_remote_git_repository(loc, branch); if (repo) { free(loc); *branchp = branch; return repo; } if (stat(loc, &st) < 0 || !S_ISDIR(st.st_mode)) { free(loc); free(branch); return dummy_git_repository; } ret = git_repository_open(&repo, loc); free(loc); if (ret < 0) { free(branch); return dummy_git_repository; } *branchp = branch; return repo; }