diff options
Diffstat (limited to 'git-access.c')
-rw-r--r-- | git-access.c | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/git-access.c b/git-access.c new file mode 100644 index 000000000..bc8903331 --- /dev/null +++ b/git-access.c @@ -0,0 +1,278 @@ +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <git2.h> + +#include "dive.h" +#include "membuffer.h" + +/* + * The libgit2 people are incompetent at making libraries. They randomly change + * the interfaces, often just renaming things without any sane way to know which + * version you should check for etc etc. It's a disgrace. + */ +#if !LIBGIT2_VER_MAJOR && LIBGIT2_VER_MINOR < 22 + #define git_remote_lookup(res, repo, name) git_remote_load(res, repo, name) + #if LIBGIT2_VER_MINOR <= 20 + #define git_remote_fetch(remote, refspecs, signature, reflog) git_remote_fetch(remote) + #else + #define git_remote_fetch(remote, refspecs, signature, reflog) git_remote_fetch(remote, signature, reflog) + #endif +#endif + +static char *get_local_dir(const char *remote, const char *branch) +{ + SHA_CTX ctx; + unsigned char hash[20]; + + // That zero-byte update is so that we don't get hash + // collisions for "repo1 branch" vs "repo 1branch". + SHA1_Init(&ctx); + SHA1_Update(&ctx, remote, strlen(remote)); + SHA1_Update(&ctx, "", 1); + SHA1_Update(&ctx, branch, strlen(branch)); + SHA1_Final(hash, &ctx); + + return format_string("%s/%02x%02x%02x%02x%02x%02x%02x%02x", + system_default_directory(), + hash[0], hash[1], hash[2], hash[3], + hash[4], hash[5], hash[6], hash[7]); +} + +static int check_clean(const char *path, unsigned int status, void *payload) +{ + status &= ~GIT_STATUS_CURRENT | GIT_STATUS_IGNORED; + if (!status) + return 0; + report_error("WARNING: Git cache directory modified (path %s)", path); + return 1; +} + +static int try_to_update(git_repository *rep, git_reference *local, git_reference *remote) +{ + if (!git_reference_cmp(local, remote)) + return 0; + return report_error("Local and remote do not match, not updating"); +} + +static git_repository *update_local_repo(const char *localdir, const char *remote, const char *branch) +{ + int error; + git_repository *repo = NULL; + git_remote *origin; + git_reference *local_ref, *remote_ref; + + error = git_repository_open(&repo, localdir); + if (error) { + report_error("Unable to open git cache repository at %s: %s", + localdir, giterr_last()->message); + return NULL; + } + + /* + * NOTE! Remote errors are reported, but are nonfatal: + * we still successfully return the local repository. + */ + error = git_remote_lookup(&origin, repo, "origin"); + if (error) { + report_error("Repository '%s' origin lookup failed (%s)", + remote, giterr_last()->message); + return repo; + } + + // NOTE! A fetch error is not fatal, we just report it + error = git_remote_fetch(origin, NULL, NULL, NULL); + git_remote_free(origin); + if (error) { + report_error("Unable to update cache for remote '%s'", remote); + return repo; + } + + // Dirty modified state in the working tree? We're not going + // to tru to update + if (git_status_foreach(repo, check_clean, NULL)) + return repo; + + if (git_branch_lookup(&local_ref, repo, branch, GIT_BRANCH_LOCAL)) { + report_error("Git cache branch %s no longer exists", branch); + return repo; + } + + if (git_branch_upstream(&remote_ref, local_ref)) { + report_error("Git cache branch %s no longer has an upstream branch", branch); + git_reference_free(local_ref); + return repo; + } + + try_to_update(repo, local_ref, remote_ref); + git_reference_free(local_ref); + git_reference_free(remote_ref); + return repo; +} + +static git_repository *create_local_repo(const char *localdir, const char *remote, const char *branch) +{ + int error; + git_repository *cloned_repo = NULL; + git_clone_options opts = GIT_CLONE_OPTIONS_INIT; + + opts.checkout_branch = branch; + error = git_clone(&cloned_repo, remote, localdir, &opts); + if (error) { + report_error("git clone of %s failed (%s)", remote, giterr_last()->message); + return NULL; + } + return cloned_repo; +} + +static struct git_repository *get_remote_repo(const char *localdir, const char *remote, const char *branch) +{ + struct stat st; + + /* Do we already have a local cache? */ + if (!stat(localdir, &st)) { + if (!S_ISDIR(st.st_mode)) { + report_error("local git cache at '%s' is corrupt"); + return NULL; + } + return update_local_repo(localdir, remote, branch); + } + return create_local_repo(localdir, remote, branch); +} + +/* + * This turns a remote repository into a local one if possible. + * + * The recognized formats are + * git://host/repo[branch] + * ssh://host/repo[branch] + * http://host/repo[branch] + * https://host/repo[branch] + * file://repo[branch] + */ +static struct git_repository *is_remote_git_repository(const char *remote, const char *branch) +{ + char c, *localdir; + const char *p = remote; + + while ((c = *p++) >= 'a' && c <= 'z') + /* nothing */; + if (c != ':') + return NULL; + if (*p++ != '/' || *p++ != '/') + return NULL; + + /* Special-case "file://", since it's already local */ + if (!strncmp(remote, "file://", 7)) + remote += 7; + + /* + * Ok, we found "[a-z]*://", we've simplified the + * local repo case (because libgit2 is insanely slow + * for that), and we think we have a real "remote + * git" format. + * + * We now create the SHA1 hash of the whole thing, + * including the branch name. That will be our unique + * unique local repository name. + * + * NOTE! We will create a local repository per branch, + * because + * + * (a) libgit2 remote tracking branch support seems to + * be a bit lacking + * (b) we'll actually check the branch out so that we + * can do merges etc too. + * + * so even if you have a single remote git repo with + * multiple branches for different people, the local + * caches will sadly force that to split into multiple + * individual repositories. + */ + localdir = get_local_dir(remote, branch); + if (!localdir) + return NULL; + + return get_remote_repo(localdir, remote, branch); +} + +/* + * If it's not a git repo, return NULL. Be very conservative. + */ +struct git_repository *is_git_repository(const char *filename, const char **branchp) +{ + int flen, blen, ret; + struct stat st; + git_repository *repo; + char *loc, *branch; + + flen = strlen(filename); + if (!flen || filename[--flen] != ']') + return NULL; + + /* Find the matching '[' */ + blen = 0; + while (flen && filename[--flen] != '[') + blen++; + + /* Ignore slashes at the end of the repo name */ + while (flen && filename[flen-1] == '/') + flen--; + + if (!flen) + return NULL; + + /* + * This is the "point of no return": the name matches + * the git repository name rules, and we will no longer + * return NULL. + * + * We will either return "dummy_git_repository" and the + * branch pointer will have the _whole_ filename in it, + * or we will return a real git repository with the + * branch pointer being filled in with just the branch + * name. + * + * The actual git reading/writing routines can use this + * to generate proper error messages. + */ + *branchp = filename; + loc = format_string("%.*s", flen, filename); + if (!loc) + return dummy_git_repository; + + branch = format_string("%.*s", blen, filename+flen+1); + if (!branch) { + free(loc); + return dummy_git_repository; + } + + repo = is_remote_git_repository(loc, branch); + if (repo) { + free(loc); + *branchp = branch; + return repo; + } + + if (stat(loc, &st) < 0 || !S_ISDIR(st.st_mode)) { + free(loc); + free(branch); + return dummy_git_repository; + } + + ret = git_repository_open(&repo, loc); + free(loc); + if (ret < 0) { + free(branch); + return dummy_git_repository; + } + *branchp = branch; + return repo; +} |