diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-03-08 15:59:39 -0800 |
---|---|---|
committer | Dirk Hohndel <dirk@hohndel.org> | 2014-03-09 19:36:34 -0700 |
commit | 719656b438c0574b4a98c5cda1ad742849312733 (patch) | |
tree | 2300779117f157896943ec102c85a74d623039fd | |
parent | 34fb8240da7632847e36ffd9629a49e1c60b59d6 (diff) | |
download | subsurface-719656b438c0574b4a98c5cda1ad742849312733.tar.gz |
Start actually parsing the git object data
This implements the simple line parser (including the multiline strings
with escape characters). What a difference a good file format makes:
this is nothing like the pain that is XML.
That said, it only does the line/string parsing right now, it doesn't
actually then look at what the lines say. So no human-noticeable
improvements in the actual data shown by subsurface.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Dirk Hohndel <dirk@hohndel.org>
-rw-r--r-- | load-git.c | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/load-git.c b/load-git.c index 79c17ef22..ac1acd3a2 100644 --- a/load-git.c +++ b/load-git.c @@ -14,6 +14,144 @@ #include "device.h" #include "membuffer.h" +static void divecomputer_parser(const char *line, struct membuffer *str, void *_dc) +{ +// struct divecomputer *dc = _dc; +} + +static void dive_parser(const char *line, struct membuffer *str, void *_dive) +{ +// struct dive *dive = _dive; +} + +static void trip_parser(const char *line, struct membuffer *str, void *_trip) +{ +// dive_trip_t *trip = _trip; +} + +/* + * We have a very simple line-based interface, with the small + * complication that lines can have strings in the middle, and + * a string can be multiple lines. + * + * The UTF-8 string escaping is *very* simple, though: + * + * - a string starts and ends with double quotes (") + * + * - inside the string we escape: + * (a) double quotes with '\"' + * (b) backslash (\) with '\\' + * + * - additionally, for human readability, we escape + * newlines with '\n\t', with the exception that + * consecutive newlines are left unescaped (so an + * empty line doesn't become a line with just a tab + * on it). + * + * Also, while the UTF-8 string can have arbitrarily + * long lines, the non-string parts of the lines are + * never long, so we can use a small temporary buffer + * on stack for that part. + * + * Also, note that if a line has one or more strings + * in it: + * + * - each string will be represented as a single '"' + * character in the output. + * + * - all string will exist in the same 'membuffer', + * separated by NUL characters (that cannot exist + * in a string, not even quoted). + */ +static const char *parse_one_string(const char *buf, const char *end, struct membuffer *b) +{ + const char *p = buf; + + /* + * We turn multiple strings one one line (think dive tags) into one + * membuffer that has NUL characters in between strings. + */ + if (b->len) + put_bytes(b, "", 1); + + while (p < end) { + char replace; + + switch (*p++) { + default: + continue; + case '\n': + if (p < end && *p == '\t') { + replace = '\n'; + break; + } + continue; + case '\\': + if (p < end) { + replace = *p; + break; + } + continue; + case '"': + replace = 0; + break; + } + put_bytes(b, buf, p - buf - 1); + if (!replace) + break; + put_bytes(b, &replace, 1); + buf = ++p; + } + return p; +} + +typedef void (line_fn_t)(const char *, struct membuffer *, void *); +#define MAXLINE 100 +static unsigned parse_one_line(const char *buf, unsigned size, line_fn_t *fn, void *fndata, struct membuffer *b) +{ + const char *end = buf + size; + const char *p = buf; + char line[MAXLINE+1]; + int off = 0; + + while (p < end) { + char c = *p++; + if (c == '\n') + break; + line[off] = c; + off++; + if (off > MAXLINE) + off = MAXLINE; + if (c == '"') + p = parse_one_string(p, end, b); + } + line[off] = 0; + fn(line, b, fndata); + return p - buf; +} + +/* + * We keep on re-using the membuffer that we use for + * strings, but the callback function can "steal" it by + * saving its value and just clear the original. + */ +static void for_each_line(git_blob *blob, line_fn_t *fn, void *fndata) +{ + const char *content = git_blob_rawcontent(blob); + unsigned int size = git_blob_rawsize(blob); + struct membuffer str = { 0 }; + + while (size) { + unsigned int n = parse_one_line(content, size, fn, fndata, &str); + content += n; + size -= n; + + /* Re-use the allocation, but forget the data */ + str.len = 0; + } + free_buffer(&str); +} + #define GIT_WALK_OK 0 #define GIT_WALK_SKIP 1 @@ -265,6 +403,7 @@ static int parse_divecomputer_entry(git_repository *repo, const git_tree_entry * git_blob *blob = git_tree_entry_blob(repo, entry); if (!blob) return report_error("Unable to read divecomputer file"); + for_each_line(blob, divecomputer_parser, active_dive); git_blob_free(blob); return 0; } @@ -277,6 +416,7 @@ static int parse_dive_entry(git_repository *repo, const git_tree_entry *entry, c return report_error("Unable to read dive file"); if (*suffix) dive->number = atoi(suffix+1); + for_each_line(blob, dive_parser, active_dive); git_blob_free(blob); return 0; } @@ -286,6 +426,7 @@ static int parse_trip_entry(git_repository *repo, const git_tree_entry *entry) git_blob *blob = git_tree_entry_blob(repo, entry); if (!blob) return report_error("Unable to read trip file"); + for_each_line(blob, trip_parser, active_trip); git_blob_free(blob); return 0; } |