summaryrefslogtreecommitdiffstats
path: root/load-git.c
blob: ac1acd3a2701c4aaf48426a56e78b89baed219b7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <git2.h>

#include "dive.h"
#include "device.h"
#include "membuffer.h"

static void divecomputer_parser(const char *line, struct membuffer *str, void *_dc)
{
//	struct divecomputer *dc = _dc;
}

static void dive_parser(const char *line, struct membuffer *str, void *_dive)
{
//	struct dive *dive = _dive;
}

static void trip_parser(const char *line, struct membuffer *str, void *_trip)
{
//	dive_trip_t *trip = _trip;
}

/*
 * We have a very simple line-based interface, with the small
 * complication that lines can have strings in the middle, and
 * a string can be multiple lines.
 *
 * The UTF-8 string escaping is *very* simple, though:
 *
 *  - a string starts and ends with double quotes (")
 *
 *  - inside the string we escape:
 *     (a) double quotes with '\"'
 *     (b) backslash (\) with '\\'
 *
 *  - additionally, for human readability, we escape
 *    newlines with '\n\t', with the exception that
 *    consecutive newlines are left unescaped (so an
 *    empty line doesn't become a line with just a tab
 *    on it).
 *
 * Also, while the UTF-8 string can have arbitrarily
 * long lines, the non-string parts of the lines are
 * never long, so we can use a small temporary buffer
 * on stack for that part.
 *
 * Also, note that if a line has one or more strings
 * in it:
 *
 *  - each string will be represented as a single '"'
 *    character in the output.
 *
 *  - all string will exist in the same 'membuffer',
 *    separated by NUL characters (that cannot exist
 *    in a string, not even quoted).
 */
static const char *parse_one_string(const char *buf, const char *end, struct membuffer *b)
{
	const char *p = buf;

	/*
	 * We turn multiple strings one one line (think dive tags) into one
	 * membuffer that has NUL characters in between strings.
	 */
	if (b->len)
		put_bytes(b, "", 1);

	while (p < end) {
		char replace;

		switch (*p++) {
		default:
			continue;
		case '\n':
			if (p < end && *p == '\t') {
				replace = '\n';
				break;
			}
			continue;
		case '\\':
			if (p < end) {
				replace = *p;
				break;
			}
			continue;
		case '"':
			replace = 0;
			break;
		}
		put_bytes(b, buf, p - buf - 1);
		if (!replace)
			break;
		put_bytes(b, &replace, 1);
		buf = ++p;
	}
	return p;
}

typedef void (line_fn_t)(const char *, struct membuffer *, void *);
#define MAXLINE 100
static unsigned parse_one_line(const char *buf, unsigned size, line_fn_t *fn, void *fndata, struct membuffer *b)
{
	const char *end = buf + size;
	const char *p = buf;
	char line[MAXLINE+1];
	int off = 0;

	while (p < end) {
		char c = *p++;
		if (c == '\n')
			break;
		line[off] = c;
		off++;
		if (off > MAXLINE)
			off = MAXLINE;
		if (c == '"')
			p = parse_one_string(p, end, b);
	}
	line[off] = 0;
	fn(line, b, fndata);
	return p - buf;
}

/*
 * We keep on re-using the membuffer that we use for
 * strings, but the callback function can "steal" it by
 * saving its value and just clear the original.
 */
static void for_each_line(git_blob *blob, line_fn_t *fn, void *fndata)
{
	const char *content = git_blob_rawcontent(blob);
	unsigned int size = git_blob_rawsize(blob);
	struct membuffer str = { 0 };

	while (size) {
		unsigned int n = parse_one_line(content, size, fn, fndata, &str);
		content += n;
		size -= n;

		/* Re-use the allocation, but forget the data */
		str.len = 0;
	}
	free_buffer(&str);
}

#define GIT_WALK_OK   0
#define GIT_WALK_SKIP 1

static struct dive *active_dive;
static dive_trip_t *active_trip;

static struct dive *create_new_dive(timestamp_t when)
{
	struct dive *dive = alloc_dive();

	/* We'll fill in more data from the dive file */
	dive->when = when;

	if (active_trip)
		add_dive_to_trip(dive, active_trip);
	record_dive(dive);

	return dive;
}

static dive_trip_t *create_new_trip(int yyyy, int mm, int dd)
{
	dive_trip_t *trip = calloc(1, sizeof(dive_trip_t));
	struct tm tm = { 0 };

	/* We'll fill in the real data from the trip descriptor file */
	tm.tm_year = yyyy;
	tm.tm_mon = mm-1;
	tm.tm_mday = dd;
	trip->when = utc_mktime(&tm);

	return trip;
}

static bool validate_date(int yyyy, int mm, int dd)
{
	return yyyy > 1970 && yyyy < 3000 &&
		mm > 0 && mm < 13 &&
		dd > 0 && dd < 32;
}

static bool validate_time(int h, int m, int s)
{
	return h >= 0 && h < 24 &&
		m >= 0 && m < 60 &&
		s >=0 && s <= 60;
}

/*
 * Dive trip directory, name is 'nn-alphabetic[~hex]'
 */
static int dive_trip_directory(const char *root, const char *name)
{
	int yyyy = -1, mm = -1, dd = -1;

	if (sscanf(root, "%d/%d", &yyyy, &mm) != 2)
		return GIT_WALK_SKIP;
	dd = atoi(name);
	if (!validate_date(yyyy, mm, dd))
		return GIT_WALK_SKIP;
	active_trip = create_new_trip(yyyy, mm, dd);
	return GIT_WALK_OK;
}

/*
 * Dive directory, name is [[yyyy-]mm-]nn-ddd-hh:mm:ss[~hex],
 * and 'timeoff' points to what should be the time part of
 * the name (the first digit of the hour).
 *
 * The root path will be of the form yyyy/mm[/tripdir],
 */
static int dive_directory(const char *root, const char *name, int timeoff)
{
	int yyyy = -1, mm = -1, dd = -1;
	int h, m, s;
	int mday_off = timeoff - 7;
	int month_off = mday_off - 3;
	int year_off = month_off - 5;
	struct tm tm;

	/* There has to be a mday */
	if (mday_off < 0)
		return GIT_WALK_SKIP;
	if (name[timeoff-1] != '-')
		return GIT_WALK_SKIP;

	/* Get the time of day */
	if (sscanf(name+timeoff, "%d:%d:%d", &h, &m, &s) != 3)
		return GIT_WALK_SKIP;
	if (!validate_time(h, m, s))
		return GIT_WALK_SKIP;

	/*
	 * Using the "git_tree_walk()" interface is simple, but
	 * it kind of sucks as an interface because there is
	 * no sane way to pass the hierarchy to the callbacks.
	 * The "payload" is a fixed one-time thing: we'd like
	 * the "current trip" to be passed down to the dives
	 * that get parsed under that trip, but we can't.
	 *
	 * So "active_trip" is not the trip that is in the hierarchy
	 * _above_ us, it's just the trip that was _before_ us. But
	 * if a dive is not in a trip at all, we can't tell.
	 *
	 * We could just do a better walker that passes the
	 * return value around, but we hack around this by
	 * instead looking at the one hierarchical piece of
	 * data we have: the pathname to the current entry.
	 *
	 * This is pretty hacky. The magic '8' is the length
	 * of a pathname of the form 'yyyy/mm/'.
	 */
	if (strlen(root) == 8)
		active_trip = NULL;

	/*
	 * Get the date. The day of the month is in the dive directory
	 * name, the year and month might be in the path leading up
	 * to it.
	 */
	dd = atoi(name + mday_off);
	if (year_off < 0) {
		if (sscanf(root, "%d/%d", &yyyy, &mm) != 2)
			return GIT_WALK_SKIP;
	} else
		yyyy = atoi(name + year_off);
	if (month_off >= 0)
		mm = atoi(name + month_off);

	if (!validate_date(yyyy, mm, dd))
		return GIT_WALK_SKIP;

	/* Ok, close enough. We've gotten sufficient information */
	memset(&tm, 0, sizeof(tm));
	tm.tm_hour = h;
	tm.tm_min = m;
	tm.tm_sec = s;
	tm.tm_year = yyyy - 1900;
	tm.tm_mon = mm-1;
	tm.tm_mday = dd;

	active_dive = create_new_dive(utc_mktime(&tm));
	return GIT_WALK_OK;
}

/*
 * Return the length of the string without the unique part.
 */
static int nonunique_length(const char *str)
{
	int len = 0;

	for (;;) {
		char c = *str++;
		if (!c || c == '~')
			return len;
		len++;
	}
}

/*
 * When hitting a directory node, we have a couple of cases:
 *
 *  - It's just a date entry - all numeric (either year or month):
 *
 *       [yyyy|mm]
 *
 *    We don't do anything with these, we just traverse into them.
 *    The numeric data will show up as part of the full path when
 *    we hit more interesting entries.
 *
 *  - It's a trip directory. The name will be of the form
 *
 *       nn-alphabetic[~hex]
 *
 *    where 'nn' is the day of the month (year and month will be
 *    encoded in the path leading up to this).
 *
 *  - It's a dive directory. The name will be of the form
 *
 *       [[yyyy-]mm-]nn-ddd-hh:mm:ss[~hex]
 *
 *    which describes the date and time of a dive (yyyy and mm
 *    are optional, and may be encoded in the path leading up to
 *    the dive).
 *
 *  - It's some random non-dive-data directory.
 *
 *    Subsurface doesn't create these yet, but maybe we'll encode
 *    pictures etc. If it doesn't match the above patterns, we'll
 *    ignore them for dive loading purposes, and not even recurse
 *    into them.
 */
static int walk_tree_directory(const char *root, const git_tree_entry *entry)
{
	const char *name = git_tree_entry_name(entry);
	int digits = 0, len;
	char c;

	while (isdigit(c = name[digits]))
		digits++;

	/* Doesn't start with two or four digits? Skip */
	if (digits != 4 && digits != 2)
		return GIT_WALK_SKIP;

	/* Only digits? Do nothing, but recurse into it */
	if (!c)
		return GIT_WALK_OK;

	/* All valid cases need to have a slash following */
	if (c != '-')
		return GIT_WALK_SKIP;

	/* Do a quick check for a common dive case */
	len = nonunique_length(name);

	/*
	 * We know the len is at least 3, because we had at least
	 * two digits and a dash
	 */
	if (name[len-3] == ':')
		return dive_directory(root, name, len-8);

	if (digits != 2)
		return GIT_WALK_SKIP;

	return dive_trip_directory(root, name);
}

git_blob *git_tree_entry_blob(git_repository *repo, const git_tree_entry *entry)
{
	const git_oid *id = git_tree_entry_id(entry);
	git_blob *blob;

	if (git_blob_lookup(&blob, repo, id))
		return NULL;
	return blob;
}

/*
 * We should *really* try to delay the dive computer data parsing
 * until necessary, in order to reduce load-time. The parsing is
 * cheap, but the loading of the git blob into memory can be pretty
 * costly.
 */
static int parse_divecomputer_entry(git_repository *repo, const git_tree_entry *entry, const char *suffix)
{
	git_blob *blob = git_tree_entry_blob(repo, entry);
	if (!blob)
		return report_error("Unable to read divecomputer file");
	for_each_line(blob, divecomputer_parser, active_dive);
	git_blob_free(blob);
	return 0;
}

static int parse_dive_entry(git_repository *repo, const git_tree_entry *entry, const char *suffix)
{
	struct dive *dive = active_dive;
	git_blob *blob = git_tree_entry_blob(repo, entry);
	if (!blob)
		return report_error("Unable to read dive file");
	if (*suffix)
		dive->number = atoi(suffix+1);
	for_each_line(blob, dive_parser, active_dive);
	git_blob_free(blob);
	return 0;
}

static int parse_trip_entry(git_repository *repo, const git_tree_entry *entry)
{
	git_blob *blob = git_tree_entry_blob(repo, entry);
	if (!blob)
		return report_error("Unable to read trip file");
	for_each_line(blob, trip_parser, active_trip);
	git_blob_free(blob);
	return 0;
}

static int walk_tree_file(const char *root, const git_tree_entry *entry, git_repository *repo)
{
	struct dive *dive = active_dive;
	dive_trip_t *trip = active_trip;
	const char *name = git_tree_entry_name(entry);

	if (dive && !strncmp(name, "Divecomputer", 12))
		return parse_divecomputer_entry(repo, entry, name+12);
	if (dive && !strncmp(name, "Dive", 4))
		return parse_dive_entry(repo, entry, name+4);
	if (trip && !strcmp(name, "00-Trip"))
		return parse_trip_entry(repo, entry);
	report_error("Unknown file %s%s (%p %p)", root, name, dive, trip);
	return GIT_WALK_SKIP;
}

static int walk_tree_cb(const char *root, const git_tree_entry *entry, void *payload)
{
	git_repository *repo = payload;
	git_filemode_t mode = git_tree_entry_filemode(entry);

	if (mode == GIT_FILEMODE_TREE)
		return walk_tree_directory(root, entry);

	walk_tree_file(root, entry, repo);
	/* Ignore failed blob loads */
	return GIT_WALK_OK;
}

static int load_dives_from_tree(git_repository *repo, git_tree *tree)
{
	git_tree_walk(tree, GIT_TREEWALK_PRE, walk_tree_cb, repo);
	return 0;
}

static int do_git_load(git_repository *repo, const char *branch)
{
	int ret;
	git_reference *ref;
	git_object *tree;

	ret = git_branch_lookup(&ref, repo, branch, GIT_BRANCH_LOCAL);
	if (ret)
		return report_error("Unable to look up branch '%s'", branch);
	if (git_reference_peel(&tree, ref, GIT_OBJ_TREE))
		return report_error("Could not look up tree of branch '%s'", branch);
	ret = load_dives_from_tree(repo, (git_tree *) tree);
	git_object_free(tree);
	return ret;
}

int git_load_dives(char *where)
{
	int ret, len;
	git_repository *repo;
	char *loc, *branch;

	/* Jump over the "git" marker */
	loc = where + 3;
	while (isspace(*loc))
		loc++;

	/* Trim whitespace from the end */
	len = strlen(loc);
	while (len && isspace(loc[len-1]))
		loc[--len] = 0;

	/* Find a branch name if there is any */
	branch = strrchr(loc, ':');
	if (branch)
		*branch++ = 0;

	if (git_repository_open(&repo, loc))
		return report_error("Unable to open git repository at '%s' (branch '%s')", loc, branch);

	ret = do_git_load(repo, branch);
	git_repository_free(repo);
	return ret;
}