summaryrefslogtreecommitdiffstats
path: root/core/xmp_parser.cpp
blob: 4ce6cef316b40283431ec4131b4202c7d7e51172 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#include "xmp_parser.h"
#include "subsurface-string.h"

#include <libxml/parser.h>
#include <libxml/tree.h>
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstring>

extern "C" timestamp_t utc_mktime(struct tm *tm); // declared in core/dive.h

// Parse an XMP date string of the form
//     "yyyy-mm-dd[Thh:mm[:ss[.fraction]][+hh:mm|-hh:mm|Z]]"
// into a timestamp in seconds, as computed by utc_mktime() from the parsed
// calendar fields plus the (signed) time zone offset.
//
// Returns 0 if date is null, if the date part cannot be parsed, if a time
// part is present but lacks at least hours and minutes, or if a time zone
// sign is present but not followed by "hh:mm".
static timestamp_t parse_xmp_date(const char *date)
{
	if (!date)
		return 0;

	int year, month, day;
	if (sscanf(date, "%d-%d-%d", &year, &month, &day) != 3)
		return 0;

	int hours = 0, minutes = 0, seconds = 0;
	bool round_up = false;
	int tz_offset = 0; // in seconds

	// Check for time part
	if ((date = strchr(date, 'T')) != nullptr) {
		++date; // Skip 'T'

		// %n records how many characters were consumed, so that we can
		// look at the fractional seconds ourselves. It is only written
		// if all three numeric fields matched and does not count towards
		// the return value.
		int consumed = 0;
		int fields = sscanf(date, "%d:%d:%d%n", &hours, &minutes, &seconds, &consumed);
		if (fields < 2)
			return 0;

		// Our timestamps are in seconds, so we only need to know whether
		// the fraction is >= 0.5. That is decided by the first fractional
		// digit; parsing the fraction as an integer would misread ".5"
		// as 5 ms and ".0750" as 750 ms.
		if (fields == 3 && date[consumed] == '.') {
			char tenths = date[consumed + 1];
			round_up = tenths >= '5' && tenths <= '9';
		}

		// Check for timezone part. Note that we simply ignore 'Z' as that
		// means no time zone
		while (*date && *date != '+' && *date != '-')
			++date;
		if (*date) {
			int sign = *date == '+' ? 1 : -1;
			int tz_hours, tz_minutes;
			++date;
			if (sscanf(date, "%d:%d", &tz_hours, &tz_minutes) != 2)
				return 0;
			tz_offset = sign * (tz_hours * 60 + tz_minutes) * 60;
		}
	}

	// Round to seconds, since our timestamps are in seconds
	if (round_up)
		seconds += 1;

	struct tm tm = { 0 };
	tm.tm_year = year - 1900;
	tm.tm_mon = month - 1;
	tm.tm_mday = day;
	tm.tm_hour = hours;
	tm.tm_min = minutes;
	tm.tm_sec = seconds;

	timestamp_t res = utc_mktime(&tm);
	// NOTE(review): the offset is added as-is, i.e. a "-05:00" suffix yields
	// an earlier timestamp. Converting a local time to UTC would subtract
	// the offset instead - confirm the intended convention with the callers
	// before changing this.
	res += tz_offset;

	return res;
}

// Scan the attributes of a node for "xmp:CreateDate" and return its value
// parsed as a timestamp. Returns 0 if the node has no such attribute, if the
// attribute has no value, or if the value could not be parsed as a date.
static timestamp_t extract_timestamp_from_attributes(const xmlNode *node)
{
	const xmlAttr *attr = node->properties;
	while (attr) {
		const char *prefix = attr->ns ? (const char *)attr->ns->prefix : nullptr;

		if (same_string(prefix, "xmp") && same_string((const char *)attr->name, "CreateDate")) {
			// Only the first child of the attribute is considered,
			// i.e. a single property value.
			const xmlNode *value = attr->children;
			if (value && value->content)
				return parse_xmp_date((const char *)value->content);
			return 0;
		}

		attr = attr->next;
	}
	return 0;
}

// Walk the XML tree starting at firstnode (and its following siblings) in
// document order and return the first timestamp found, either from an
// "xmp:CreateDate" attribute or from the text content of an
// "exif:DateTimeOriginal" element. Returns 0 if firstnode is null or no
// timestamp could be extracted.
static timestamp_t extract_timestamp(const xmlNode *firstnode)
{
	if (!firstnode)
		return 0;

	// We use a private stack, so that we can return in one go without
	// having to unwind the call-stack. We only recurse to a fixed depth,
	// since the data we are interested in are at a shallow depth.
	// This can be increased on demand.
	static const int max_recursion_depth = 16;
	const xmlNode *stack[max_recursion_depth];
	stack[0] = firstnode;
	int stack_depth = 1;

	while (stack_depth > 0) {
		const xmlNode *node = stack[stack_depth - 1];
		const bool is_element = node->type == XML_ELEMENT_NODE;
		const bool is_text = node->type == XML_TEXT_NODE ||
				     node->type == XML_CDATA_SECTION_NODE;

		// Parse attributes. Only element nodes carry attributes; other
		// node kinds reachable via children (e.g. the entity declaration
		// below an entity reference) are not laid out as xmlNode, so we
		// must not read their "properties" field.
		if (is_element) {
			timestamp_t timestamp = extract_timestamp_from_attributes(node);
			if (timestamp)
				return timestamp;
		}

		// Parse content, if this is a non-blank text node. Content can only
		// be at the second level, since it is always contained in a tag.
		// Requiring a text node with actual content keeps us from passing a
		// null pointer to the date parser for child elements or comments.
		// TODO: We have to cast node to pointer to non-const, since we're supporting
		// old libxml2 versions, where xmlIsBlankNode takes such a pointer. Remove
		// in due course.
		if (is_text && stack_depth >= 2 && node->content && !xmlIsBlankNode((xmlNode *)node)) {
			const xmlNode *parent = stack[stack_depth - 2];
			// If the parent node is exif:DateTimeOriginal, try to parse as date
			if (parent->ns &&
			    same_string((const char *)parent->ns->prefix, "exif") &&
			    same_string((const char *)parent->name, "DateTimeOriginal")) {
				timestamp_t res = parse_xmp_date((const char *)node->content);
				if (res)
					return res;
			}
		}

		// If there are sub-items and we haven't reached recursion depth, recurse.
		// We only descend into elements, see comment on attributes above.
		if (is_element && node->children && stack_depth < max_recursion_depth) {
			stack[stack_depth++] = node->children;
			continue;
		}

		// Advance stack to next node in this level
		while (stack_depth > 0) {
			if ((stack[stack_depth - 1] = stack[stack_depth - 1]->next) != nullptr)
				break;
			// No more nodes at this level -> go up a level.
			--stack_depth;
		}
	}
	return 0;
}

// Parse an XMP packet held in memory and extract a creation timestamp.
//
// data: pointer to the XML data, size: its length in bytes.
// Returns the timestamp found by extract_timestamp(), or 0 if the input is
// null, empty, too large to be handed to libxml2, cannot be parsed as XML,
// or contains no recognized date.
timestamp_t parse_xmp(const char *data, size_t size)
{
	// xmlReadMemory() takes the buffer size as int. Reject anything that
	// doesn't fit instead of silently truncating the length.
	if (!data || size == 0 || size > static_cast<size_t>(INT_MAX))
		return 0;

	const char *encoding = xmlGetCharEncodingName(XML_CHAR_ENCODING_UTF8);
	// TODO: What do we pass as URL-parameter?
	xmlDoc *doc = xmlReadMemory(data, static_cast<int>(size), "url", encoding, 0);
	if (!doc)
		return 0;

	// A document is not guaranteed to have a root element.
	const xmlNode *root = xmlDocGetRootElement(doc);
	timestamp_t res = root ? extract_timestamp(root) : 0;
	xmlFreeDoc(doc);
	return res;
}