From 02ad18d4d8480985ca400613031b89340404ab55 Mon Sep 17 00:00:00 2001 From: Berthold Stoeger Date: Tue, 10 Jul 2018 20:03:26 +0200 Subject: Metadata: extract duration from QuickTime/MP4-style containers We want the duration of videos for two reasons: - To display the duration of the video in the profile plot. - To be able to determine which dive a video is closer to if the start is not during a dive. Signed-off-by: Berthold Stoeger --- core/metadata.cpp | 9 +++++++++ core/metadata.h | 1 + 2 files changed, 10 insertions(+) diff --git a/core/metadata.cpp b/core/metadata.cpp index 2397cd44b..877205ad4 100644 --- a/core/metadata.cpp +++ b/core/metadata.cpp @@ -158,18 +158,26 @@ static bool parseMP4(QFile &f, metadata *metadata) if (f.read(&data[0], atom_size) != static_cast(atom_size)) break; uint64_t timestamp = 0; + uint32_t timescale = 0; + uint64_t duration = 0; // First byte is version. We know version 0 and 1 switch (data[0]) { case 0: timestamp = getBE(&data[4]); + timescale = getBE(&data[12]); + duration = getBE(&data[16]); break; case 1: timestamp = getBE(&data[4]); + timescale = getBE(&data[20]); + duration = getBE(&data[24]); break; default: // For unknown versions: ignore -> maybe we find a parseable "mdhd" atom later in this file break; } + if (timescale > 0) + metadata->duration.seconds = lrint((double)duration / timescale); // Timestamp is given as seconds since midnight 1904/1/1. To be convertible to the UNIX epoch // it must be larger than 2082844800. 
if (timestamp >= 2082844800) { @@ -194,6 +202,7 @@ static bool parseMP4(QFile &f, metadata *metadata) extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) { data->timestamp = 0; + data->duration.seconds = 0; data->latitude.udeg = 0; data->longitude.udeg = 0; diff --git a/core/metadata.h b/core/metadata.h index da2a9c3d2..4470bdc88 100644 --- a/core/metadata.h +++ b/core/metadata.h @@ -5,6 +5,7 @@ struct metadata { timestamp_t timestamp; + duration_t duration; degrees_t latitude; degrees_t longitude; }; -- cgit v1.2.3-70-g09d2 From 4de0b7dd3d29333def6a04e08c301f1b88adcfe7 Mon Sep 17 00:00:00 2001 From: Berthold Stoeger Date: Wed, 11 Jul 2018 22:56:06 +0200 Subject: Metadata: Parse AVIs Whereas extraction of the dive-duration is trivial, AVIs don't seem to have a standardized way of saving the creation time. This commit implements support for two versions randomly found on the internet. Additional versions will follow if the need arises. AVI seems not to be a particularly popular format for either vacation or professional videographers. Signed-off-by: Berthold Stoeger --- core/metadata.cpp | 234 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 221 insertions(+), 13 deletions(-) diff --git a/core/metadata.cpp b/core/metadata.cpp index 877205ad4..cf8e63cad 100644 --- a/core/metadata.cpp +++ b/core/metadata.cpp @@ -11,10 +11,10 @@ #define UINT64_MAX (~0ULL) #endif -// The following two functions fetch an arbitrary-length _unsigned_ integer from either -// a file or a memory location in big-endian mode. The size of the integer is passed -// via a template argument [e.g. getBE(...)]. -// The function doing file access returns a default value on IO error or end-of-file. +// The following functions fetch an arbitrary-length _unsigned_ integer from either +// a file or a memory location in big-endian or little-endian mode. The size of the +// integer is passed via a template argument [e.g. getBE(...)]. 
+// The functions doing file access return a default value on IO error or end-of-file. // Warning: This code works properly only for unsigned integers. The template parameter // is not checked and passing a signed integer will silently fail! template @@ -40,6 +40,29 @@ static inline T getBE(QFile &f, T def=0) return getBE(buf); } +template +static inline T getLE(const char *buf_in) +{ + constexpr size_t size = sizeof(T); + // Interpret raw bytes as unsigned char to avoid sign extension for + // characters in the 0x80...0xff range. + auto buf = (unsigned const char *)buf_in; + T ret = 0; + for (size_t i = 0; i < size; ++i) + ret |= static_cast(buf[i]) << (i * 8); + return ret; +} + +template +static inline T getLE(QFile &f, T def=0) +{ + constexpr size_t size = sizeof(T); + char buf[size]; + if (f.read(buf, size) != size) + return def; + return getLE(buf); +} + static bool parseExif(QFile &f, struct metadata *metadata) { f.seek(0); @@ -199,6 +222,187 @@ static bool parseMP4(QFile &f, metadata *metadata) return found_ftyp; } +static QStringList weekdays = { "mon", "tue", "wed", "thu", "fri", "sat", "sun" }; +static QStringList months = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; + +static bool parseDate(const QString &s_in, timestamp_t &timestamp) +{ + // As a first attempt we're very crude: replace all '/' and '-' by ':' + // and try to see if this is of the form "yyyy:mm:dd hh:mm:ss". + // Since AVIs have no unified way of saving dates, we will have + // to find out empirically what different software produces. + // Note that we don't want to parse dates without time. That would + // be too imprecise and in such a case we'd rather go after the + // file modification date. 
+ QString s = s_in; + s.replace('/', ':'); + s.replace('-', ':'); + QDateTime datetime = QDateTime::fromString(s, "yyyy:M:d h:m:s"); + if (datetime.isValid()) { + // Not knowing any better, we suppose that time is give in UTC + datetime.setTimeSpec(Qt::UTC); + timestamp = datetime.toMSecsSinceEpoch() / 1000; + return true; + } + + // I've also seen "Weekday Mon Day hh:mm:ss yyyy"(!) + QStringList items = s.split(' ', QString::SkipEmptyParts); + if (items.size() < 4) + return false; + + // Skip weekday if any is given + for (const QString &day: weekdays) { + if (items[0].startsWith(day, Qt::CaseInsensitive)) { + items.removeFirst(); + break; + } + } + if (items.size() < 4) + return false; + int month; + for (month = 0; month < 12; ++month) + if (items[0].startsWith(months[month], Qt::CaseInsensitive)) + break; + if (month >= 12) + return false; + bool ok; + int day = items[1].toInt(&ok, 10); + if (!ok) + return false; + QTime time = QTime::fromString(items[2], "h:m:s"); + if (!time.isValid()) + return false; + int year = items[3].toInt(&ok, 10); + if (!ok) + return false; + QDate date(year, month + 1, day); + if (!date.isValid()) + return false; + + // Not knowing any better, we suppose that time is give in UTC + datetime = QDateTime(date, time, Qt::UTC); + if (datetime.isValid()) { + timestamp = datetime.toMSecsSinceEpoch() / 1000; + return true; + } + + return false; +} + +static bool parseAVI(QFile &f, metadata *metadata) +{ + f.seek(0); + + // Like MP4s, AVIs are hierarchical, being made up of "chunks" and "lists", + // whereby the latter can contain more "chunks" and "lists". + // All elements are padded to an even-byte value. I.e. if the length of en element + // is odd, then a padding byte is introduced. + // To parse the file, the remaining to-be-parsed bytes of the upper lists in + // the parse-tree are tracked in a stack-like structure. This is not strictly + // necessary, since the level at which a chunk is found is insubstantial. 
+ // Nevertheless, it is an effective and simple way of sanity-checking the file and the + // parsing routine. + std::vector list_stack; + list_stack.reserve(10); + + // For the outmost level, set the chunk-size the the maximum value representable in + // 64-bits, which effectively means parse to the end of file. + list_stack.push_back(UINT64_MAX); + + // The first element of an AVI is supposed to be a "RIFF" list. + // If such a list is found as first element, this function will return true, indicating + // that the file is a video. + bool found_riff = false; + + // Find creation date and duration. If we found both, we may quit. + bool found_date = false; + bool found_duration = false; + while (!f.atEnd() && !list_stack.empty() && (!found_date || !found_duration)) { + // Parse chunk/list header. If the first four bytes are "RIFF" or "LIST", then this + // is a list. Otherwise, it is an chunk. + char type[4]; + if (f.read(type, 4) != 4) + break; + + // The first element must be RIFF + if (!found_riff) { + found_riff = !memcmp(type, "RIFF", 4); + if (!found_riff) + break; + } + + uint32_t len = getLE(f); + // Elements are always padded to word (16-bit) boundaries + uint32_t len_in_file = len + (len & 1); + if (len_in_file + 8 > list_stack.back()) + break; + list_stack.back() -= len_in_file + 8; + + // Check if this is a list + if (!memcmp(type, "RIFF", 4) || !memcmp(type, "LIST", 4)) { + // This is a list + // The format is as follows: + // 4 bytes "RIFF" or "LIST" + // 4 bytes length (not including this and the previous entry) + // 4 bytes type + // n bytes data + // length includes the 4 bytes type + if (len < 4) + break; + char list_type[4]; + if (f.read(list_type, 4) != 4) + break; + + if (!memcmp(list_type, "AVI ", 4) || !memcmp(list_type, "hdrl", 4) || + !memcmp(list_type, "strl", 4) || !memcmp(list_type, "INFO", 4)) { + // Recurse into "AVI ", "hdrl", "strl" and "INFO" lists + list_stack.push_back(len_in_file - 4); + continue; + } else { + // Skip other lists 
+ if (!f.seek(f.pos() + len_in_file - 4)) // TODO: switch to QFile::skip() + break; + } + } else if (!memcmp(type, "strh", 4) && !found_duration) { + // The stream header contains the duration information. We will just assume that + // the stream header is the correct one. + // Before reading, sanity-check the length. + if (len < 48 || len > 4096) + break; + std::vector data(len_in_file); + if (f.read(data.data(), len_in_file) != len_in_file) + break; + double scale = getLE(&data[20]); + double rate = getLE(&data[24]); + double start = getLE(&data[28]); + double length = getLE(&data[32]); + double duration = (start + length) * scale / rate; + metadata->duration.seconds = lrint(duration); + found_duration = true; + } else if (!memcmp(type, "IDIT", 4) || !memcmp(type, "ICRD", 4)) { + // "IDIT" of "ICRD" chunks may contain the creation date/time of the file + // First, sanity-check the length. + if (len > 4096) + break; + std::vector data(len_in_file); + if (f.read(data.data(), len_in_file) != len_in_file) + break; + QString idit = QString::fromUtf8(data.data(), len); + // In my test file, the string contained a '\0' terminator. Remove it. + idit.remove(QChar(0)); + found_date = parseDate(idit, metadata->timestamp); + } else { + if (!f.seek(f.pos() + len_in_file)) // TODO: switch to QFile::skip() + break; + } + + // If end of current list is reached, return to outer list + while (!list_stack.empty() && list_stack.back() == 0) + list_stack.pop_back(); + } + return found_riff; +} + extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) { data->timestamp = 0; @@ -211,16 +415,20 @@ extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) if (!f.open(QIODevice::ReadOnly)) return MEDIATYPE_IO_ERROR; - if (parseExif(f, data)) { - return MEDIATYPE_PICTURE; - } else if(parseMP4(f, data)) { - return MEDIATYPE_VIDEO; - } else { - // If we couldn't parse EXIF or MP4 data, use file creation date. 
- // TODO: QFileInfo::created is deprecated in newer Qt versions. + mediatype_t res = MEDIATYPE_UNKNOWN; + if (parseExif(f, data)) + res = MEDIATYPE_PICTURE; + else if(parseMP4(f, data)) + res = MEDIATYPE_VIDEO; + else if(parseAVI(f, data)) + res = MEDIATYPE_VIDEO; + + // If we couldn't get a creation date from the file (for example AVI files don't + // have a standard way of storing this datum), use the file creation date of the file. + // TODO: QFileInfo::created is deprecated in newer Qt versions. + if (data->timestamp == 0) data->timestamp = QFileInfo(filename).created().toMSecsSinceEpoch() / 1000; - return MEDIATYPE_UNKNOWN; - } + return res; } extern "C" timestamp_t picture_get_timestamp(const char *filename) -- cgit v1.2.3-70-g09d2 From 772935b596acee31edbb6b98e22afdf26d6a829a Mon Sep 17 00:00:00 2001 From: Berthold Stoeger Date: Thu, 12 Jul 2018 23:51:42 +0200 Subject: Metadata: Parse ASFs (=WMVs) The simplest video-formats to parse so far. Signed-off-by: Berthold Stoeger --- core/metadata.cpp | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/core/metadata.cpp b/core/metadata.cpp index cf8e63cad..6216476ab 100644 --- a/core/metadata.cpp +++ b/core/metadata.cpp @@ -403,6 +403,89 @@ static bool parseAVI(QFile &f, metadata *metadata) return found_riff; } +static bool parseASF(QFile &f, metadata *metadata) +{ + f.seek(0); + + // Parse the header of the header object: + // id (16 bytes) + // size (8 bytes) + // number of header objects (4 bytes) + // reserved (2 bytes) + // ------------------------------------------ + // total (30 bytes) + char header[30]; + if (f.read(header, 30) != 30) + return false; + + // Check if this is indeed an ASF header. 
+ if (memcmp(&header[0], "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c", 16) != 0) + return false; + + uint64_t header_len = getLE(&header[16]); + uint32_t num = getLE(&header[24]); + + // Sanity check + if (header_len <= 30 || num > 10000) + return false; + header_len -= 30; + + // Read through all the header objects + for (uint32_t i = 0; i < num && header_len > 24; ++i) { + // Each objects starts with the same header: + // id (16 bytes) + // size (8 bytes) + char data[24]; + if (f.read(data, 24) != 24) + return false; + + uint64_t object_len = getLE(&data[16]); + // Sanity check + if (object_len < 24 || object_len > header_len) + return false; + + header_len -= object_len; + object_len -= 24; + if (!memcmp(data, "\xa1\xdc\xab\x8c\x47\xa9\xcf\x11\x8e\xe4\x0\xc0\xc\x20\x53\x65", 16) != 0) { + // This is a file properties object. The interesting data are: + // quadword (64 bit) at byte 24: creation date in 100-nanoseconds since Jan. 1, 1601. + // quadword (64 bit) at byte 40: duration in 100-nanoseconds. + // quadword (64 bit) at byte 56: offset in msec (to be subtracted from duration) + // But first a sanity check: + if (object_len < 80 || object_len > 4096) + break; + + std::vector v(object_len); + if (f.read(v.data(), object_len) != (int)object_len) + break; + + uint64_t creation_date = getLE(&v[24]); + // OK - first convert to seconds + creation_date /= 10000000; + // Check if this is during the UNIX epoch and convert into epoch + if (creation_date <= 11644473600) + metadata->timestamp = 0; // Can't determine creation date, sorry! 
+ else + metadata->timestamp = creation_date - 11644473600; + + uint64_t duration = getLE(&v[40]); + uint64_t offset = getLE(&v[56]); + metadata->duration.seconds = lrint(duration / 10000000.0 - offset / 1000.0); + + // We found everything that we wanted -> return success + return true; + } else { + // Skip over unknown object + if (!f.seek(f.pos() + object_len)) // TODO: switch to QFile::skip() + break; + } + } + + // We didn't find a file properties object. According to the ASF specification, this is + // *not* a valid ASF-file. Return failure accordingly. + return false; +} + extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) { data->timestamp = 0; @@ -422,6 +505,8 @@ extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) res = MEDIATYPE_VIDEO; else if(parseAVI(f, data)) res = MEDIATYPE_VIDEO; + else if(parseASF(f, data)) + res = MEDIATYPE_VIDEO; // If we couldn't get a creation date from the file (for example AVI files don't // have a standard way of storing this datum), use the file creation date of the file. -- cgit v1.2.3-70-g09d2 From cf73afb452a2c09c79ccf0972cce8b4a7084e561 Mon Sep 17 00:00:00 2001 From: Berthold Stoeger Date: Thu, 12 Jul 2018 17:17:05 +0200 Subject: Update CHANGELOG.md Signed-off-by: Berthold Stoeger --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9c1e1f09..132286276 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ - +- Dive media: experimental support for metadata extraction from AVI and WMV files - Dive media: sort thumbnails by timestamp - Dive media: don't recalculate all pictures on drag & drop - Profile: immediately update thumbnail positions on deletion -- cgit v1.2.3-70-g09d2