diff options
author | Berthold Stoeger <bstoeger@mail.tuwien.ac.at> | 2018-07-11 22:56:06 +0200 |
---|---|---|
committer | Berthold Stoeger <bstoeger@mail.tuwien.ac.at> | 2018-07-14 08:32:30 +0200 |
commit | 4de0b7dd3d29333def6a04e08c301f1b88adcfe7 (patch) | |
tree | f7da233318752f9a6732ecd5470455ef30f4807b | |
parent | 02ad18d4d8480985ca400613031b89340404ab55 (diff) | |
download | subsurface-4de0b7dd3d29333def6a04e08c301f1b88adcfe7.tar.gz |
Metadata: Parse AVIs
Whereas extraction of the dive-duration is trivial, AVIs don't seem
to have a standardized way of saving the creation time. This commit
implements support for two versions randomly found on the internet.
Additional versions will follow if the need arises. AVI seems not to be
a particularly popular format for either vacation or professional
videographers.
Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
-rw-r--r-- | core/metadata.cpp | 234 |
1 files changed, 221 insertions, 13 deletions
diff --git a/core/metadata.cpp b/core/metadata.cpp index 877205ad4..cf8e63cad 100644 --- a/core/metadata.cpp +++ b/core/metadata.cpp @@ -11,10 +11,10 @@ #define UINT64_MAX (~0ULL) #endif -// The following two functions fetch an arbitrary-length _unsigned_ integer from either -// a file or a memory location in big-endian mode. The size of the integer is passed -// via a template argument [e.g. getBE<uint16_t>(...)]. -// The function doing file access returns a default value on IO error or end-of-file. +// The following functions fetch an arbitrary-length _unsigned_ integer from either +// a file or a memory location in big-endian or little-endian mode. The size of the +// integer is passed via a template argument [e.g. getBE<uint16_t>(...)]. +// The functions doing file access return a default value on IO error or end-of-file. // Warning: This code works properly only for unsigned integers. The template parameter // is not checked and passing a signed integer will silently fail! template <typename T> @@ -40,6 +40,29 @@ static inline T getBE(QFile &f, T def=0) return getBE<T>(buf); } +template <typename T> +static inline T getLE(const char *buf_in) +{ + constexpr size_t size = sizeof(T); + // Interpret raw bytes as unsigned char to avoid sign extension for + // characters in the 0x80...0xff range. 
+ auto buf = (unsigned const char *)buf_in; + T ret = 0; + for (size_t i = 0; i < size; ++i) + ret |= static_cast<T>(buf[i]) << (i * 8); + return ret; +} + +template <typename T> +static inline T getLE(QFile &f, T def=0) +{ + constexpr size_t size = sizeof(T); + char buf[size]; + if (f.read(buf, size) != size) + return def; + return getLE<T>(buf); +} + static bool parseExif(QFile &f, struct metadata *metadata) { f.seek(0); @@ -199,6 +222,187 @@ static bool parseMP4(QFile &f, metadata *metadata) return found_ftyp; } +static QStringList weekdays = { "mon", "tue", "wed", "thu", "fri", "sat", "sun" }; +static QStringList months = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" }; + +static bool parseDate(const QString &s_in, timestamp_t &timestamp) +{ + // As a first attempt we're very crude: replace all '/' and '-' by ':' + // and try to see if this is of the form "yyyy:mm:dd hh:mm:ss". + // Since AVIs have no unified way of saving dates, we will have + // to find out empirically what different software produces. + // Note that we don't want to parse dates without time. That would + // be too imprecise and in such a case we'd rather go after the + // file modification date. + QString s = s_in; + s.replace('/', ':'); + s.replace('-', ':'); + QDateTime datetime = QDateTime::fromString(s, "yyyy:M:d h:m:s"); + if (datetime.isValid()) { + // Not knowing any better, we suppose that time is give in UTC + datetime.setTimeSpec(Qt::UTC); + timestamp = datetime.toMSecsSinceEpoch() / 1000; + return true; + } + + // I've also seen "Weekday Mon Day hh:mm:ss yyyy"(!) 
+ QStringList items = s.split(' ', QString::SkipEmptyParts); + if (items.size() < 4) + return false; + + // Skip weekday if any is given + for (const QString &day: weekdays) { + if (items[0].startsWith(day, Qt::CaseInsensitive)) { + items.removeFirst(); + break; + } + } + if (items.size() < 4) + return false; + int month; + for (month = 0; month < 12; ++month) + if (items[0].startsWith(months[month], Qt::CaseInsensitive)) + break; + if (month >= 12) + return false; + bool ok; + int day = items[1].toInt(&ok, 10); + if (!ok) + return false; + QTime time = QTime::fromString(items[2], "h:m:s"); + if (!time.isValid()) + return false; + int year = items[3].toInt(&ok, 10); + if (!ok) + return false; + QDate date(year, month + 1, day); + if (!date.isValid()) + return false; + + // Not knowing any better, we suppose that time is give in UTC + datetime = QDateTime(date, time, Qt::UTC); + if (datetime.isValid()) { + timestamp = datetime.toMSecsSinceEpoch() / 1000; + return true; + } + + return false; +} + +static bool parseAVI(QFile &f, metadata *metadata) +{ + f.seek(0); + + // Like MP4s, AVIs are hierarchical, being made up of "chunks" and "lists", + // whereby the latter can contain more "chunks" and "lists". + // All elements are padded to an even-byte value. I.e. if the length of en element + // is odd, then a padding byte is introduced. + // To parse the file, the remaining to-be-parsed bytes of the upper lists in + // the parse-tree are tracked in a stack-like structure. This is not strictly + // necessary, since the level at which a chunk is found is insubstantial. + // Nevertheless, it is an effective and simple way of sanity-checking the file and the + // parsing routine. + std::vector<uint64_t> list_stack; + list_stack.reserve(10); + + // For the outmost level, set the chunk-size the the maximum value representable in + // 64-bits, which effectively means parse to the end of file. 
+ list_stack.push_back(UINT64_MAX); + + // The first element of an AVI is supposed to be a "RIFF" list. + // If such a list is found as first element, this function will return true, indicating + // that the file is a video. + bool found_riff = false; + + // Find creation date and duration. If we found both, we may quit. + bool found_date = false; + bool found_duration = false; + while (!f.atEnd() && !list_stack.empty() && (!found_date || !found_duration)) { + // Parse chunk/list header. If the first four bytes are "RIFF" or "LIST", then this + // is a list. Otherwise, it is an chunk. + char type[4]; + if (f.read(type, 4) != 4) + break; + + // The first element must be RIFF + if (!found_riff) { + found_riff = !memcmp(type, "RIFF", 4); + if (!found_riff) + break; + } + + uint32_t len = getLE<uint32_t>(f); + // Elements are always padded to word (16-bit) boundaries + uint32_t len_in_file = len + (len & 1); + if (len_in_file + 8 > list_stack.back()) + break; + list_stack.back() -= len_in_file + 8; + + // Check if this is a list + if (!memcmp(type, "RIFF", 4) || !memcmp(type, "LIST", 4)) { + // This is a list + // The format is as follows: + // 4 bytes "RIFF" or "LIST" + // 4 bytes length (not including this and the previous entry) + // 4 bytes type + // n bytes data + // length includes the 4 bytes type + if (len < 4) + break; + char list_type[4]; + if (f.read(list_type, 4) != 4) + break; + + if (!memcmp(list_type, "AVI ", 4) || !memcmp(list_type, "hdrl", 4) || + !memcmp(list_type, "strl", 4) || !memcmp(list_type, "INFO", 4)) { + // Recurse into "AVI ", "hdrl", "strl" and "INFO" lists + list_stack.push_back(len_in_file - 4); + continue; + } else { + // Skip other lists + if (!f.seek(f.pos() + len_in_file - 4)) // TODO: switch to QFile::skip() + break; + } + } else if (!memcmp(type, "strh", 4) && !found_duration) { + // The stream header contains the duration information. We will just assume that + // the stream header is the correct one. 
+ // Before reading, sanity-check the length. + if (len < 48 || len > 4096) + break; + std::vector<char> data(len_in_file); + if (f.read(data.data(), len_in_file) != len_in_file) + break; + double scale = getLE<uint32_t>(&data[20]); + double rate = getLE<uint32_t>(&data[24]); + double start = getLE<uint32_t>(&data[28]); + double length = getLE<uint32_t>(&data[32]); + double duration = (start + length) * scale / rate; + metadata->duration.seconds = lrint(duration); + found_duration = true; + } else if (!memcmp(type, "IDIT", 4) || !memcmp(type, "ICRD", 4)) { + // "IDIT" of "ICRD" chunks may contain the creation date/time of the file + // First, sanity-check the length. + if (len > 4096) + break; + std::vector<char> data(len_in_file); + if (f.read(data.data(), len_in_file) != len_in_file) + break; + QString idit = QString::fromUtf8(data.data(), len); + // In my test file, the string contained a '\0' terminator. Remove it. + idit.remove(QChar(0)); + found_date = parseDate(idit, metadata->timestamp); + } else { + if (!f.seek(f.pos() + len_in_file)) // TODO: switch to QFile::skip() + break; + } + + // If end of current list is reached, return to outer list + while (!list_stack.empty() && list_stack.back() == 0) + list_stack.pop_back(); + } + return found_riff; +} + extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) { data->timestamp = 0; @@ -211,16 +415,20 @@ extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data) if (!f.open(QIODevice::ReadOnly)) return MEDIATYPE_IO_ERROR; - if (parseExif(f, data)) { - return MEDIATYPE_PICTURE; - } else if(parseMP4(f, data)) { - return MEDIATYPE_VIDEO; - } else { - // If we couldn't parse EXIF or MP4 data, use file creation date. - // TODO: QFileInfo::created is deprecated in newer Qt versions. 
+ mediatype_t res = MEDIATYPE_UNKNOWN; + if (parseExif(f, data)) + res = MEDIATYPE_PICTURE; + else if(parseMP4(f, data)) + res = MEDIATYPE_VIDEO; + else if(parseAVI(f, data)) + res = MEDIATYPE_VIDEO; + + // If we couldn't get a creation date from the file (for example AVI files don't + // have a standard way of storing this datum), use the file creation date of the file. + // TODO: QFileInfo::created is deprecated in newer Qt versions. + if (data->timestamp == 0) data->timestamp = QFileInfo(filename).created().toMSecsSinceEpoch() / 1000; - return MEDIATYPE_UNKNOWN; - } + return res; } extern "C" timestamp_t picture_get_timestamp(const char *filename) |