aboutsummaryrefslogtreecommitdiffstats
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rw-r--r-- core/metadata.cpp 234
1 files changed, 221 insertions, 13 deletions
diff --git a/core/metadata.cpp b/core/metadata.cpp
index 877205ad4..cf8e63cad 100644
--- a/core/metadata.cpp
+++ b/core/metadata.cpp
@@ -11,10 +11,10 @@
#define UINT64_MAX (~0ULL)
#endif
-// The following two functions fetch an arbitrary-length _unsigned_ integer from either
-// a file or a memory location in big-endian mode. The size of the integer is passed
-// via a template argument [e.g. getBE<uint16_t>(...)].
-// The function doing file access returns a default value on IO error or end-of-file.
+// The following functions fetch an arbitrary-length _unsigned_ integer from either
+// a file or a memory location in big-endian or little-endian mode. The size of the
+// integer is passed via a template argument [e.g. getBE<uint16_t>(...)].
+// The functions doing file access return a default value on IO error or end-of-file.
// Warning: This code works properly only for unsigned integers. The template parameter
// is not checked and passing a signed integer will silently fail!
template <typename T>
@@ -40,6 +40,29 @@ static inline T getBE(QFile &f, T def=0)
return getBE<T>(buf);
}
+// Decode sizeof(T) bytes starting at buf_in as a little-endian unsigned integer:
+// the byte at offset 0 is the least significant one.
+// T must be an unsigned integer type; this is not checked.
+template <typename T>
+static inline T getLE(const char *buf_in)
+{
+	// Read the input through an unsigned char pointer: plain char may be signed
+	// and bytes in the 0x80...0xff range would then be sign-extended on widening.
+	const unsigned char *bytes = reinterpret_cast<const unsigned char *>(buf_in);
+	T value = 0;
+	// The byte at offset idx supplies bits [8 * idx, 8 * idx + 7] of the result.
+	size_t idx = sizeof(T);
+	while (idx-- > 0)
+		value |= static_cast<T>(bytes[idx]) << (idx * 8);
+	return value;
+}
+
+// Read sizeof(T) bytes from f and decode them as a little-endian unsigned
+// integer of type T. If the read does not deliver exactly sizeof(T) bytes
+// (IO error or end-of-file), def is returned instead.
+template <typename T>
+static inline T getLE(QFile &f, T def=0)
+{
+	char raw[sizeof(T)];
+	const bool complete = f.read(raw, sizeof(T)) == sizeof(T);
+	return complete ? getLE<T>(raw) : def;
+}
+
static bool parseExif(QFile &f, struct metadata *metadata)
{
f.seek(0);
@@ -199,6 +222,187 @@ static bool parseMP4(QFile &f, metadata *metadata)
return found_ftyp;
}
+// Lower-case three-letter prefixes of the English weekday and month names, used by
+// parseDate() for case-insensitive prefix matching. The position of an entry in
+// months, plus one, is the calendar month number.
+// Both tables are const: they are never modified, and range-for / operator[] on a
+// non-const Qt container go through the non-const, detaching accessors.
+static const QStringList weekdays = { "mon", "tue", "wed", "thu", "fri", "sat", "sun" };
+static const QStringList months = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec" };
+
+// Try to interpret s_in as a date with time and store the result, in seconds
+// since the epoch, in timestamp. Two layouts are recognized:
+//   1) "yyyy:mm:dd hh:mm:ss", where '/' and '-' are accepted in place of ':'
+//   2) "[Weekday] Month Day hh:mm:ss yyyy", with weekday and month identified
+//      by their first three letters, ignoring case
+// Returns true on success. On failure false is returned and timestamp is left
+// untouched.
+static bool parseDate(const QString &s_in, timestamp_t &timestamp)
+{
+	// AVIs have no unified way of saving dates, so the accepted layouts were
+	// found empirically from what different software produces. Dates without
+	// a time are rejected on purpose: they would be too imprecise and the
+	// caller is better off with the file modification date.
+	// First attempt, very crude: turn every '/' and '-' into ':' and check
+	// whether the result has the form "yyyy:mm:dd hh:mm:ss".
+	QString normalized = s_in;
+	normalized.replace('/', ':').replace('-', ':');
+	QDateTime stamp = QDateTime::fromString(normalized, "yyyy:M:d h:m:s");
+	if (stamp.isValid()) {
+		// Not knowing any better, we suppose that the time is given in UTC
+		stamp.setTimeSpec(Qt::UTC);
+		timestamp = stamp.toMSecsSinceEpoch() / 1000;
+		return true;
+	}
+
+	// Second attempt: "Weekday Mon Day hh:mm:ss yyyy", which was also seen in the wild
+	QStringList tokens = normalized.split(' ', QString::SkipEmptyParts);
+	if (tokens.size() < 4)
+		return false;
+
+	// Drop a leading weekday, if there is one
+	for (const QString &prefix: weekdays) {
+		if (!tokens[0].startsWith(prefix, Qt::CaseInsensitive))
+			continue;
+		tokens.removeFirst();
+		break;
+	}
+	// Month, day, time and year must still be present
+	if (tokens.size() < 4)
+		return false;
+
+	// Zero-based index of the month whose prefix matches; 12 means no match
+	int month = 0;
+	while (month < 12 && !tokens[0].startsWith(months[month], Qt::CaseInsensitive))
+		++month;
+	if (month == 12)
+		return false;
+
+	bool day_ok = false;
+	const int day = tokens[1].toInt(&day_ok, 10);
+	const QTime clock = QTime::fromString(tokens[2], "h:m:s");
+	bool year_ok = false;
+	const int year = tokens[3].toInt(&year_ok, 10);
+	if (!day_ok || !clock.isValid() || !year_ok)
+		return false;
+
+	const QDate date(year, month + 1, day);
+	if (!date.isValid())
+		return false;
+
+	// Not knowing any better, we suppose that the time is given in UTC
+	const QDateTime utc(date, clock, Qt::UTC);
+	if (!utc.isValid())
+		return false;
+	timestamp = utc.toMSecsSinceEpoch() / 1000;
+	return true;
+}
+
+// Scan f as an AVI (RIFF) container and extract the creation date and the
+// duration of the video, if present.
+//   f:        file to scan; it is rewound to the start first
+//   metadata: timestamp is set from the first "IDIT" or "ICRD" chunk whose content
+//             parseDate() understands; duration.seconds is set from the first
+//             "strh" chunk with a non-zero rate. Fields for which nothing usable
+//             is found are left untouched.
+// Returns true if the first element of the file is a "RIFF" list, i.e. if the
+// file looks like an AVI, independently of whether date or duration were found.
+static bool parseAVI(QFile &f, metadata *metadata)
+{
+	f.seek(0);
+
+	// Like MP4s, AVIs are hierarchical, being made up of "chunks" and "lists",
+	// whereby the latter can contain more "chunks" and "lists".
+	// All elements are padded to an even-byte value. I.e. if the length of an element
+	// is odd, then a padding byte is introduced.
+	// To parse the file, the remaining to-be-parsed bytes of the upper lists in
+	// the parse-tree are tracked in a stack-like structure. This is not strictly
+	// necessary, since the level at which a chunk is found is insubstantial.
+	// Nevertheless, it is an effective and simple way of sanity-checking the file and the
+	// parsing routine.
+	std::vector<uint64_t> list_stack;
+	list_stack.reserve(10);
+
+	// For the outermost level, set the chunk-size to the maximum value representable in
+	// 64 bits, which effectively means parse to the end of file.
+	list_stack.push_back(UINT64_MAX);
+
+	// The first element of an AVI is supposed to be a "RIFF" list.
+	// If such a list is found as first element, this function will return true, indicating
+	// that the file is a video.
+	bool found_riff = false;
+
+	// Find creation date and duration. If we found both, we may quit.
+	bool found_date = false;
+	bool found_duration = false;
+	while (!f.atEnd() && !list_stack.empty() && (!found_date || !found_duration)) {
+		// Parse chunk/list header. If the first four bytes are "RIFF" or "LIST", then this
+		// is a list. Otherwise, it is a chunk.
+		char type[4];
+		if (f.read(type, 4) != 4)
+			break;
+
+		// The first element must be RIFF
+		if (!found_riff) {
+			found_riff = !memcmp(type, "RIFF", 4);
+			if (!found_riff)
+				break;
+		}
+
+		uint32_t len = getLE<uint32_t>(f);
+		// Elements are always padded to word (16-bit) boundaries.
+		// The padded length and the length including the 8-byte header are computed
+		// in 64 bits: in 32 bits they would wrap around for lengths close to
+		// UINT32_MAX and the sanity check below would wrongly pass.
+		uint64_t len_in_file = static_cast<uint64_t>(len) + (len & 1);
+		if (len_in_file + 8 > list_stack.back())
+			break;
+		list_stack.back() -= len_in_file + 8;
+
+		// Check if this is a list
+		if (!memcmp(type, "RIFF", 4) || !memcmp(type, "LIST", 4)) {
+			// This is a list
+			// The format is as follows:
+			// 4 bytes "RIFF" or "LIST"
+			// 4 bytes length (not including this and the previous entry)
+			// 4 bytes type
+			// n bytes data
+			// length includes the 4 bytes type
+			if (len < 4)
+				break;
+			char list_type[4];
+			if (f.read(list_type, 4) != 4)
+				break;
+
+			if (!memcmp(list_type, "AVI ", 4) || !memcmp(list_type, "hdrl", 4) ||
+			    !memcmp(list_type, "strl", 4) || !memcmp(list_type, "INFO", 4)) {
+				// Recurse into "AVI ", "hdrl", "strl" and "INFO" lists.
+				// The 4 bytes of the list type have already been consumed.
+				list_stack.push_back(len_in_file - 4);
+				continue;
+			} else {
+				// Skip other lists
+				if (!f.seek(f.pos() + static_cast<qint64>(len_in_file - 4))) // TODO: switch to QFile::skip()
+					break;
+			}
+		} else if (!memcmp(type, "strh", 4) && !found_duration) {
+			// The stream header contains the duration information. We will just assume that
+			// the stream header is the correct one.
+			// Before reading, sanity-check the length.
+			if (len < 48 || len > 4096)
+				break;
+			const qint64 to_read = static_cast<qint64>(len_in_file);
+			std::vector<char> data(len_in_file);
+			if (f.read(data.data(), to_read) != to_read)
+				break;
+			// Little-endian 32-bit fields at payload offsets 20, 24, 28 and 32
+			double scale = getLE<uint32_t>(&data[20]);
+			double rate = getLE<uint32_t>(&data[24]);
+			double start = getLE<uint32_t>(&data[28]);
+			double length = getLE<uint32_t>(&data[32]);
+			// A rate of zero would mean a division by zero and an infinite or NaN
+			// duration. Ignore such a header and keep looking for a usable one.
+			if (rate > 0.0) {
+				double duration = (start + length) * scale / rate;
+				metadata->duration.seconds = lrint(duration);
+				found_duration = true;
+			}
+		} else if ((!memcmp(type, "IDIT", 4) || !memcmp(type, "ICRD", 4)) && !found_date) {
+			// "IDIT" or "ICRD" chunks may contain the creation date/time of the file.
+			// Once a date was parsed successfully, later date chunks are skipped, so
+			// that they can neither overwrite the timestamp nor reset found_date.
+			// First, sanity-check the length.
+			if (len > 4096)
+				break;
+			const qint64 to_read = static_cast<qint64>(len_in_file);
+			std::vector<char> data(len_in_file);
+			if (f.read(data.data(), to_read) != to_read)
+				break;
+			// Only the first len bytes are payload; a padding byte, if any, is ignored
+			QString idit = QString::fromUtf8(data.data(), len);
+			// In my test file, the string contained a '\0' terminator. Remove it.
+			idit.remove(QChar(0));
+			found_date = parseDate(idit, metadata->timestamp);
+		} else {
+			// Any other chunk: skip its (padded) payload
+			if (!f.seek(f.pos() + static_cast<qint64>(len_in_file))) // TODO: switch to QFile::skip()
+				break;
+		}
+
+		// If end of current list is reached, return to outer list
+		while (!list_stack.empty() && list_stack.back() == 0)
+			list_stack.pop_back();
+	}
+	return found_riff;
+}
+
extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data)
{
data->timestamp = 0;
@@ -211,16 +415,20 @@ extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data)
if (!f.open(QIODevice::ReadOnly))
return MEDIATYPE_IO_ERROR;
- if (parseExif(f, data)) {
- return MEDIATYPE_PICTURE;
- } else if(parseMP4(f, data)) {
- return MEDIATYPE_VIDEO;
- } else {
- // If we couldn't parse EXIF or MP4 data, use file creation date.
- // TODO: QFileInfo::created is deprecated in newer Qt versions.
+ mediatype_t res = MEDIATYPE_UNKNOWN;
+ if (parseExif(f, data))
+ res = MEDIATYPE_PICTURE;
+ else if(parseMP4(f, data))
+ res = MEDIATYPE_VIDEO;
+ else if(parseAVI(f, data))
+ res = MEDIATYPE_VIDEO;
+
+ // If we couldn't get a creation date from the file (for example AVI files don't
+ // have a standard way of storing this datum), fall back to the file's creation date.
+ // TODO: QFileInfo::created is deprecated in newer Qt versions.
+ if (data->timestamp == 0)
data->timestamp = QFileInfo(filename).created().toMSecsSinceEpoch() / 1000;
- return MEDIATYPE_UNKNOWN;
- }
+ return res;
}
extern "C" timestamp_t picture_get_timestamp(const char *filename)