summary | refs | log | tree | commit | diff | stats
path: root/core/metadata.cpp
diff options
context:
space:
mode:
author: Berthold Stoeger <bstoeger@mail.tuwien.ac.at> 2018-03-20 22:36:45 +0100
committer: Lubomir I. Ivanov <neolit123@gmail.com> 2018-04-01 16:04:48 +0300
commit: b01c9328b48d95d7b3ac845e777da9c67d5424b2 (patch)
tree: 18246f330977798a677175591ba96d912d360838 /core/metadata.cpp
parent: 66b71c60a0980a1490be3bdacb1fdfa9dcca0c48 (diff)
download: subsurface-b01c9328b48d95d7b3ac845e777da9c67d5424b2.tar.gz
Dive pictures: extract timestamp from MP4 and related formats
Parse MP4s and related video files and extract the creation timestamp from the "mdhd" (media header) atom. Introduce helper function templates to extract arbitrary-length unsigned integers in big-endian format from file or memory. Signed-off-by: Berthold Stoeger <bstoeger@mail.tuwien.ac.at>
Diffstat (limited to 'core/metadata.cpp')
-rw-r--r-- core/metadata.cpp 150
1 files changed, 136 insertions, 14 deletions
diff --git a/core/metadata.cpp b/core/metadata.cpp
index ca72cecb4..2397cd44b 100644
--- a/core/metadata.cpp
+++ b/core/metadata.cpp
@@ -6,22 +6,47 @@
#include <QFile>
#include <QDateTime>
-// Fetch quint16 in big endian mode from QFile and return 0 on error.
-// This is a very specialized function for parsing JPEGs, therefore we can get away with such an in-band error code.
-static inline quint16 getShortBE(QFile &f)
+// Weirdly, android builds fail owing to undefined UINT64_MAX
+#ifndef UINT64_MAX
+#define UINT64_MAX (~0ULL)
+#endif
+
+// The following two functions fetch an arbitrary-length _unsigned_ integer from either
+// a file or a memory location in big-endian mode. The size of the integer is passed
+// via a template argument [e.g. getBE<uint16_t>(...)].
+// The function doing file access returns a default value on IO error or end-of-file.
+// Warning: This code works properly only for unsigned integers. The template parameter
+// is not checked and passing a signed integer will silently fail!
+template <typename T>
+static inline T getBE(const char *buf_in)
{
- unsigned char buf[2];
- if (f.read(reinterpret_cast<char *>(buf), 2) != 2)
- return 0;
- return (buf[0] << 8) | buf[1];
+ constexpr size_t size = sizeof(T);
+ // Interpret raw bytes as unsigned char to avoid sign extension for
+ // characters in the 0x80...0xff range.
+ auto buf = (unsigned const char *)buf_in;
+ T ret = 0;
+ for (size_t i = 0; i < size; ++i)
+ ret = (ret << 8) | buf[i];
+ return ret;
+}
+
+template <typename T>
+static inline T getBE(QFile &f, T def=0)
+{
+ constexpr size_t size = sizeof(T);
+ char buf[size];
+ if (f.read(buf, size) != size)
+ return def;
+ return getBE<T>(buf);
}
static bool parseExif(QFile &f, struct metadata *metadata)
{
- if (getShortBE(f) != 0xffd8)
+ f.seek(0);
+ if (getBE<uint16_t>(f) != 0xffd8)
return false;
for (;;) {
- switch (getShortBE(f)) {
+ switch (getBE<uint16_t>(f)) {
case 0xffc0:
case 0xffc2:
case 0xffc4:
@@ -31,14 +56,14 @@ static bool parseExif(QFile &f, struct metadata *metadata)
case 0xffe0:
case 0xffe2 ... 0xffef:
case 0xfffe: {
- quint16 len = getShortBE(f);
+ uint16_t len = getBE<uint16_t>(f);
if (len < 2)
return false;
f.seek(f.pos() + len - 2); // TODO: switch to QFile::skip()
break;
}
case 0xffe1: {
- quint16 len = getShortBE(f);
+ uint16_t len = getBE<uint16_t>(f);
if (len < 2)
return false;
len -= 2;
@@ -63,10 +88,107 @@ static bool parseExif(QFile &f, struct metadata *metadata)
}
}
-static bool parseMP4(QFile &, metadata *)
+static bool parseMP4(QFile &f, metadata *metadata)
{
- // TODO: Implement MP4 parsing
- return false;
+ f.seek(0);
+
+ // MP4s and related formats are hierarchical, being made up of "atoms", which can
+ // contain other atoms (an interesting interpretation of the term atom).
+ // To parse the file, the remaining to-be-parsed bytes of the upper atoms in
+ // the parse-tree are tracked in a stack-like structure. This is not strictly
+ // necessary, since the level at which an atom is found is irrelevant.
+ // Nevertheless, it is an effective and simple way of sanity-checking the file and the
+ // parsing routine.
+ std::vector<uint64_t> atom_stack;
+ atom_stack.reserve(10);
+
+ // For the outermost level, set the atom-size to the maximum value representable in
+ // 64 bits, which effectively means: parse to the end of the file.
+ atom_stack.push_back(UINT64_MAX);
+
+ // The first atom of an MP4 or related video is supposed to be of the "ftyp" kind.
+ // If such an atom is found as first atom, this function will return true, indicating
+ // that the file is a video.
+ bool found_ftyp = false;
+
+ while (!f.atEnd() && !atom_stack.empty()) {
+ // Parse atom header. The header can have two forms (each character stands for a byte):
+ // lllltttt
+ // or
+ // 0001ttttllllllll
+ // where "l" stands for length in big-endian mode and "t" for type of the atom.
+ // The length includes the 8- or 16-bytes header.
+ uint64_t atom_size = getBE<uint32_t>(f, 2);
+ int atom_header_size = 8;
+ if (atom_size > 1 && atom_size < 8)
+ break;
+ char type[4];
+ if (f.read(type, 4) != 4)
+ break;
+ if (atom_size == 1) {
+ atom_size = getBE<uint64_t>(f);
+ atom_header_size = 16;
+ if (atom_size < 16)
+ break;
+ }
+ if (atom_size == 0)
+ atom_size = atom_stack.back();
+ if (atom_size > atom_stack.back())
+ break;
+ atom_stack.back() -= atom_size;
+ atom_size -= atom_header_size;
+
+ // The first atom must be "ftyp"
+ if (!found_ftyp) {
+ found_ftyp = !memcmp(type, "ftyp", 4);
+ if (!found_ftyp)
+ break;
+ }
+
+ if (!memcmp(type, "moov", 4) ||
+ !memcmp(type, "trak", 4) ||
+ !memcmp(type, "mdia", 4)) {
+ // Recurse into "moov", "trak" and "mdia" atoms
+ atom_stack.push_back(atom_size);
+ continue;
+ } else if (!memcmp(type, "mdhd", 4) && atom_size >= 24 && atom_size < 4096) {
+ // Parse "mdhd" (media header).
+ // Sanity check: payload size (atom size minus header) must be at least 24 and less than 4096 bytes
+ std::vector<char> data(atom_size);
+ if (f.read(&data[0], atom_size) != static_cast<int>(atom_size))
+ break;
+ uint64_t timestamp = 0;
+ // First byte is version. We know version 0 and 1
+ switch (data[0]) {
+ case 0:
+ timestamp = getBE<uint32_t>(&data[4]);
+ break;
+ case 1:
+ timestamp = getBE<uint64_t>(&data[4]);
+ break;
+ default:
+ // For unknown versions: ignore -> maybe we find a parseable "mdhd" atom later in this file
+ break;
+ }
+ // Timestamp is given as seconds since midnight 1904/1/1. To be convertible to the UNIX epoch
+ // it must be at least 2082844800 (the number of seconds between 1904/1/1 and 1970/1/1).
+ if (timestamp >= 2082844800) {
+ metadata->timestamp = timestamp - 2082844800;
+ // Currently, we only know how to extract timestamps, so we might just quit parsing here.
+ break;
+ }
+ } else {
+ // Jump over unknown atom
+ if (!f.seek(f.pos() + atom_size)) // TODO: switch to QFile::skip()
+ break;
+ }
+
+ // If end of atom is reached, return to outer atom
+ while (!atom_stack.empty() && atom_stack.back() == 0)
+ atom_stack.pop_back();
+ }
+
+ return found_ftyp;
}
extern "C" mediatype_t get_metadata(const char *filename_in, metadata *data)