MP4: Guard against deleting shared mdat on QT chapter remove

The previous fix for orphaned chapter mdats assumed the chapter text
mdat was dedicated and derived its location from stco[0] - 8.  In
audiobooks that co-locate chapter text at the start of the primary
audio mdat (stco[0] == audioMdat.offset + 8), that arithmetic lands
on the audio mdat header, the "mdat" signature check passes, and the
full audio payload gets removed -- shrinking a 484 MB audiobook to
5.4 MB.

Fix: resolve the chapter mdat by finding the top-level mdat whose
data range contains stco[0], then re-parse after the trak/tref
removals and confirm no other track's stco/co64 points into that
mdat before deleting it.  Shared mdats are left intact; the dead
chapter text bytes remain as harmless padding.

Add a regression test that writes a chapter track, patches its
stco[0] to point into the primary audio mdat (simulating the
audiobook layout), removes the chapter track, and verifies the
audio mdat is byte-identical afterwards.
This commit is contained in:
Ryan Francesconi
2026-04-22 11:29:28 -07:00
parent 5c70f0071f
commit 85b6a9eb93
2 changed files with 209 additions and 23 deletions

View File

@@ -935,30 +935,102 @@ namespace
}
}
// Removes the QT chapter trak, the tref from the audio track, and the orphaned
// mdat atom that holds the chapter text samples.
//! Finds the top-level mdat atom that covers the given file offset.
const MP4::Atom *findMdatContaining(const MP4::Atoms *atoms, offset_t fileOffset)
{
for(const auto *atom : atoms->atoms()) {
if(atom->name() != "mdat")
continue;
const offset_t dataStart = atom->offset() + 8;
const offset_t end = atom->offset() + atom->length();
if(fileOffset >= dataStart && fileOffset < end)
return atom;
}
return nullptr;
}
//! True if any stco/co64 entry in the atom tree points inside the given mdat range.
//! Used to detect mdats that are shared with other tracks (audio data + chapter
//! text co-located in a single mdat) so we never delete live track data.
bool mdatIsUsedByAnyTrack(TagLib::File *file, const MP4::Atoms *atoms,
offset_t mdatStart, offset_t mdatSize)
{
const offset_t dataStart = mdatStart + 8;
const offset_t dataEnd = mdatStart + mdatSize;
const MP4::Atom *moov = atoms->find("moov");
if(!moov)
return false;
for(const auto &stco : moov->findall("stco", true)) {
file->seek(stco->offset() + 12);
ByteVector data = file->readBlock(stco->length() - 12);
if(data.size() < 4)
continue;
unsigned int count = data.toUInt();
unsigned int pos = 4;
const unsigned int maxPos = data.size() - 4;
while(count-- && pos <= maxPos) {
const auto o = static_cast<offset_t>(data.toUInt(pos));
if(o >= dataStart && o < dataEnd)
return true;
pos += 4;
}
}
for(const auto &co64 : moov->findall("co64", true)) {
file->seek(co64->offset() + 12);
ByteVector data = file->readBlock(co64->length() - 12);
if(data.size() < 4)
continue;
unsigned int count = data.toUInt();
unsigned int pos = 4;
const unsigned int maxPos = data.size() - 8;
while(count-- && pos <= maxPos) {
const offset_t o = data.toLongLong(pos);
if(o >= dataStart && o < dataEnd)
return true;
pos += 8;
}
}
return false;
}
// Removes the QT chapter trak and the tref from the audio track, plus the
// chapter text mdat if (and only if) no other track points into it.
//
// The chapter mdat was appended at EOF by write(). Its location is derived
// from the chapter track's stco entry before the track is deleted. Both the
// chapter trak and tref live inside moov, which precedes the mdat, so removing
// them shifts the mdat by -(chapterLen + trefLen).
// The chapter mdat identity is resolved by looking up which top-level mdat
// contains the chapter track's first chunk offset. After removing the trak
// and tref, we re-parse and verify the mdat is not referenced by any other
// track before deleting it. Audiobook-style files commonly co-locate chapter
// text inside the main audio mdat; in that case the mdat is shared and MUST
// NOT be deleted -- the text bytes are left as dead space and the chapter
// track alone is removed.
void removeQTChapterTrack(TagLib::File *file, const MP4::Atoms *atoms,
MP4::Atom *moov, MP4::Atom *chapterTrak,
const MP4::Atom *audioTrak)
{
// Read the first stco chunk offset BEFORE deleting the trak: the chapter
// mdat data starts at stco[0], so the mdat atom header is 8 bytes earlier.
// Identify the chapter text mdat BEFORE removal (while stco is still valid).
offset_t chapterMdatOffset = -1;
offset_t chapterMdatSize = 0;
{
const std::vector<unsigned int> stco = readStco(file, chapterTrak);
if(!stco.empty() && stco[0] >= 8)
chapterMdatOffset = static_cast<offset_t>(stco[0]) - 8;
if(!stco.empty()) {
if(const MP4::Atom *mdat = findMdatContaining(atoms,
static_cast<offset_t>(stco[0]))) {
chapterMdatOffset = mdat->offset();
chapterMdatSize = mdat->length();
}
}
}
// Record tref length BEFORE removeAudioTref so we can adjust the mdat offset.
// Capture tref/chapter trak locations for mdat offset fix-up below.
offset_t trefOff = -1;
offset_t trefLen = 0;
for(const auto &child : audioTrak->children()) {
if(child->name() == "tref") {
trefOff = child->offset();
trefLen = child->length();
break;
}
@@ -981,18 +1053,32 @@ namespace
// Remove tref from audio trak (lower offset, still valid after chapter trak removal).
removeAudioTref(file, atoms, audioTrak);
// Remove the orphaned chapter mdat. Both removals above are inside moov,
// which precedes the mdat at EOF, so adjust the stored offset accordingly.
if(chapterMdatOffset >= 0) {
const offset_t adjustedOffset = chapterMdatOffset - chapterLen - trefLen;
file->seek(adjustedOffset);
const ByteVector header = file->readBlock(8);
if(header.size() == 8 && header.mid(4, 4) == "mdat") {
const offset_t mdatSize = header.toUInt();
if(mdatSize >= 8)
file->removeBlock(adjustedOffset, mdatSize);
}
}
// Decide whether the chapter mdat is safe to delete.
if(chapterMdatOffset < 0)
return;
// Shift the original mdat offset by however much of the removed bytes
// preceded it in the file.
offset_t adjustedOffset = chapterMdatOffset;
if(chapterMdatOffset > chapterOff)
adjustedOffset -= chapterLen;
if(trefOff >= 0 && chapterMdatOffset > trefOff)
adjustedOffset -= trefLen;
// Re-parse to verify the post-removal on-disk state matches expectations
// and that no surviving track references this mdat's data range.
const MP4::Atoms cleanAtoms(file);
if(mdatIsUsedByAnyTrack(file, &cleanAtoms, adjustedOffset, chapterMdatSize))
return;
file->seek(adjustedOffset);
const ByteVector header = file->readBlock(8);
if(header.size() != 8 || header.mid(4, 4) != "mdat")
return;
if(static_cast<offset_t>(header.toUInt()) != chapterMdatSize)
return;
file->removeBlock(adjustedOffset, chapterMdatSize);
}
} // namespace

View File

@@ -114,6 +114,7 @@ class TestMP4 : public CppUnit::TestFixture
CPPUNIT_TEST(testQTChapterListTimestampPrecision);
CPPUNIT_TEST(testQTChapterListNonZeroFirstChapter);
CPPUNIT_TEST(testQTChapterListNoOrphanedMdat);
CPPUNIT_TEST(testQTChapterListSharedMdatPreservesAudio);
CPPUNIT_TEST_SUITE_END();
public:
@@ -1303,6 +1304,105 @@ public:
CPPUNIT_ASSERT_EQUAL(baseMdatTagLib, countMdatTagLib());
}
// Regression test for the data-loss bug reported in PR #1343 by ufleisch.
// Audiobook-style files co-locate chapter text samples inside the main
// audio mdat. In that case the chapter track's stco[0] does NOT mark a
// dedicated chapter mdat -- it points into the shared audio mdat, and
// naively deleting "the mdat at stco[0] - 8" destroys the audio payload.
//
// Simulate that layout by writing a chapter track, then rewriting its
// stco[0] to point at the start of the primary audio mdat. Removing the
// chapter track must leave the audio mdat fully intact.
void testQTChapterListSharedMdatPreservesAudio()
{
ScopedFileCopy copy("no-tags", ".m4a");
string filename = copy.fileName();
struct MdatInfo { offset_t offset; offset_t length; };
auto findFirstMdat = [&]() -> MdatInfo {
PlainFile pf(filename.c_str());
MP4::Atoms atoms(&pf);
for(const auto *atom : atoms.atoms())
if(atom->name() == "mdat")
return {atom->offset(), atom->length()};
return {-1, 0};
};
const MdatInfo audioMdat = findFirstMdat();
CPPUNIT_ASSERT(audioMdat.offset >= 0);
CPPUNIT_ASSERT(audioMdat.length > 16);
// Capture the audio mdat bytes so we can confirm byte-for-byte preservation.
ByteVector originalAudioMdat;
{
PlainFile pf(filename.c_str());
pf.seek(audioMdat.offset);
originalAudioMdat = pf.readBlock(audioMdat.length);
}
// Add a chapter track. write() appends its own mdat for the chapter text
// at EOF; we'll relocate stco[0] below to simulate the shared-mdat case.
{
MP4::File f(filename.c_str());
f.setQtChapters(MP4::ChapterList{
MP4::Chapter("Chapter 1", 0),
MP4::Chapter("Chapter 2", 1000LL)
});
CPPUNIT_ASSERT(f.save());
}
// Rewrite the chapter track's stco[0] to point inside the audio mdat's
// data, so findMdatContaining() will identify the audio mdat as the
// candidate target. Choosing audioMdat.offset + 8 (the data start) is
// the worst case: without the shared-mdat guard, the old code would
// treat the audio mdat header as the chapter mdat header and wipe it.
{
PlainFile pf(filename.c_str());
MP4::Atoms atoms(&pf);
const MP4::Atom *moov = atoms.find("moov");
CPPUNIT_ASSERT(moov);
const MP4::AtomList traks = moov->findall("trak");
CPPUNIT_ASSERT(traks.size() >= 2);
// The chapter trak is the most recently added -- find the one whose
// hdlr handler_type is "text".
MP4::Atom *chapterTrak = nullptr;
for(auto *t : traks) {
MP4::Atom *hdlr = t->find("mdia", "hdlr");
if(!hdlr) continue;
pf.seek(hdlr->offset());
if(ByteVector d = pf.readBlock(hdlr->length()); d.containsAt("text", 16)) {
chapterTrak = t;
break;
}
}
CPPUNIT_ASSERT(chapterTrak);
MP4::Atom *stco = chapterTrak->find("mdia", "minf", "stbl", "stco");
CPPUNIT_ASSERT(stco);
// stco payload: full-box header(4) + entry_count(4) + offsets[]
pf.seek(stco->offset() + 16);
pf.writeBlock(ByteVector::fromUInt(
static_cast<unsigned int>(audioMdat.offset + 8)));
}
// Trigger the chapter-removal path with the crafted stco[0].
{
MP4::File f(filename.c_str());
f.setQtChapters(MP4::ChapterList());
CPPUNIT_ASSERT(f.save());
}
// The audio mdat must survive with its contents byte-identical.
const MdatInfo afterMdat = findFirstMdat();
CPPUNIT_ASSERT(afterMdat.offset >= 0);
CPPUNIT_ASSERT_EQUAL(audioMdat.length, afterMdat.length);
{
PlainFile pf(filename.c_str());
pf.seek(afterMdat.offset);
const ByteVector afterBytes = pf.readBlock(afterMdat.length);
CPPUNIT_ASSERT(afterBytes == originalAudioMdat);
}
}
};
CPPUNIT_TEST_SUITE_REGISTRATION(TestMP4);