Matroska: Provide different WriteStyles to trade off size/speed

A new Matroska::File::save(WriteStyle style) overload is provided to
control how tags, attachments and chapters are written to the file.

- Compact: Write tags, attachments and chapters as compactly as possible.
  This is the default mode.
- DoNotShrink: Do not shrink elements; add void padding when content
  gets smaller. Allow inserts when content gets larger.
- AvoidInsert: Like DoNotShrink but also avoid inserts for non-last
  elements: replace a growing non-last element with a void of the old
  size and append the new element at the end of the segment.
  For very large files and/or slow (network) filesystems, using this
  mode will reduce write time significantly.

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Urs Fleisch
2026-04-29 16:58:04 +02:00
committed by Urs Fleisch
parent 59ed19d12f
commit 4c43f1c577
18 changed files with 886 additions and 5 deletions

View File

@@ -244,6 +244,7 @@ if(WITH_MATROSKA)
matroska/matroskaproperties.h
matroska/matroskasimpletag.h
matroska/matroskatag.h
matroska/matroskawritestyle.h
)
set(tag_PRIVATE_HDRS ${tag_PRIVATE_HDRS}
matroska/ebml/ebmlbinaryelement.h

View File

@@ -46,7 +46,7 @@ std::unique_ptr<Matroska::Attachments> EBML::MkAttachments::parse() const
{
auto attachments = std::make_unique<Matroska::Attachments>();
attachments->setOffset(offset);
attachments->setSize(getSize());
attachments->setSize(getSize() + padding);
for(const auto &element : elements) {
if(element->getId() != Id::MkAttachedFile)

View File

@@ -92,7 +92,7 @@ std::unique_ptr<Matroska::Chapters> EBML::MkChapters::parse() const
{
auto chapters = std::make_unique<Matroska::Chapters>();
chapters->setOffset(offset);
chapters->setSize(getSize());
chapters->setSize(getSize() + padding);
// Collect any orphan ChapterAtom elements not wrapped in an EditionEntry.
// The Matroska spec requires ChapterAtom to be inside an EditionEntry, but

View File

@@ -83,6 +83,8 @@ bool EBML::MkSegment::readLimited(File &file, offset_t scanLimit)
const offset_t filePos = file.tell();
const offset_t maxOffset = filePos + dataSize;
const offset_t maxScanOffset = filePos + std::min(scanLimit, dataSize);
MasterElement *pendingPaddingTarget = nullptr;
offset_t accumulatedPadding = 0;
std::unique_ptr<Element> element;
while((element = findNextElement(file, maxScanOffset))) {
if(const Id id = element->getId(); id == Id::MkSeekHead) {
@@ -95,6 +97,17 @@ bool EBML::MkSegment::readLimited(File &file, offset_t scanLimit)
seekHead->setPadding(elementAfterSeekHead->getSize());
const offset_t segDataOffset = segmentDataOffset();
const auto matroskaSeekHead = parseSeekHead();
const auto accumulateVoidPadding = [&](MasterElement *target) {
offset_t accPadding = 0;
while(const auto next = findNextElement(file, maxOffset)) {
if(next->getId() != Id::VoidElement)
break;
accPadding += next->getSize();
next->skipData(file);
}
if(accPadding > 0)
target->setPadding(accPadding);
};
for(const auto &[idValue, relativeOffset] : matroskaSeekHead->entryList()) {
const offset_t absoluteOffset = segDataOffset + relativeOffset;
switch(static_cast<Id>(idValue)) {
@@ -117,16 +130,19 @@ bool EBML::MkSegment::readLimited(File &file, offset_t scanLimit)
if(!((tags = readElementAt<Id::MkTags, MkTags>(
file, absoluteOffset, maxOffset))))
return false;
accumulateVoidPadding(tags.get());
break;
case Id::MkAttachments:
if(!((attachments = readElementAt<Id::MkAttachments, MkAttachments>(
file, absoluteOffset, maxOffset))))
return false;
accumulateVoidPadding(attachments.get());
break;
case Id::MkChapters:
if(!((chapters = readElementAt<Id::MkChapters, MkChapters>(
file, absoluteOffset, maxOffset))))
return false;
accumulateVoidPadding(chapters.get());
break;
default:
break;
@@ -134,37 +150,61 @@ bool EBML::MkSegment::readLimited(File &file, offset_t scanLimit)
}
return true;
}
else if(id == Id::VoidElement) {
if(pendingPaddingTarget) {
accumulatedPadding += element->getSize();
pendingPaddingTarget->setPadding(accumulatedPadding);
}
element->skipData(file);
}
else if(id == Id::MkCues) {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
cues = element_cast<Id::MkCues>(std::move(element));
if(!cues->read(file))
return false;
}
else if(id == Id::MkInfo) {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
info = element_cast<Id::MkInfo>(std::move(element));
if(!info->read(file))
return false;
}
else if(id == Id::MkTracks) {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
tracks = element_cast<Id::MkTracks>(std::move(element));
if(!tracks->read(file))
return false;
}
else if(id == Id::MkTags) {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
tags = element_cast<Id::MkTags>(std::move(element));
if(!tags->read(file))
return false;
pendingPaddingTarget = tags.get();
}
else if(id == Id::MkAttachments) {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
attachments = element_cast<Id::MkAttachments>(std::move(element));
if(!attachments->read(file))
return false;
pendingPaddingTarget = attachments.get();
}
else if(id == Id::MkChapters) {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
chapters = element_cast<Id::MkChapters>(std::move(element));
if(!chapters->read(file))
return false;
pendingPaddingTarget = chapters.get();
}
else {
pendingPaddingTarget = nullptr;
accumulatedPadding = 0;
element->skipData(file);
}
}

View File

@@ -47,7 +47,7 @@ std::unique_ptr<Matroska::Tag> EBML::MkTags::parse() const
{
auto mTag = std::make_unique<Matroska::Tag>();
mTag->setOffset(offset);
mTag->setSize(getSize());
mTag->setSize(getSize() + padding);
mTag->setID(static_cast<Matroska::Element::ID>(id));
// Loop through each <Tag> element

View File

@@ -137,5 +137,14 @@ ByteVector Matroska::Attachments::renderInternal()
attachments.appendElement(std::move(attachedFileElement));
}
// Pad to the previous size so the element keeps its slot in the file,
// unless this element is the trailing element of the segment in
// AvoidInsert mode -- shrinking from the end never inserts anything.
if(writeStyle() != WriteStyle::Compact &&
!(writeStyle() == WriteStyle::AvoidInsert && isTrailingInSegment())) {
const auto beforeSize = sizeRenderedOrWritten();
if(beforeSize > 0)
attachments.setMinRenderSize(beforeSize);
}
return attachments.render();
}

View File

@@ -147,5 +147,14 @@ ByteVector Matroska::Chapters::renderInternal()
chapters.appendElement(std::move(chapterEditionElement));
}
// Pad to the previous size so the element keeps its slot in the file,
// unless this element is the trailing element of the segment in
// AvoidInsert mode -- shrinking from the end never inserts anything.
if(writeStyle() != WriteStyle::Compact &&
!(writeStyle() == WriteStyle::AvoidInsert && isTrailingInSegment())) {
const auto beforeSize = sizeRenderedOrWritten();
if(beforeSize > 0)
chapters.setMinRenderSize(beforeSize);
}
return chapters.render();
}

View File

@@ -23,6 +23,7 @@
#include "tlist.h"
#include "tfile.h"
#include "tbytevector.h"
#include "ebmlvoidelement.h"
using namespace TagLib;
@@ -42,6 +43,14 @@ public:
// therefore rendering is required by default and needs to be explicitly set
// using setNeedsRender(false) together with overriding the write() method.
bool needsRender = true;
WriteStyle writeStyle = WriteStyle::Compact;
bool isLastElement = true;
bool isTrailingInSegment = false;
offset_t appendOffset = 0;
// Populated during render() for AvoidInsert+grow+non-last: the offset and
// original size of the slot that should be overwritten with a Void element.
offset_t voidAtOffset = 0;
offset_t voidAtSize = 0;
};
Matroska::Element::Element(ID id) :
@@ -116,8 +125,24 @@ bool Matroska::Element::render()
const auto data = renderInternal();
setNeedsRender(false);
if(const auto afterSize = data.size(); afterSize != beforeSize) {
if(!emitSizeChanged(afterSize - beforeSize)) {
return false;
if(e->writeStyle == WriteStyle::AvoidInsert && !e->isLastElement
&& afterSize > beforeSize && beforeSize > 0) {
// Record old slot for void-overwrite, move element to end of segment.
e->voidAtOffset = e->offset;
e->voidAtSize = beforeSize;
e->offset = e->appendOffset;
// Notify listeners that a new element of afterSize bytes appeared at
// appendOffset (which is past all other elements, so no offset shifts).
if(!emitSizeChanged(static_cast<offset_t>(afterSize))) {
return false;
}
// Update appendOffset for any subsequent AvoidInsert-grow in this round.
e->appendOffset += static_cast<offset_t>(afterSize);
}
else {
if(!emitSizeChanged(afterSize - beforeSize)) {
return false;
}
}
}
@@ -161,8 +186,55 @@ offset_t Matroska::Element::sizeRenderedOrWritten() const
return dataSize != 0 ? dataSize : e->size;
}
// Stores the write style for this element. render() consults it to decide
// whether a growing element is moved instead of grown in place.
void Matroska::Element::setWriteStyle(WriteStyle style)
{
e->writeStyle = style;
}
// Returns the write style stored by setWriteStyle(); WriteStyle::Compact
// unless it has been changed.
Matroska::WriteStyle Matroska::Element::writeStyle() const
{
return e->writeStyle;
}
// Sets the "last element" flag. In AvoidInsert mode render() only relocates
// a growing element when this flag is false.
void Matroska::Element::setIsLastElement(bool isLast)
{
e->isLastElement = isLast;
}
// Sets the offset that render() assigns to this element when it relocates
// the element in AvoidInsert mode.
void Matroska::Element::setAppendOffset(offset_t appendOffset)
{
e->appendOffset = appendOffset;
}
// Sets the flag that marks this element as the trailing element of the
// segment; the flag is reported by isTrailingInSegment().
void Matroska::Element::setIsTrailingInSegment(bool isTrailing)
{
e->isTrailingInSegment = isTrailing;
}
// Returns the flag stored by setIsTrailingInSegment(); false unless it has
// been set.
bool Matroska::Element::isTrailingInSegment() const
{
return e->isTrailingInSegment;
}
bool Matroska::Element::wasMoved() const
{
  // render() records the vacated slot (offset and size) when it relocates the
  // element in AvoidInsert mode. write() resets both fields after the slot has
  // been overwritten, so callers have to query this before calling write().
  const bool slotSizeRecorded = e->voidAtSize != 0;
  const bool slotOffsetRecorded = e->voidAtOffset != 0;
  return slotOffsetRecorded || slotSizeRecorded;
}
// Writes the rendered data of this element into the file. If render() moved
// the element (AvoidInsert), the vacated slot is first overwritten with a
// Void element of the same size.
void Matroska::Element::write(File &file)
{
  if(const auto vacatedSize = e->voidAtSize; vacatedSize > 0) {
    // Replace the old slot byte for byte with a Void element.
    const auto vacatedOffset = e->voidAtOffset;
    file.insert(EBML::VoidElement::renderSize(vacatedSize), vacatedOffset, vacatedSize);

    // The move is done; forget the recorded slot.
    e->voidAtSize = 0;
    e->voidAtOffset = 0;

    // Nothing exists yet at the new position, so the insert below must not
    // replace any bytes.
    e->size = 0;
  }

  file.insert(e->data, e->offset, e->size);
  e->size = e->data.size();
}

View File

@@ -26,6 +26,7 @@
#include "taglib_export.h"
#include "taglib.h"
#include "tlist.h"
#include "matroskawritestyle.h"
namespace TagLib {
class File;
@@ -57,6 +58,17 @@ namespace TagLib {
bool emitSizeChanged(offset_t delta);
virtual bool sizeChanged(Element &caller, offset_t delta);
void setWriteStyle(WriteStyle style);
WriteStyle writeStyle() const;
void setIsLastElement(bool isLast);
void setAppendOffset(offset_t appendOffset);
bool wasMoved() const;
//! Mark this element as the trailing element of the segment (no other
//! element follows it in the file). Trailing elements may shrink even
//! in non-Compact write styles because no offsets need to be preserved.
void setIsTrailingInSegment(bool isTrailing);
bool isTrailingInSegment() const;
protected:
offset_t sizeRenderedOrWritten() const;

View File

@@ -445,6 +445,11 @@ void Matroska::File::read(bool readProperties, Properties::ReadStyle readStyle)
}
// Saves the file using WriteStyle::Compact by delegating to
// save(WriteStyle) and returning its result.
bool Matroska::File::save()
{
return save(WriteStyle::Compact);
}
bool Matroska::File::save(WriteStyle writeStyle)
{
if(readOnly()) {
debug("Matroska::File::save() -- File is read only.");
@@ -508,6 +513,75 @@ bool Matroska::File::save()
renderList.sort(sortAscending);
renderList.append(newElements);
// Configure write style on each data element. Determines whether elements
// may be padded (DoNotShrink/AvoidInsert) or moved to the end (AvoidInsert).
// New elements (no prior size) are always written compactly.
if(writeStyle != WriteStyle::Compact) {
// Determine which existing data element has the highest file offset
// (i.e., is "last" among the data elements, before cues/seekHead/segment).
// New elements always go after existing ones and are treated as compact.
const Element *lastDataElement = nullptr;
for(const auto element : renderList) {
if(element->size() > 0)
lastDataElement = element;
}
// For AvoidInsert: an existing data element (Tags, Chapters, Attachments)
// located before the LAST Cluster must not be grown in-place. Doing so
// would shift later clusters and invalidate their cue positions. Such
// elements are voided at their original position and appended at the
// end of the segment instead. The boundary is the maximum cluster offset
// (derived from cue-point cluster positions). If no cue points are
// available, the Cues element offset is used as a safe upper bound
// (Cues are always after the last Cluster). A value of 0 means
// "no boundary": any offset compares >= 0, so the boundary check is
// a no-op in non-AvoidInsert modes.
offset_t audioBoundary = 0;
if(writeStyle == WriteStyle::AvoidInsert && d->cues) {
const offset_t segDataOffset = d->segment->dataOffset();
for(const auto &cp : d->cues->cuePointList()) {
for(const auto &ct : cp->cueTrackList()) {
audioBoundary = std::max(audioBoundary,
segDataOffset + ct->getClusterPosition());
}
}
if(audioBoundary == 0)
audioBoundary = d->cues->offset();
}
for(const auto element : renderList) {
if(element->size() > 0) {
element->setWriteStyle(writeStyle);
// An element is "last" only if it has the highest data-element
// offset AND sits past the last cluster. The latter is always true
// when audioBoundary == 0 (DoNotShrink, or AvoidInsert without cues).
element->setIsLastElement(element == lastDataElement
&& element->offset() >= audioBoundary);
}
}
// For AvoidInsert: identify the segment-trailing element (highest offset
// among data elements, Cues, SeekHead). The trailing element may shrink
// without padding -- there is nothing after it whose offset would shift,
// so a trailing void would be wasted space.
if(writeStyle == WriteStyle::AvoidInsert) {
Element *trailing = nullptr;
offset_t maxOffset = 0;
const auto consider = [&](Element *e) {
if(e && e->size() > 0 && e->offset() > maxOffset) {
maxOffset = e->offset();
trailing = e;
}
};
for(const auto element : renderList)
consider(element);
consider(d->cues.get());
consider(d->seekHead.get());
if(trailing)
trailing->setIsTrailingInSegment(true);
}
}
// Add our new elements to the Seek Head (if the file has one)
if(d->seekHead) {
const auto segmentDataOffset = d->segment->dataOffset();
@@ -550,6 +624,12 @@ bool Matroska::File::save()
bool rendering = true;
while(rendering && renderRound < 5) {
rendering = false;
// Initialize appendOffset for AvoidInsert elements at the start of each round.
if(writeStyle == WriteStyle::AvoidInsert) {
const offset_t appendOffset = d->segment->endOffset();
for(const auto element : renderList)
element->setAppendOffset(appendOffset);
}
for(const auto element : renderList) {
if(element->needsRender()) {
rendering = true;
@@ -561,6 +641,51 @@ bool Matroska::File::save()
++renderRound;
}
// For AvoidInsert: elements that were moved during rendering may have
// stale offsets if in-place elements grew after the move was computed.
// Re-assign their offsets sequentially from the correct position.
if(writeStyle == WriteStyle::AvoidInsert) {
// Collect moved elements in render order (= ascending original-offset order
// = order they appear in renderList before any re-sort).
List<Element *> movedElements;
offset_t totalMovedSize = 0;
for(const auto element : renderList) {
if(element->wasMoved()) {
movedElements.append(element);
totalMovedSize += static_cast<offset_t>(element->data().size());
}
}
if(!movedElements.isEmpty()) {
// The segment end includes in-place growths AND all moved element sizes.
// The moved elements start right after all in-place content.
offset_t appendAt = d->segment->endOffset() - totalMovedSize;
for(const auto element : movedElements) {
element->setOffset(appendAt);
appendAt += static_cast<offset_t>(element->data().size());
}
}
}
// For elements that were moved to the end by AvoidInsert, update their
// seek head entry to reflect the new file position.
if(writeStyle == WriteStyle::AvoidInsert && d->seekHead) {
const offset_t segDataOffset = d->segment->dataOffset();
for(const auto element : renderList) {
if(element->wasMoved()) {
d->seekHead->updateEntry(element->id(), element->offset() - segDataOffset);
}
}
// Re-render the seekHead (and anything it affects) after updating entries.
// The seekHead slot was pre-padded, so this should not cause size changes.
d->seekHead->setNeedsRender(true);
for(const auto element : renderList) {
if(element->needsRender()) {
if(!element->render())
return false;
}
}
}
// Write out to file
renderList.sort(sortAscending);
for(const auto element : renderList)

View File

@@ -24,6 +24,7 @@
#include "taglib_export.h"
#include "tfile.h"
#include "matroskaproperties.h"
#include "matroskawritestyle.h"
//! An implementation of Matroska metadata
namespace TagLib::Matroska {
@@ -145,6 +146,13 @@ namespace TagLib::Matroska {
*/
bool save() override;
/*!
* Saves the file, writing tags, attachments and chapters according to
* \a style (see WriteStyle). Calling save() without arguments is
* equivalent to passing WriteStyle::Compact.
*
* Returns \c true if the save was successful.
*/
bool save(WriteStyle style);
/*!
* Returns a pointer to the attachments of the file.
*

View File

@@ -63,6 +63,17 @@ void Matroska::SeekHead::addEntry(ID id, offset_t offset)
setNeedsRender(true);
}
// Replaces the offset of the first entry whose ID equals \a id with
// \a newOffset and flags the seek head for re-rendering. Does nothing when
// no entry has that ID.
void Matroska::SeekHead::updateEntry(ID id, offset_t newOffset)
{
  for(auto it = entries.begin(), last = entries.end(); it != last; ++it) {
    if(it->first != id)
      continue;

    it->second = newOffset;
    setNeedsRender(true);
    return;
  }
}
const List<std::pair<unsigned int, offset_t>> &Matroska::SeekHead::entryList() const
{
return entries;

View File

@@ -39,6 +39,7 @@ namespace TagLib {
bool isValid(TagLib::File &file) const;
void addEntry(const Element &element);
void addEntry(ID id, offset_t offset);
void updateEntry(ID id, offset_t offset);
const List<std::pair<unsigned int, offset_t>> &entryList() const;
void write(TagLib::File &file) override;
void sort();

View File

@@ -69,3 +69,8 @@ offset_t Matroska::Segment::dataOffset() const
{
return offset() + sizeLength;
}
// Returns dataOffset() advanced by dataSize.
offset_t Matroska::Segment::endOffset() const
{
  const auto payloadStart = dataOffset();
  return payloadStart + dataSize;
}

View File

@@ -33,6 +33,7 @@ namespace TagLib::Matroska {
bool render() override;
bool sizeChanged(Element &caller, offset_t delta) override;
offset_t dataOffset() const;
offset_t endOffset() const;
private:
ByteVector renderInternal() override;

View File

@@ -364,6 +364,16 @@ ByteVector Matroska::Tag::renderInternal()
}
tags.appendElement(std::move(tag));
}
// Pad to the previous size so the element keeps its slot in the file,
// unless this element is the trailing element of the segment in
// AvoidInsert mode -- shrinking from the end never inserts anything,
// so the trailing void would be wasted space.
if(writeStyle() != WriteStyle::Compact &&
!(writeStyle() == WriteStyle::AvoidInsert && isTrailingInSegment())) {
const auto beforeSize = sizeRenderedOrWritten();
if(beforeSize > 0)
tags.setMinRenderSize(beforeSize);
}
return tags.render();
}

View File

@@ -0,0 +1,49 @@
/***************************************************************************
copyright : (C) 2026 by Urs Fleisch
email : ufleisch@users.sourceforge.net
***************************************************************************/
/***************************************************************************
* This library is free software; you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License version *
* 2.1 as published by the Free Software Foundation. *
* *
* This library is distributed in the hope that it will be useful, but *
* WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
* Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public *
* License along with this library; if not, write to the Free Software *
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA *
* 02110-1301 USA *
* *
* Alternatively, this file is available under the Mozilla Public *
* License Version 1.1. You may obtain a copy of the License at *
* http://www.mozilla.org/MPL/ *
***************************************************************************/
#ifndef TAGLIB_MATROSKAWRITESTYLE_H
#define TAGLIB_MATROSKAWRITESTYLE_H
namespace TagLib::Matroska {
/*!
* Selects how tags, attachments and chapters are written to the file when
* saving, trading file size against write speed.
* For very large files and/or slow (network) filesystems, using
* \c AvoidInsert will reduce write time significantly.
*/
enum class WriteStyle {
//! Write tags, attachments and chapters as compactly as possible (default).
Compact,
//! Do not shrink elements; add void padding when content gets smaller.
//! Allow inserts when content gets larger.
DoNotShrink,
//! Like \c DoNotShrink, but also avoid inserts for non-last elements:
//! a growing non-last element is replaced with a void of the old size and
//! the new element is appended at the end of the segment.
AvoidInsert
};
}
#endif //TAGLIB_MATROSKAWRITESTYLE_H

View File

@@ -156,6 +156,10 @@ class TestMatroska : public CppUnit::TestFixture
CPPUNIT_TEST(testOpenInvalid);
CPPUNIT_TEST(testSegmentSizeChange);
CPPUNIT_TEST(testChapters);
CPPUNIT_TEST(testSaveTypes);
CPPUNIT_TEST(testSaveTypesBeforeCues);
CPPUNIT_TEST(testSaveTypesNoTrailingVoid);
CPPUNIT_TEST(testSaveTypesReclaimVoid);
CPPUNIT_TEST_SUITE_END();
public:
@@ -1249,6 +1253,530 @@ public:
CPPUNIT_ASSERT(origData == fileData);
}
// Grows a Tags element located before the Cluster with every WriteStyle and
// checks that the tags round-trip and that the Cluster only moves when the
// write style allows an in-place insert.
void testSaveTypesBeforeCues()
{
  // tags-before-cues.mkv layout:
  //   SeekHead | Void | SegInfo | Tracks | Tags | Cluster | Cues
  //
  // Verify all three WriteStyles correctly grow the Tags element which
  // sits *before* the Cluster:
  //   - Compact / DoNotShrink: bytes are inserted before the Cluster, the
  //     Cluster shifts, the seek head and cue cluster positions must be
  //     updated accordingly; the file must remain valid and tag content
  //     must round-trip.
  //   - AvoidInsert: the Tags element is replaced with a Void at its
  //     original position and appended at the end of the segment, so the
  //     Cluster must NOT shift; tag content must round-trip.
  const ByteVector origData =
    PlainFile(TEST_FILE_PATH_C("tags-before-cues.mkv")).readAll();

  // Cluster ID 0x1F43B675 does not appear in the SeekHead of this file,
  // so find() returns the offset of the actual Cluster element.
  const ByteVector clusterId = ByteVector::fromUInt(0x1F43B675U, true);
  const ByteVector tagsId = ByteVector::fromUInt(0x1254C367U, true);
  const ByteVector cuesId = ByteVector::fromUInt(0x1C53BB6BU, true);
  const int origClusterPos = origData.find(clusterId);
  CPPUNIT_ASSERT(origClusterPos > 0);

  const String longTitle =
    "An Extremely Long Title Value That Is Definitely Larger Than The Original "
    "Tags Element In The File Because It Contains Many Characters To Ensure "
    "That The AvoidInsert Move-To-End Behavior Triggers Here";
  const String longArtist =
    "An Extremely Long Artist Name Value That Is Also Larger Than The Original "
    "Tags Element And Together With The Title Tag Makes The Rendered Output "
    "Exceed The Original Tags Size So The AvoidInsert Triggers";

  for(const auto writeStyle : {Matroska::WriteStyle::Compact,
                               Matroska::WriteStyle::DoNotShrink,
                               Matroska::WriteStyle::AvoidInsert}) {
    // Numeric label of the write style, used to tell the iterations apart in
    // assertion messages.
    const auto wsLabel = String::number(static_cast<int>(writeStyle)).to8Bit();
    ScopedFileCopy copy("tags-before-cues", ".mkv");
    const string newname = copy.fileName();

    // Save with Tags significantly larger than the original Tags element.
    {
      Matroska::File f(newname.c_str());
      CPPUNIT_ASSERT_MESSAGE("Open ws=" + wsLabel, f.isValid());
      auto tag = f.tag(true);
      tag->clearSimpleTags();
      tag->addSimpleTag(Matroska::SimpleTag(
        String("TITLE"), longTitle,
        Matroska::SimpleTag::TargetTypeValue::Track));
      tag->addSimpleTag(Matroska::SimpleTag(
        String("ARTIST"), longArtist,
        Matroska::SimpleTag::TargetTypeValue::Track));
      CPPUNIT_ASSERT_MESSAGE("Save ws=" + wsLabel, f.save(writeStyle));
    }

    // File must be valid: Accurate mode verifies seek-head and cue positions.
    // Tag content must round-trip exactly.
    {
      Matroska::File f(newname.c_str(), true, AudioProperties::Accurate);
      CPPUNIT_ASSERT_MESSAGE("Reopen valid ws=" + wsLabel, f.isValid());
      auto tag = f.tag(false);
      CPPUNIT_ASSERT_MESSAGE("Tag exists ws=" + wsLabel, tag != nullptr);
      const auto &simpleTags = tag->simpleTagsList();
      bool foundTitle = false, foundArtist = false;
      for(const auto &st : simpleTags) {
        if(st.name() == "TITLE" && st.toString() == longTitle)
          foundTitle = true;
        else if(st.name() == "ARTIST" && st.toString() == longArtist)
          foundArtist = true;
      }
      CPPUNIT_ASSERT_MESSAGE("TITLE roundtrip ws=" + wsLabel, foundTitle);
      CPPUNIT_ASSERT_MESSAGE("ARTIST roundtrip ws=" + wsLabel, foundArtist);
    }

    const ByteVector newData = PlainFile(newname.c_str()).readAll();
    const int newClusterPos = newData.find(clusterId);
    CPPUNIT_ASSERT_MESSAGE("Cluster present ws=" + wsLabel, newClusterPos > 0);

    if(writeStyle == Matroska::WriteStyle::AvoidInsert) {
      // Cluster must not shift in AvoidInsert mode.
      CPPUNIT_ASSERT_EQUAL_MESSAGE(
        "AvoidInsert must not shift Cluster",
        origClusterPos, newClusterPos);

      // Tags must be appended after Cues.
      // find() reports a missing pattern with a negative result. Without
      // this check a missing Cues element would make the Tags search below
      // start near the beginning of the file, where any earlier Tags ID
      // would satisfy the "after Cues" assertion.
      const int cuesPos = newData.find(cuesId, newClusterPos);
      CPPUNIT_ASSERT_MESSAGE("Cues present ws=" + wsLabel,
                             cuesPos > newClusterPos);
      const int newTagsPos = newData.find(tagsId, cuesPos + 4);
      CPPUNIT_ASSERT_MESSAGE("Tags appended after Cues ws=" + wsLabel,
                             newTagsPos > cuesPos);
    }
    else {
      // Compact / DoNotShrink: Tags grew in place, so Cluster must have
      // shifted to a higher offset.
      CPPUNIT_ASSERT_MESSAGE(
        "Cluster must shift when growing in place ws=" + wsLabel,
        newClusterPos > origClusterPos);
    }
  }
}
// Moves the Tags element to the end of the segment with AvoidInsert, shrinks
// it with a second AvoidInsert save and checks that the file ends exactly at
// the end of the Tags element.
void testSaveTypesNoTrailingVoid()
{
  // After AvoidInsert moved the Tags element to the end of the segment,
  // a subsequent save with smaller content must not leave a trailing
  // EBML void at the very end of the segment. The trailing element may
  // shrink freely because no element follows it.
  ScopedFileCopy copy("tags-before-cues", ".mkv");
  const string newname = copy.fileName();

  // Round 1: enlarge Tags so they get moved to the end.
  {
    Matroska::File f(newname.c_str());
    CPPUNIT_ASSERT(f.isValid());
    auto tag = f.tag(true);
    tag->clearSimpleTags();
    tag->addSimpleTag(Matroska::SimpleTag(
      String("TITLE"),
      String("An Extremely Long Title Value That Is Definitely Larger Than The Original "
             "Tags Element In The File Because It Contains Many Characters To Ensure "
             "That The AvoidInsert Move-To-End Behavior Triggers Here"),
      Matroska::SimpleTag::TargetTypeValue::Track));
    tag->addSimpleTag(Matroska::SimpleTag(
      String("ARTIST"),
      String("An Extremely Long Artist Name Value That Is Also Larger Than The Original "
             "Tags Element And Together With The Title Tag Makes The Rendered Output "
             "Exceed The Original Tags Size So The AvoidInsert Triggers"),
      Matroska::SimpleTag::TargetTypeValue::Track));
    CPPUNIT_ASSERT(f.save(Matroska::WriteStyle::AvoidInsert));
  }
  const size_t sizeAfterRound1 = PlainFile(newname.c_str()).readAll().size();

  // Round 2: shrink Tags. The trailing element must shrink without
  // leaving a void at the end.
  {
    Matroska::File f(newname.c_str());
    CPPUNIT_ASSERT(f.isValid());
    auto tag = f.tag(true);
    tag->clearSimpleTags();
    tag->addSimpleTag(Matroska::SimpleTag(
      String("TITLE"), String("X"),
      Matroska::SimpleTag::TargetTypeValue::Track));
    CPPUNIT_ASSERT(f.save(Matroska::WriteStyle::AvoidInsert));
  }
  {
    Matroska::File f(newname.c_str(), true, AudioProperties::Accurate);
    CPPUNIT_ASSERT(f.isValid());
    CPPUNIT_ASSERT(f.tag(false) != nullptr);
  }

  const ByteVector newData = PlainFile(newname.c_str()).readAll();
  // File must have shrunk because the trailing Tags element shrank.
  CPPUNIT_ASSERT(newData.size() < sizeAfterRound1);

  // The last bytes of the file must be the (small) Tags element, not a
  // Void element. Find the Tags element after the Cues element and parse
  // its VINT size: the file must end exactly at Tags' end with nothing
  // (no Void) after it.
  const ByteVector clusterId = ByteVector::fromUInt(0x1F43B675U, true);
  const ByteVector cuesId = ByteVector::fromUInt(0x1C53BB6BU, true);
  const ByteVector tagsId = ByteVector::fromUInt(0x1254C367U, true);
  // Each lookup is verified before its result seeds the next search; a
  // negative (not found) result would otherwise restart the following
  // search near the beginning of the file and could match the wrong ID.
  const int clusterPos = newData.find(clusterId);
  CPPUNIT_ASSERT(clusterPos > 0);
  const int cuesPos = newData.find(cuesId, clusterPos);
  CPPUNIT_ASSERT(cuesPos > clusterPos);
  const int tagsPos = newData.find(tagsId, cuesPos + 4);
  CPPUNIT_ASSERT(tagsPos > cuesPos);

  // Decode VINT data size of the Tags element. The first byte after the
  // 4-byte ID has a leading marker bit indicating the VINT length.
  // Make sure that byte exists before indexing into the data.
  CPPUNIT_ASSERT(static_cast<size_t>(tagsPos) + 4 < newData.size());
  const auto vintFirst = static_cast<unsigned char>(newData[tagsPos + 4]);
  // A first byte without any marker bit is not a valid size VINT.
  CPPUNIT_ASSERT(vintFirst != 0);
  int vintLen = 1;
  for(int b = 0; b < 8; ++b) {
    if(vintFirst & (0x80 >> b)) { vintLen = b + 1; break; }
  }
  // All VINT bytes must lie inside the file before they are read.
  CPPUNIT_ASSERT(static_cast<size_t>(tagsPos) + 4 + static_cast<size_t>(vintLen)
                 <= newData.size());
  unsigned long long dataSize = vintFirst & ((0x80 >> (vintLen - 1)) - 1);
  for(int i = 1; i < vintLen; ++i)
    dataSize = (dataSize << 8) | static_cast<unsigned char>(newData[tagsPos + 4 + i]);

  const unsigned long long tagsEnd =
    static_cast<unsigned long long>(tagsPos) + 4 + vintLen + dataSize;
  CPPUNIT_ASSERT_EQUAL_MESSAGE(
    "No trailing EBML void must remain at the end of the segment",
    static_cast<unsigned long long>(newData.size()), tagsEnd);
}
void testSaveTypesReclaimVoid()
{
// After AvoidInsert moves a Tags element to the end (leaving a Void at
// its original position), a subsequent save with WriteStyle::Compact and
// short tag values must shrink the file again and keep it readable.
// NOTE(review): the assertions below only bound the result relative to the
// size after the AvoidInsert save and to a direct Compact save of the
// original file; a bound against the original file size is not asserted.
ScopedFileCopy copy("tags-before-cues", ".mkv");
const string newname = copy.fileName();
// Step 1: AvoidInsert with enlarged Tags -> Tags moved to end, Void in
// original slot. File grows.
{
Matroska::File f(newname.c_str());
CPPUNIT_ASSERT(f.isValid());
auto tag = f.tag(true);
tag->clearSimpleTags();
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"),
String("An Extremely Long Title Value That Is Definitely Larger Than The Original "
"Tags Element In The File Because It Contains Many Characters To Ensure "
"That The AvoidInsert Move-To-End Behavior Triggers Here"),
Matroska::SimpleTag::TargetTypeValue::Track));
tag->addSimpleTag(Matroska::SimpleTag(String("ARTIST"),
String("An Extremely Long Artist Name Value That Is Also Larger Than The Original "
"Tags Element And Together With The Title Tag Makes The Rendered Output "
"Exceed The Original Tags Size So The AvoidInsert Triggers"),
Matroska::SimpleTag::TargetTypeValue::Track));
CPPUNIT_ASSERT(f.save(Matroska::WriteStyle::AvoidInsert));
}
// The AvoidInsert save must have made the file larger than the original.
const size_t sizeAfterAvoidInsert =
PlainFile(newname.c_str()).readAll().size();
CPPUNIT_ASSERT(sizeAfterAvoidInsert >
PlainFile(TEST_FILE_PATH_C("tags-before-cues.mkv")).readAll().size());
// Step 2: Save again with Compact and short tag values. The file must
// become smaller than it was after the AvoidInsert save.
{
Matroska::File f(newname.c_str());
CPPUNIT_ASSERT(f.isValid());
auto tag = f.tag(true);
tag->clearSimpleTags();
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"), String("X"),
Matroska::SimpleTag::TargetTypeValue::Track));
CPPUNIT_ASSERT(f.save(Matroska::WriteStyle::Compact));
}
const size_t sizeAfterCompact =
PlainFile(newname.c_str()).readAll().size();
CPPUNIT_ASSERT_MESSAGE(
"Compact must reclaim space after AvoidInsert grew the file",
sizeAfterCompact < sizeAfterAvoidInsert);
// Reference: applying Compact directly to the original file with the
// same tiny tags. Note: an orphan Void left in the middle of the
// segment by AvoidInsert is not currently reclaimed by Compact (it is
// attached as padding to a neighbouring element), so the post-Compact
// size is allowed to be larger than the reference; only
// referenceCompactSize <= sizeAfterCompact is checked here.
ScopedFileCopy reference("tags-before-cues", ".mkv");
{
Matroska::File f(reference.fileName().c_str());
CPPUNIT_ASSERT(f.isValid());
auto tag = f.tag(true);
tag->clearSimpleTags();
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"), String("X"),
Matroska::SimpleTag::TargetTypeValue::Track));
CPPUNIT_ASSERT(f.save(Matroska::WriteStyle::Compact));
}
const size_t referenceCompactSize =
PlainFile(reference.fileName().c_str()).readAll().size();
CPPUNIT_ASSERT(referenceCompactSize <= sizeAfterCompact);
// File must round-trip correctly.
{
Matroska::File f(newname.c_str(), true, AudioProperties::Accurate);
CPPUNIT_ASSERT(f.isValid());
auto tag = f.tag(false);
CPPUNIT_ASSERT(tag != nullptr);
bool foundTitle = false;
for(const auto &st : tag->simpleTagsList()) {
if(st.name() == "TITLE" && st.toString() == "X") {
foundTitle = true;
break;
}
}
CPPUNIT_ASSERT(foundTitle);
}
}
void testSaveTypes()
{
// Helper lambdas for adding data of different sizes
// largeTags: 2 simple tags with long values
const auto setLargeTags = [](Matroska::File &f) {
auto tag = f.tag(true);
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"),
String("A Very Long Title That Takes Up A Lot Of Space In The File 1234567890"),
Matroska::SimpleTag::TargetTypeValue::Track));
tag->addSimpleTag(Matroska::SimpleTag(String("ARTIST"),
String("A Very Long Artist Name That Takes Up A Lot Of Space In The File 1234567890"),
Matroska::SimpleTag::TargetTypeValue::Track));
};
const auto setSmallTags = [](Matroska::File &f) {
auto tag = f.tag(true);
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"), String("Short"),
Matroska::SimpleTag::TargetTypeValue::Track));
};
const auto setMediumTags = [](Matroska::File &f) {
auto tag = f.tag(true);
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"), String("Medium Title 12345678901234"),
Matroska::SimpleTag::TargetTypeValue::Track));
tag->addSimpleTag(Matroska::SimpleTag(String("ARTIST"), String("Medium Artist"),
Matroska::SimpleTag::TargetTypeValue::Track));
};
const auto setExtraLargeTags = [](Matroska::File &f) {
auto tag = f.tag(true);
tag->addSimpleTag(Matroska::SimpleTag(String("TITLE"),
String("An Extremely Long Title That Is Even Larger Than The Previous Large Title "
"With Extra Content To Ensure Growth 0123456789ABCDEF"),
Matroska::SimpleTag::TargetTypeValue::Track));
tag->addSimpleTag(Matroska::SimpleTag(String("ARTIST"),
String("An Extremely Long Artist Name Exceeding The Prior Large Artist Value "
"With Even More Content To Guarantee Growth 0123456789ABCDEF"),
Matroska::SimpleTag::TargetTypeValue::Track));
};
const auto setLargeAttachments = [](Matroska::File &f) {
auto atts = f.attachments(true);
atts->addAttachedFile(Matroska::AttachedFile(
ByteVector(200, 'x'), "cover.jpg", "image/jpeg", 111ULL, "Cover"));
};
const auto setSmallAttachments = [](Matroska::File &f) {
auto atts = f.attachments(true);
atts->addAttachedFile(Matroska::AttachedFile(
ByteVector(20, 'x'), "img.png", "image/png", 222ULL, "Img"));
};
const auto setMediumAttachments = [](Matroska::File &f) {
auto atts = f.attachments(true);
atts->addAttachedFile(Matroska::AttachedFile(
ByteVector(80, 'x'), "cover.jpg", "image/jpeg", 333ULL, "Cover"));
};
const auto setExtraLargeAttachments = [](Matroska::File &f) {
auto atts = f.attachments(true);
atts->addAttachedFile(Matroska::AttachedFile(
ByteVector(500, 'x'), "cover.jpg", "image/jpeg", 444ULL, "Cover"));
};
const auto setLargeChapters = [](Matroska::File &f) {
auto chs = f.chapters(true);
chs->addChapterEdition(Matroska::ChapterEdition(
List<Matroska::Chapter>{
Matroska::Chapter(0, 40000,
List{Matroska::Chapter::Display("Chapter One Long Name", "eng")},
1, false),
Matroska::Chapter(40000, 80000,
List{Matroska::Chapter::Display("Chapter Two Long Name", "eng")},
2, false),
}, true, false));
};
const auto setSmallChapters = [](Matroska::File &f) {
auto chs = f.chapters(true);
chs->addChapterEdition(Matroska::ChapterEdition(
List<Matroska::Chapter>{
Matroska::Chapter(0, 1000,
List{Matroska::Chapter::Display("A", "und")},
1, false),
}, false, false));
};
const auto setMediumChapters = [](Matroska::File &f) {
auto chs = f.chapters(true);
chs->addChapterEdition(Matroska::ChapterEdition(
List<Matroska::Chapter>{
Matroska::Chapter(0, 40000,
List{Matroska::Chapter::Display("Chapter Medium", "eng")},
1, false),
}, true, false));
};
const auto setExtraLargeChapters = [](Matroska::File &f) {
auto chs = f.chapters(true);
chs->addChapterEdition(Matroska::ChapterEdition(
List<Matroska::Chapter>{
Matroska::Chapter(0, 40000,
List{Matroska::Chapter::Display("Chapter One Extremely Long Name Here", "eng"),
Matroska::Chapter::Display("Kapitel Eins Sehr Langer Name", "deu")},
1, false),
Matroska::Chapter(40000, 80000,
List{Matroska::Chapter::Display("Chapter Two Extremely Long Name Here", "eng"),
Matroska::Chapter::Display("Kapitel Zwei Sehr Langer Name", "deu")},
2, false),
Matroska::Chapter(80000, 120000,
List{Matroska::Chapter::Display("Chapter Three Extra Large", "eng")},
3, true),
}, true, true));
};
for(const auto writeStyle : {Matroska::WriteStyle::Compact,
Matroska::WriteStyle::DoNotShrink,
Matroska::WriteStyle::AvoidInsert}) {
ScopedFileCopy copy("no-tags", ".mka");
const string newname = copy.fileName();
const int wsIdx = static_cast<int>(writeStyle);
// Verify tag/attachment/chapter content for a saved file. Each round
// uses unique identifiers (specific TITLE value, attachment UID,
// chapter timeStart) so any cross-round leakage or corruption is
// caught here.
const auto verifyRound = [&](const std::string &label,
const String &expectedTitle,
unsigned long long expectedAttachmentUid,
unsigned int expectedChapterCount,
unsigned long long expectedFirstChapterEnd) {
Matroska::File f(newname.c_str(), true, AudioProperties::Accurate);
CPPUNIT_ASSERT_MESSAGE(label + " valid", f.isValid());
auto tag = f.tag(false);
CPPUNIT_ASSERT_MESSAGE(label + " tag", tag != nullptr);
bool foundTitle = false;
for(const auto &st : tag->simpleTagsList()) {
if(st.name() == "TITLE" && st.toString() == expectedTitle) {
foundTitle = true;
break;
}
}
CPPUNIT_ASSERT_MESSAGE(label + " TITLE roundtrip", foundTitle);
auto atts = f.attachments(false);
CPPUNIT_ASSERT_MESSAGE(label + " attachments", atts != nullptr);
bool foundAtt = false;
for(const auto &a : atts->attachedFileList()) {
if(a.uid() == expectedAttachmentUid) {
foundAtt = true;
break;
}
}
CPPUNIT_ASSERT_MESSAGE(label + " attachment uid roundtrip", foundAtt);
auto chs = f.chapters(false);
CPPUNIT_ASSERT_MESSAGE(label + " chapters", chs != nullptr);
CPPUNIT_ASSERT_EQUAL_MESSAGE(label + " edition count", 1U,
chs->chapterEditionList().size());
const auto &edition = chs->chapterEditionList().front();
CPPUNIT_ASSERT_EQUAL_MESSAGE(label + " chapter count",
expectedChapterCount, edition.chapterList().size());
CPPUNIT_ASSERT_EQUAL_MESSAGE(label + " first chapter end",
expectedFirstChapterEnd, edition.chapterList()[0].timeEnd());
};
// --- Round 1: save large data ---
{
Matroska::File f(newname.c_str());
CPPUNIT_ASSERT(f.isValid());
setLargeTags(f);
setLargeAttachments(f);
setLargeChapters(f);
CPPUNIT_ASSERT_MESSAGE("Round1 save ws=" + String::number(wsIdx).to8Bit(), f.save(writeStyle));
}
const size_t sizeAfterRound1 = PlainFile(newname.c_str()).readAll().size();
verifyRound("Round1 ws=" + std::to_string(wsIdx),
String("A Very Long Title That Takes Up A Lot Of Space In The File 1234567890"),
111ULL, 2U, 40000ULL);
// --- Round 2: save smaller data → slot must not shrink for DoNotShrink/AvoidInsert ---
{
Matroska::File f(newname.c_str());
CPPUNIT_ASSERT(f.isValid());
f.tag(true)->clearSimpleTags();
f.attachments(true)->clear();
f.chapters(true)->clear();
setSmallTags(f);
setSmallAttachments(f);
setSmallChapters(f);
CPPUNIT_ASSERT_MESSAGE("Round2 save ws=" + String::number(wsIdx).to8Bit(), f.save(writeStyle));
}
const size_t sizeAfterRound2 = PlainFile(newname.c_str()).readAll().size();
verifyRound("Round2 ws=" + std::to_string(wsIdx),
String("Short"), 222ULL, 1U, 1000ULL);
if(writeStyle == Matroska::WriteStyle::Compact) {
// Compact always shrinks, so file is smaller
CPPUNIT_ASSERT(sizeAfterRound2 < sizeAfterRound1);
} else if(writeStyle == Matroska::WriteStyle::AvoidInsert) {
// AvoidInsert: existing slots are kept, but the segment-trailing
// element may shrink (no element follows it -- shrinking only
// truncates the file, no inserts are needed).
CPPUNIT_ASSERT(sizeAfterRound2 <= sizeAfterRound1);
} else {
// DoNotShrink: elements keep their original slot size.
// The file size must not be smaller than after round 1
CPPUNIT_ASSERT_EQUAL(sizeAfterRound1, sizeAfterRound2);
}
// --- Round 3: save medium data (fits in round2's slot if DoNotShrink/AvoidInsert) ---
{
Matroska::File f(newname.c_str());
CPPUNIT_ASSERT(f.isValid());
f.tag(true)->clearSimpleTags();
f.attachments(true)->clear();
f.chapters(true)->clear();
setMediumTags(f);
setMediumAttachments(f);
setMediumChapters(f);
CPPUNIT_ASSERT_MESSAGE("Round3 save ws=" + String::number(wsIdx).to8Bit(), f.save(writeStyle));
}
const size_t sizeAfterRound3 = PlainFile(newname.c_str()).readAll().size();
verifyRound("Round3 ws=" + std::to_string(wsIdx),
String("Medium Title 12345678901234"), 333ULL, 1U, 40000ULL);
if(writeStyle == Matroska::WriteStyle::Compact) {
// Compact: medium > small, but exact, so different from round2
CPPUNIT_ASSERT(sizeAfterRound3 != sizeAfterRound2);
CPPUNIT_ASSERT(sizeAfterRound3 < sizeAfterRound1);
} else if(writeStyle == Matroska::WriteStyle::AvoidInsert) {
// AvoidInsert: medium fits in round1's slot for non-trailing
// elements, but the trailing element may take less space than in
// round 1. File size therefore stays <= round 1.
CPPUNIT_ASSERT(sizeAfterRound3 <= sizeAfterRound1);
} else {
// DoNotShrink: medium fits in round1's slot (with remaining void)
// so file size stays the same as round1/round2
CPPUNIT_ASSERT_EQUAL(sizeAfterRound1, sizeAfterRound3);
}
// --- Round 4: save extra-large data (larger than round 1) ---
{
Matroska::File f(newname.c_str());
CPPUNIT_ASSERT(f.isValid());
f.tag(true)->clearSimpleTags();
f.attachments(true)->clear();
f.chapters(true)->clear();
setExtraLargeTags(f);
setExtraLargeAttachments(f);
setExtraLargeChapters(f);
CPPUNIT_ASSERT_MESSAGE("Round4 save ws=" + String::number(wsIdx).to8Bit(), f.save(writeStyle));
}
const size_t sizeAfterRound4 = PlainFile(newname.c_str()).readAll().size();
verifyRound("Round4 ws=" + std::to_string(wsIdx),
String("An Extremely Long Title That Is Even Larger Than The Previous Large Title "
"With Extra Content To Ensure Growth 0123456789ABCDEF"),
444ULL, 3U, 40000ULL);
// All styles must accommodate the larger data: file must be larger than round1
CPPUNIT_ASSERT(sizeAfterRound4 > sizeAfterRound1);
}
}
};
// Registers the TestMatroska suite with CppUnit's test factory registry so
// that the test runner can discover and execute it.
CPPUNIT_TEST_SUITE_REGISTRATION(TestMatroska);