mirror of
https://github.com/taglib/taglib.git
synced 2026-05-25 13:08:55 -04:00
Matroska: Use seek head for faster element lookup (#1321)
Limit the scan for the Matroska seek head to 512 KB in ReadStyle::Fast.
Co-authored-by: tolriq <git@leetzone.org>
This commit is contained in:
@@ -117,4 +117,3 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -225,7 +225,7 @@ namespace
|
||||
#endif
|
||||
#ifdef TAGLIB_WITH_MATROSKA
|
||||
else if(ext == "MKA" || ext == "MKV" || ext == "WEBM")
|
||||
file = new Matroska::File(stream, readAudioProperties);
|
||||
file = new Matroska::File(stream, readAudioProperties, audioPropertiesStyle);
|
||||
#endif
|
||||
|
||||
// if file is not valid, leave it to content-based detection.
|
||||
|
||||
@@ -30,6 +30,32 @@
|
||||
|
||||
using namespace TagLib;
|
||||
|
||||
namespace {
|
||||
|
||||
template <EBML::Element::Id Id, typename ElementType>
|
||||
std::unique_ptr<ElementType> readElementAt(File &file,
|
||||
offset_t offset,
|
||||
offset_t maxOffset)
|
||||
{
|
||||
if(offset < 0 || offset >= maxOffset) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
file.seek(offset);
|
||||
auto element = EBML::Element::factory(file);
|
||||
if(!element || element->getId() != Id) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto typed = EBML::element_cast<Id>(std::move(element));
|
||||
if(!typed || !typed->read(file)) {
|
||||
return nullptr;
|
||||
}
|
||||
return typed;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
EBML::MkSegment::MkSegment(int sizeLength, offset_t dataSize, offset_t offset):
|
||||
MasterElement(Id::MkSegment, sizeLength, dataSize, offset)
|
||||
{
|
||||
@@ -49,16 +75,64 @@ offset_t EBML::MkSegment::segmentDataOffset() const
|
||||
|
||||
// Reads the segment without restricting the scan: forwards to readLimited()
// with the segment's own data size as the scan limit.
//
// Returns the result of readLimited().
bool EBML::MkSegment::read(File &file)
{
  // The previously computed local maxOffset was never used here; the offsets
  // are derived inside readLimited().
  return readLimited(file, dataSize);
}
|
||||
|
||||
bool EBML::MkSegment::readLimited(File &file, offset_t scanLimit)
|
||||
{
|
||||
const offset_t filePos = file.tell();
|
||||
const offset_t maxOffset = filePos + dataSize;
|
||||
const offset_t maxScanOffset = filePos + std::min(scanLimit, dataSize);
|
||||
std::unique_ptr<Element> element;
|
||||
int i = 0;
|
||||
int seekHeadIndex = -1;
|
||||
while((element = findNextElement(file, maxOffset))) {
|
||||
while((element = findNextElement(file, maxScanOffset))) {
|
||||
if(const Id id = element->getId(); id == Id::MkSeekHead) {
|
||||
seekHeadIndex = i;
|
||||
seekHead = element_cast<Id::MkSeekHead>(std::move(element));
|
||||
if(!seekHead->read(file))
|
||||
return false;
|
||||
// We have a seek head, let's use it for faster access to the other elements
|
||||
if(const auto elementAfterSeekHead = findNextElement(file, maxScanOffset);
|
||||
elementAfterSeekHead && elementAfterSeekHead->getId() == Id::VoidElement)
|
||||
seekHead->setPadding(elementAfterSeekHead->getSize());
|
||||
const offset_t segDataOffset = segmentDataOffset();
|
||||
const auto matroskaSeekHead = parseSeekHead();
|
||||
for(const auto &[idValue, relativeOffset] : matroskaSeekHead->entryList()) {
|
||||
const offset_t absoluteOffset = segDataOffset + relativeOffset;
|
||||
switch(static_cast<Id>(idValue)) {
|
||||
case Id::MkCues:
|
||||
if(!((cues = readElementAt<Id::MkCues, MkCues>(
|
||||
file, absoluteOffset, maxOffset))))
|
||||
return false;
|
||||
break;
|
||||
case Id::MkInfo:
|
||||
if(!((info = readElementAt<Id::MkInfo, MkInfo>(
|
||||
file, absoluteOffset, maxOffset))))
|
||||
return false;
|
||||
break;
|
||||
case Id::MkTracks:
|
||||
if(!((tracks = readElementAt<Id::MkTracks, MkTracks>(
|
||||
file, absoluteOffset, maxOffset))))
|
||||
return false;
|
||||
break;
|
||||
case Id::MkTags:
|
||||
if(!((tags = readElementAt<Id::MkTags, MkTags>(
|
||||
file, absoluteOffset, maxOffset))))
|
||||
return false;
|
||||
break;
|
||||
case Id::MkAttachments:
|
||||
if(!((attachments = readElementAt<Id::MkAttachments, MkAttachments>(
|
||||
file, absoluteOffset, maxOffset))))
|
||||
return false;
|
||||
break;
|
||||
case Id::MkChapters:
|
||||
if(!((chapters = readElementAt<Id::MkChapters, MkChapters>(
|
||||
file, absoluteOffset, maxOffset))))
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else if(id == Id::MkCues) {
|
||||
cues = element_cast<Id::MkCues>(std::move(element));
|
||||
@@ -91,14 +165,8 @@ bool EBML::MkSegment::read(File &file)
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
if(id == Id::VoidElement
|
||||
&& seekHead
|
||||
&& seekHeadIndex == i - 1)
|
||||
seekHead->setPadding(element->getSize());
|
||||
|
||||
element->skipData(file);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ namespace TagLib {
|
||||
|
||||
offset_t segmentDataOffset() const;
|
||||
bool read(File &file) override;
|
||||
bool readLimited(File &file, offset_t scanLimit);
|
||||
std::unique_ptr<Matroska::Tag> parseTag() const;
|
||||
std::unique_ptr<Matroska::Attachments> parseAttachments() const;
|
||||
std::unique_ptr<Matroska::Chapters> parseChapters() const;
|
||||
|
||||
@@ -144,6 +144,8 @@ PropertyMap Matroska::File::setProperties(const PropertyMap &properties)
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr offset_t FAST_SCAN_LIMIT = static_cast<offset_t>(512 * 1024);
|
||||
|
||||
String keyForAttachedFile(const Matroska::AttachedFile &attachedFile)
|
||||
{
|
||||
if(attachedFile.mediaType().startsWith("image/")) {
|
||||
@@ -376,10 +378,15 @@ void Matroska::File::read(bool readProperties, Properties::ReadStyle readStyle)
|
||||
head->skipData(*this);
|
||||
}
|
||||
|
||||
offset_t maxOffset = fileLength - tell();
|
||||
if (readStyle == Properties::ReadStyle::Fast && maxOffset > FAST_SCAN_LIMIT) {
|
||||
maxOffset = FAST_SCAN_LIMIT;
|
||||
}
|
||||
|
||||
// Find the Matroska segment in the file
|
||||
const std::unique_ptr<EBML::MkSegment> segment(
|
||||
EBML::element_cast<EBML::Element::Id::MkSegment>(
|
||||
EBML::findElement(*this, EBML::Element::Id::MkSegment, fileLength - tell())
|
||||
EBML::findElement(*this, EBML::Element::Id::MkSegment, maxOffset)
|
||||
)
|
||||
);
|
||||
if(!segment) {
|
||||
@@ -389,14 +396,18 @@ void Matroska::File::read(bool readProperties, Properties::ReadStyle readStyle)
|
||||
}
|
||||
|
||||
// Read the segment into memory from file
|
||||
if(!segment->read(*this)) {
|
||||
d->segment = segment->parseSegment();
|
||||
maxOffset = segment->getDataSize();
|
||||
if (readStyle == Properties::ReadStyle::Fast && maxOffset > FAST_SCAN_LIMIT) {
|
||||
maxOffset = FAST_SCAN_LIMIT;
|
||||
}
|
||||
if(!segment->readLimited(*this, maxOffset)) {
|
||||
debug("Failed to read segment");
|
||||
setValid(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse the elements
|
||||
d->segment = segment->parseSegment();
|
||||
d->seekHead = segment->parseSeekHead();
|
||||
d->cues = segment->parseCues();
|
||||
d->tag = segment->parseTag();
|
||||
|
||||
@@ -54,7 +54,6 @@ bool Matroska::SeekHead::isValid(TagLib::File &file) const
|
||||
void Matroska::SeekHead::addEntry(const Element &element)
|
||||
{
|
||||
entries.append({element.id(), element.offset()});
|
||||
debug("adding to seekhead");
|
||||
setNeedsRender(true);
|
||||
}
|
||||
|
||||
@@ -64,6 +63,11 @@ void Matroska::SeekHead::addEntry(ID id, offset_t offset)
|
||||
setNeedsRender(true);
|
||||
}
|
||||
|
||||
// Gives read-only access to the stored seek entries. Each entry is a pair of
// an unsigned element ID value and an offset; the list is returned by
// reference, not copied.
const List<std::pair<unsigned int, offset_t>> &Matroska::SeekHead::entryList() const
{
  return entries;
}
|
||||
|
||||
ByteVector Matroska::SeekHead::renderInternal()
|
||||
{
|
||||
const auto beforeSize = sizeRenderedOrWritten();
|
||||
|
||||
@@ -39,6 +39,7 @@ namespace TagLib {
|
||||
bool isValid(TagLib::File &file) const;
|
||||
void addEntry(const Element &element);
|
||||
void addEntry(ID id, offset_t offset);
|
||||
const List<std::pair<unsigned int, offset_t>> &entryList() const;
|
||||
void write(TagLib::File &file) override;
|
||||
void sort();
|
||||
bool sizeChanged(Element &caller, offset_t delta) override;
|
||||
|
||||
Reference in New Issue
Block a user