Efficient lookup for an ID3v2 tag in a MPEG file.

An ID3v2 tag or MPEG frame is most likely be at the beginning of the file.
This commit is contained in:
Tsuda Kageyu 2015-11-19 17:29:25 +09:00
parent 11fbf394a3
commit e10684312e
2 changed files with 25 additions and 100 deletions

View File

@ -489,7 +489,7 @@ void MPEG::File::read(bool readProperties)
{
// Look for an ID3v2 tag
d->ID3v2Location = findID3v2(0);
d->ID3v2Location = findID3v2();
if(d->ID3v2Location >= 0) {
@ -532,115 +532,40 @@ void MPEG::File::read(bool readProperties)
ID3v1Tag(true);
}
long MPEG::File::findID3v2(long offset)
long MPEG::File::findID3v2()
{
// This method is based on the contents of TagLib::File::find(), but because
// of some subtleties -- specifically the need to look for the bit pattern of
// an MPEG sync, it has been modified for use here.
if(!isValid())
return -1;
if(isValid() && ID3v2::Header::fileIdentifier().size() <= bufferSize()) {
// An ID3v2 tag or MPEG frame is most likely be at the beginning of the file.
// The position in the file that the current buffer starts at.
const ByteVector headerID = ID3v2::Header::fileIdentifier();
long bufferOffset = 0;
ByteVector buffer;
seek(0);
// These variables are used to keep track of a partial match that happens at
// the end of a buffer.
const ByteVector data = readBlock(headerID.size());
if(data.size() < headerID.size())
return -1;
int previousPartialMatch = -1;
bool previousPartialSynchMatch = false;
if(data == headerID)
return 0;
// Save the location of the current read pointer. We will restore the
// position using seek() before all returns.
if(firstSyncByte(data[0]) && secondSynchByte(data[1]))
return -1;
long originalPosition = tell();
// Look for the entire file, if neither an MEPG frame or ID3v2 tag was found
// at the beginning of the file.
// We don't care about the inefficiency of the code, since this is a seldom case.
// Start the search at the offset.
const long tagOffset = find(headerID);
if(tagOffset < 0)
return -1;
seek(offset);
const long frameOffset = firstFrameOffset();
if(frameOffset < tagOffset)
return -1;
// This loop is the crux of the find method. There are three cases that we
// want to account for:
// (1) The previously searched buffer contained a partial match of the search
// pattern and we want to see if the next one starts with the remainder of
// that pattern.
//
// (2) The search pattern is wholly contained within the current buffer.
//
// (3) The current buffer ends with a partial match of the pattern. We will
// note this for use in the next iteration, where we will check for the rest
// of the pattern.
for(buffer = readBlock(bufferSize()); buffer.size() > 0; buffer = readBlock(bufferSize())) {
// (1) previous partial match
if(previousPartialSynchMatch && secondSynchByte(buffer[0]))
return -1;
if(previousPartialMatch >= 0 && int(bufferSize()) > previousPartialMatch) {
const int patternOffset = (bufferSize() - previousPartialMatch);
if(buffer.containsAt(ID3v2::Header::fileIdentifier(), 0, patternOffset)) {
seek(originalPosition);
return offset + bufferOffset - bufferSize() + previousPartialMatch;
}
}
// (2) pattern contained in current buffer
long location = buffer.find(ID3v2::Header::fileIdentifier());
if(location >= 0) {
seek(originalPosition);
return offset + bufferOffset + location;
}
int firstSynchByte = buffer.find(char(uchar(255)));
// Here we have to loop because there could be several of the first
// (11111111) byte, and we want to check all such instances until we find
// a full match (11111111 111) or hit the end of the buffer.
while(firstSynchByte >= 0) {
// if this *is not* at the end of the buffer
if(firstSynchByte < int(buffer.size()) - 1) {
if(secondSynchByte(buffer[firstSynchByte + 1])) {
// We've found the frame synch pattern.
seek(originalPosition);
return -1;
}
else {
// We found 11111111 at the end of the current buffer indicating a
// partial match of the synch pattern. The find() below should
// return -1 and break out of the loop.
previousPartialSynchMatch = true;
}
}
// Check in the rest of the buffer.
firstSynchByte = buffer.find(char(uchar(255)), firstSynchByte + 1);
}
// (3) partial match
previousPartialMatch = buffer.endsWithPartialMatch(ID3v2::Header::fileIdentifier());
bufferOffset += bufferSize();
}
// Since we hit the end of the file, reset the status before continuing.
clear();
seek(originalPosition);
}
return -1;
return tagOffset;
}
long MPEG::File::findID3v1()

View File

@ -390,7 +390,7 @@ namespace TagLib {
File &operator=(const File &);
void read(bool readProperties);
long findID3v2(long offset);
long findID3v2();
long findID3v1();
void findAPE();