Improved EXIF V3 compatibility (2)

This patch improves string reading and writing when the text uses a non-ASCII encoding.
- When reading, the text is checked for UTF-8 even when the data type is ASCII. If the UTF-8 decoding fails, the Latin-1 converter is used.
- When writing using V3 specs, a better check is done to identify 7-bit ASCII text.

Related to MR !358
This commit is contained in:
Mirco Miranda 2025-04-21 13:29:13 +02:00 committed by Albert Astals Cid
parent 2adca7c0ca
commit 6bf38ea638

View File

@ -12,6 +12,7 @@
#include <QCoreApplication> #include <QCoreApplication>
#include <QDataStream> #include <QDataStream>
#include <QHash> #include <QHash>
#include <QStringDecoder>
#include <QTimeZone> #include <QTimeZone>
// TIFF 6 specs // TIFF 6 specs
@ -379,16 +380,14 @@ static ExifTagType updateDataType(const ExifTagType &dataType, const QVariant &v
// To maximize compatibility, I check if the string can be encoded in ASCII. // To maximize compatibility, I check if the string can be encoded in ASCII.
auto txt = value.toString(); auto txt = value.toString();
// I try to implement a rudimentary check without going through Qt's string conversion classes: // Exif ASCII data type allow only values up to 127 (7-bit ASCII).
// a UTF-8 string if it uses only characters in the first 127 of the ASCII table is encoded as auto u8 = txt.toUtf8();
// a Latin 1. Each character above 128, whether it is an extended ASCII character or a character for (auto &&c : u8) {
// of another nature, uses 2 or more bytes. Since the EXIF specifications state that the ASCII if (uchar(c) > 127)
// type must use only the first 127 characters, I only need to do the size comparison to understand return dataType;
// which type to use. }
if (txt.toLatin1().size() == txt.toUtf8().size())
return ExifTagType::Ascii;
return dataType; return ExifTagType::Ascii;
} }
/*! /*!
@ -562,8 +561,16 @@ static bool readIfd(QDataStream &ds, MicroExif::Tags &tags, quint32 pos = 0, con
if (dataType == EXIF_TAG_DATATYPE(ExifTagType::Ascii) || dataType == EXIF_TAG_DATATYPE(ExifTagType::Utf8)) { if (dataType == EXIF_TAG_DATATYPE(ExifTagType::Ascii) || dataType == EXIF_TAG_DATATYPE(ExifTagType::Utf8)) {
auto l = readBytes(ds, count, true); auto l = readBytes(ds, count, true);
if (!l.isEmpty()) if (!l.isEmpty()) {
tags.insert(tagId, dataType == EXIF_TAG_DATATYPE(ExifTagType::Utf8) ? QString::fromUtf8(l) : QString::fromLatin1(l)); // It seems that converting to Latin 1 never detects errors so, using UTF-8.
// Note that if the dataType is ASCII, by EXIF specification, it must use only the
// first 128 values so the UTF-8 conversion is correct.
auto dec = QStringDecoder(QStringDecoder::Utf8);
// QStringDecoder reports an error only after the result has been converted to a QString
auto ut8 = QString(dec(l));
// If there are errors in the conversion to UTF-8, then I try with latin1 (extended ASCII)
tags.insert(tagId, dec.hasError() ? QString::fromLatin1(l) : ut8);
}
} else if (dataType == EXIF_TAG_DATATYPE(ExifTagType::Undefined)) { } else if (dataType == EXIF_TAG_DATATYPE(ExifTagType::Undefined)) {
auto l = readBytes(ds, count, false); auto l = readBytes(ds, count, false);
if (!l.isEmpty()) if (!l.isEmpty())