From 4f326f0a81417e5a2b968abc138194af01eebf18 Mon Sep 17 00:00:00 2001 From: gras Date: Fri, 5 Apr 2024 22:43:43 +0200 Subject: [PATCH 1/4] removed ts-node-iptc dependency. Now the only metadataparser in MetadataLoader is exifr --- package-lock.json | 1 - package.json | 1 - .../model/fileaccess/MetadataLoader.ts | 89 ++++--------- src/common/HTMLCharCodes.ts | 118 ++++++++++++++++++ src/common/Utils.ts | 48 ++++++- .../assets/sidecar/testimagedesc1.json | 4 +- test/tmp/sqlite.db-journal | Bin 8720 -> 0 bytes 7 files changed, 194 insertions(+), 67 deletions(-) create mode 100644 src/common/HTMLCharCodes.ts delete mode 100644 test/tmp/sqlite.db-journal diff --git a/package-lock.json b/package-lock.json index 06dc3a97..96be8c81 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,7 +26,6 @@ "nodemailer": "6.9.4", "reflect-metadata": "0.1.13", "sharp": "0.31.3", - "ts-node-iptc": "1.0.11", "typeconfig": "2.2.11", "typeorm": "0.3.12", "xml2js": "0.6.2" diff --git a/package.json b/package.json index 65e65868..70e71323 100644 --- a/package.json +++ b/package.json @@ -53,7 +53,6 @@ "nodemailer": "6.9.4", "reflect-metadata": "0.1.13", "sharp": "0.31.3", - "ts-node-iptc": "1.0.11", "typeconfig": "2.2.11", "typeorm": "0.3.12", "xml2js": "0.6.2" diff --git a/src/backend/model/fileaccess/MetadataLoader.ts b/src/backend/model/fileaccess/MetadataLoader.ts index a0aafe52..0e08ef5e 100644 --- a/src/backend/model/fileaccess/MetadataLoader.ts +++ b/src/backend/model/fileaccess/MetadataLoader.ts @@ -12,7 +12,6 @@ import { FfprobeData } from 'fluent-ffmpeg'; import { FileHandle } from 'fs/promises'; import * as util from 'node:util'; import * as path from 'path'; -import { IptcParser } from 'ts-node-iptc'; import { Utils } from '../../../common/Utils'; import { FFmpegFactory } from '../FFmpegFactory'; import { ExtensionDecorator } from '../extension/ExtensionDecorator'; @@ -181,7 +180,7 @@ export class MetadataLoader { icc: false, jfif: false, //not needed and not supported for png ihdr: true, - iptc: false, //exifr reads UTF8-encoded data wrongly, using IptcParser instead + iptc: true, exif: true, gps: true, reviveValues: false, //don't convert timestamps @@ -221,46 +220,6 @@ export class MetadataLoader { await fileHandle.close(); } try { - - - try { //Parse iptc data using the IptcParser, which works correctly for both UTF-8 and ASCII - const iptcData = IptcParser.parse(data); - if (iptcData.country_or_primary_location_name) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.country = - iptcData.country_or_primary_location_name - .replace(/\0/g, '') - .trim(); - } - if (iptcData.province_or_state) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.state = iptcData.province_or_state - .replace(/\0/g, '') - .trim(); - } - if (iptcData.city) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.city = iptcData.city - .replace(/\0/g, '') - .trim(); - } - if (iptcData.object_name) { - metadata.title = iptcData.object_name.replace(/\0/g, '').trim(); - } - if (iptcData.caption) { - metadata.caption = iptcData.caption.replace(/\0/g, '').trim(); - } - if (Array.isArray(iptcData.keywords)) { - metadata.keywords = iptcData.keywords; - } - - if (iptcData.date_time) { - metadata.creationDate = iptcData.date_time.getTime(); - } - } catch (err) { - // Logger.debug(LOG_TAG, 'Error parsing iptc data', fullPath, err); - } - try { const exif = await exifr.parse(data, exifrOptions); MetadataLoader.mapMetadata(metadata, exif); @@ -370,20 +329,35 @@ export class MetadataLoader { } } } + if (exif.iptc && + exif.iptc.Keywords && + exif.iptc.Keywords.length > 0) { + const subj = Array.isArray(exif.iptc.Keywords) ? exif.iptc.Keywords : [exif.iptc.Keywords]; + if (metadata.keywords === undefined) { + metadata.keywords = []; + } + for (let kw of subj) { + kw = Utils.asciiToUTF8(kw); + if (metadata.keywords.indexOf(kw) === -1) { + metadata.keywords.push(kw); + } + } + } } private static mapTitle(metadata: PhotoMetadata, exif: any) { - metadata.title = exif.dc?.title?.value || metadata.title || exif.photoshop?.Headline || exif.acdsee?.caption; //acdsee caption holds the title when data is saved by digikam. Used as last resort if iptc and dc do not contain the data + metadata.title = exif.dc?.title?.value || Utils.asciiToUTF8(exif.iptc?.ObjectName) || metadata.title || exif.photoshop?.Headline || exif.acdsee?.caption; //acdsee caption holds the title when data is saved by digikam. Used as last resort if iptc and dc do not contain the data } private static mapCaption(metadata: PhotoMetadata, exif: any) { - metadata.caption = exif.dc?.description?.value || metadata.caption || exif.ifd0?.ImageDescription || exif.exif?.UserComment?.value || exif.Iptc4xmpCore?.ExtDescrAccessibility?.value ||exif.acdsee?.notes; + metadata.caption = exif.dc?.description?.value || Utils.asciiToUTF8(exif.iptc?.Caption) || metadata.caption || exif.ifd0?.ImageDescription || exif.exif?.UserComment?.value || exif.Iptc4xmpCore?.ExtDescrAccessibility?.value ||exif.acdsee?.notes; } private static mapTimestampAndOffset(metadata: PhotoMetadata, exif: any) { metadata.creationDate = Utils.timestampToMS(exif?.photoshop?.DateCreated, null) || Utils.timestampToMS(exif?.xmp?.CreateDate, null) || Utils.timestampToMS(exif?.xmp?.ModifyDate, null) || + Utils.timestampToMS(Utils.toIsoTimestampString(exif?.iptc?.DateCreated, exif?.iptc?.TimeCreated), null) || metadata.creationDate; metadata.creationDateOffset = Utils.timestampToOffsetString(exif?.photoshop?.DateCreated) || @@ -490,24 +464,15 @@ export class MetadataLoader { private static mapToponyms(metadata: PhotoMetadata, exif: any) { //Function to convert html code for special characters into their corresponding character (used in exif.photoshop-section) - const unescape = (tag: string) => { - return tag.replace(/&#([0-9]{1,3});/gi, function (match, numStr) { - return String.fromCharCode(parseInt(numStr, 10)); - }); - } - //photoshop section sometimes has City, Country and State - if (exif.photoshop) { - if (!metadata.positionData?.country && exif.photoshop.Country) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.country = unescape(exif.photoshop.Country); - } - if (!metadata.positionData?.state && exif.photoshop.State) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.state = unescape(exif.photoshop.State); - } - if (!metadata.positionData?.city && exif.photoshop.City) { - metadata.positionData = metadata.positionData || {}; - metadata.positionData.city = unescape(exif.photoshop.City); + + metadata.positionData = metadata.positionData || {}; + metadata.positionData.country = Utils.asciiToUTF8(exif.iptc?.Country) || Utils.decodeHTMLChars(exif.photoshop?.Country); + metadata.positionData.state = Utils.asciiToUTF8(exif.iptc?.State) || Utils.decodeHTMLChars(exif.photoshop?.State); + metadata.positionData.city = Utils.asciiToUTF8(exif.iptc?.City) || Utils.decodeHTMLChars(exif.photoshop?.City); + if (metadata.positionData) { + Utils.removeNullOrEmptyObj(metadata.positionData); + if (Object.keys(metadata.positionData).length === 0) { + delete metadata.positionData; } } } diff --git a/src/common/HTMLCharCodes.ts b/src/common/HTMLCharCodes.ts new file mode 100644 index 00000000..c094c79e --- /dev/null +++ b/src/common/HTMLCharCodes.ts @@ -0,0 +1,118 @@ +interface HTMLCharDictionary { + [key: string]: string; +} + +export const HTMLChar: HTMLCharDictionary = { + """: "\"", + "&": "&", + "<": "<", + ">": ">", + " ": " ", + "¡": "¡", + "¢": "¢", + "£": "£", + "¤": "¤", + "¥": "¥", + "¦": "¦", + "§": "§", + "¨": "¨", + "©": "©", + "®": "®", + "™": "™", + "ª": "ª", + "«": "«", + "¬": "¬", + "­": "­", + "¯": "¯", + "°": "°", + "±": "±", + "²": "²", + "³": "³", + "´": "´", + "µ": "µ", + "¶": "¶", + "·": "·", + "¸": "¸", + "¹": "¹", + "º": "º", + "»": "»", + "¼": "¼", + "½": "½", + "¾": "¾", + "¿": "¿", + "×": "×", + "÷": "÷", + "Ð": "Ð", + "ð": "ð", + "Þ": "Þ", + "þ": "þ", + "Æ": "Æ", + "æ": "æ", + "Œ": "Œ", + "œ": "œ", + "Å": "Å", + "Ø": "Ø", + "Ç": "Ç", + "ç": "ç", + "ß": "ß", + "Ñ": "Ñ", + "ñ": "ñ", + "Á": "Á", + "À": "À", + "Â": "Â", + "Ä": "Ä", + "Ã": "Ã", + "á": "á", + "à": "à", + "â": "â", + "ä": "ä", + "ã": "ã", + "å": "å", + "É": "É", + "È": "È", + "Ê": "Ê", + "Ë": "Ë", + "&Etilde;": "Ẽ", + "é": "é", + "è": "è", + "ê": "ê", + "ë": "ë", + "Í": "Í", + "Ì": "Ì", + "Î": "Î", + "Ï": "Ï", + "Ĩ": "Ĩ", + "í": "í", + "ì": "ì", + "î": "î", + "ï": "ï", + "ĩ": "ĩ", + "Ó": "Ó", + "Ò": "Ò", + "Ô": "Ô", + "Ö": "Ö", + "Õ": "Õ", + "ó": "ó", + "ò": "ò", + "ô": "ô", + "ö": "ö", + "õ": "õ", + "Ú": "Ú", + "Ù": "Ù", + "Û": "Û", + "Ü": "Ü", + "Ũ": "Ũ", + "Ů": "Ů", + "ú": "ú", + "ù": "ù", + "û": "û", + "ü": "ü", + "ũ": "ũ", + "ů": "ů", + "Ý": "Ý", + "Ŷ": "Ŷ", + "Ÿ": "Ÿ", + "ý": "ý", + "ŷ": "ŷ", + "ÿ": "ÿ" +}; \ No newline at end of file diff --git a/src/common/Utils.ts b/src/common/Utils.ts index 2d69923f..2750cf29 100644 --- a/src/common/Utils.ts +++ b/src/common/Utils.ts @@ -1,3 +1,5 @@ +import { HTMLChar } from './HTMLCharCodes'; + export class Utils { static GUID(): string { const s4 = (): string => @@ -97,6 +99,25 @@ export class Utils { return d.getUTCFullYear() + '-' + d.getUTCMonth() + '-' + d.getUTCDate(); } + static toIsoTimestampString(YYYYMMDD: string, hhmmss: string): string { + if (YYYYMMDD && hhmmss) { + // Regular expression to match YYYYMMDD format + const dateRegex = /^(\d{4})(\d{2})(\d{2})$/; + // Regular expression to match hhmmss+/-ohom format + const timeRegex = /^(\d{2})(\d{2})(\d{2})([+-]\d{2})?(\d{2})?$/; + const [, year, month, day] = YYYYMMDD.match(dateRegex); + const [, hour, minute, second, offsetHour, offsetMinute] = hhmmss.match(timeRegex); + const isoTimestamp = `${year}-${month}-${day}T${hour}:${minute}:${second}`; + if (offsetHour && offsetMinute) { + return isoTimestamp + `${offsetHour}:${offsetMinute}`; + } else { + return isoTimestamp; + } + } else { + return undefined; + } + } + static makeUTCMidnight(d: number | Date) { if (!(d instanceof Date)) { @@ -125,7 +146,7 @@ export class Utils { } //function to convert timestamp into milliseconds taking offset into account - static timestampToMS(timestamp: string, offset: string) { + static timestampToMS(timestamp: string, offset: string): number { if (!timestamp) { return undefined; } @@ -371,6 +392,31 @@ export class Utils { return curr; } + public static asciiToUTF8(text: string): string { + if (text) { + return Buffer.from(text, 'ascii').toString('utf-8'); + } else { + return text; + } + } + + + + public static decodeHTMLChars(text: string): string { + if (text) { + const newtext = text.replace(/&#([0-9]{1,3});/gi, function (match, numStr) { + return String.fromCharCode(parseInt(numStr, 10)); + }); + return newtext.replace(/&[^;]+;/g, function (match) { + const char = HTMLChar[match]; + return char ? char : match; + }); + } else { + return text; + } + } + + public static isUInt32(value: number, max = 4294967295): boolean { value = parseInt('' + value, 10); return !isNaN(value) && value >= 0 && value <= max; diff --git a/test/backend/assets/sidecar/testimagedesc1.json b/test/backend/assets/sidecar/testimagedesc1.json index bc0aac3b..42f82919 100644 --- a/test/backend/assets/sidecar/testimagedesc1.json +++ b/test/backend/assets/sidecar/testimagedesc1.json @@ -41,9 +41,9 @@ "latitude": 37.871093, "longitude": -122.25678 }, - "city": "test city őúéáűóöí-.,)(=", + "city": "test city őúéáűóöí-.,)(=/%!+\"'", "country": "test country őúéáűóöí-.,)(=/%!+\"'", - "state": "test state őúéáűóöí-.,)(" + "state": "test state őúéáűóöí-.,)(=/%!+\"'" }, "rating": 3, "size": { diff --git a/test/tmp/sqlite.db-journal b/test/tmp/sqlite.db-journal deleted file mode 100644 index 41fef9eebaebbae9c91a7f46412db09bd82d3d74..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8720 zcmeHLOK;mo5Edg#vST}To@!9ECFB~l;5c1Am%OxfZ3PhG$gwR0$tA*)yX&yf!=WfQ z28tANilYCiK!N^*0=*SIWmTn%UT+izw*u9(T3zg|5t ze}vI$CUXhCmd_38*}KfZM~kpJVd?J-ywg|8K*~VMK*~VMK*~VMK*~VMK*~VMK*~VM zK+3>7$H0t`%?KZlp8E&~dpnNDMBZ^*#1j>G0wf5TFGLZRorMFQ3uQipWAJdO4#x}} z@!8*I9~OSj?-hQ?{gRuV z>1BV&=(ET^E&>x(PGpg4c7J?@3bL> zN5%fF8--#O3w_hSw7UJMA=%87bqNt}Pz!U)Wn|cfq1)OqbGw~(=nZC4(NrRfn$;%-eWO^j0#*G7gaEtSRo&Fe#8H~^-YHd3)IF{V+#$Eji% z1RJJ_CI-9_2bLV+4ZwWPhPalm4RLJ*xER$`U6&~3wrOyK5koQ%lBpt3#C1K6D>=d& za5+9z_pTO-7z=0XenKWrI`PMABP8|BFLx?a(gL%HPTONg%oQK*S08NdKNjy-9*diY z_1$U>f;_0y>SApdj>DauJ7Q@}SKt-04UqtsF=P3~GEloJ^xqGtMW(K3j9J*S45CYt zjJd9=$kz1Hf}p6~)2netlN~QG>bnmr&nNOsy;3Nyt_t5>Akx1kJl)bn+QOlNrTr(i z+N8xw;t6r>r^F4@Xm@&TFV2&=UAeb;xKkI`P#MT}y7$OQr{_Y?mqe@6X)@9dqC~Fk zlGbsP?Xx3j-%twfh+Dh0gZlnv74ld*+yf231~qjHXy>MEG95!T6Nw>BDv5`+>fT`m zNb`Z0%Xn#;_=K5jNa4|Pe_;_4z9IBK4H8Z%v$@6y^ec2 z60$4tq$fKMlO7G>OAc`%bHD{i&g{Yv&L;sJTQUF$0!-X6ZLD(5B$~`Qk)p8{<8YFl zr@`@_`cUShP%tR6Vr!tuRKtW(#U>!rTq1T9%0e7UvU4(&VRprS{_+sYO27({5n?*E z2{6GWgfX^s3p7whQ7H3qD9O&rQ2g$ExiecRZfpqO-}hxMI#qN$H=24RZc+27>EBYP z9ur90q!w1Nh~sfPjERA;_P}$RS|E*L)zDQ;Wo&W7z>Fh9)+xhs zR4pK!&sDkn`CPtuu#vQ7iKeR&Yq-lV3Q(6TLjQxHE+q>oSVNL5*_MW6_<%cxN(}>X zr^!aTF&i(*WWQlahP(%556%fE-`8zR3+?+zwz+1888#k~LFSBm|kEvZb0>wL&oRaTv*d6JhvW*{{zOiYqI^ zS;z0n&;+5=+{IoDS>A3NQsUnO{Vsz_^Ak1h$~8$ZhdVD!;<(rYd--8DUtC`o9{Y Date: Mon, 8 Apr 2024 18:44:57 +0200 Subject: [PATCH 2/4] inserting fake file --- .gitignore | 1 - test/tmp/sqlite.db-journal | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 test/tmp/sqlite.db-journal diff --git a/.gitignore b/.gitignore index 2e0fc4c6..6efad7e3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,6 @@ test/e2e/**/*.js test/e2e/**/*.js.map test/*.js test/*.js.map -test/tmp/* benchmark/**/*.js benchmark/**/*.js.map gulpfile.js diff --git a/test/tmp/sqlite.db-journal b/test/tmp/sqlite.db-journal new file mode 100644 index 00000000..12de16d9 --- /dev/null +++ b/test/tmp/sqlite.db-journal @@ -0,0 +1 @@ +deleteme \ No newline at end of file From fc88a5d2f08b625a43a81f0c658930545d0f3df5 Mon Sep 17 00:00:00 2001 From: gras Date: Mon, 8 Apr 2024 18:45:41 +0200 Subject: [PATCH 3/4] delete fake file --- test/tmp/sqlite.db-journal | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test/tmp/sqlite.db-journal diff --git a/test/tmp/sqlite.db-journal b/test/tmp/sqlite.db-journal deleted file mode 100644 index 12de16d9..00000000 --- a/test/tmp/sqlite.db-journal +++ /dev/null @@ -1 +0,0 @@ -deleteme \ No newline at end of file From 28bf225419a900dadac4281638f541c5c71816a3 Mon Sep 17 00:00:00 2001 From: gras Date: Mon, 8 Apr 2024 18:46:03 +0200 Subject: [PATCH 4/4] update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6efad7e3..2e0fc4c6 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ test/e2e/**/*.js test/e2e/**/*.js.map test/*.js test/*.js.map +test/tmp/* benchmark/**/*.js benchmark/**/*.js.map gulpfile.js