From 9f9dbe0c519999b19f3d536c098d0d2d10b0ff43 Mon Sep 17 00:00:00 2001 From: "Patrik J. Braun" Date: Sat, 20 Mar 2021 11:20:53 +0100 Subject: [PATCH] refactoring query parser --- src/common/SearchQueryParser.ts | 293 ++++++++++++++++++ src/common/entities/SearchQueryDTO.ts | 242 --------------- ...SearchQueryDTO.ts => SearchQueryParser.ts} | 26 +- 3 files changed, 318 insertions(+), 243 deletions(-) create mode 100644 src/common/SearchQueryParser.ts rename test/common/unit/{SearchQueryDTO.ts => SearchQueryParser.ts} (89%) diff --git a/src/common/SearchQueryParser.ts b/src/common/SearchQueryParser.ts new file mode 100644 index 00000000..18b34c3a --- /dev/null +++ b/src/common/SearchQueryParser.ts @@ -0,0 +1,293 @@ +import { + ANDSearchQuery, + DistanceSearch, + FromDateSearch, + MaxRatingSearch, + MaxResolutionSearch, + MinRatingSearch, + MinResolutionSearch, + OrientationSearch, + ORSearchQuery, + RangeSearch, + SearchListQuery, + SearchQueryDTO, + SearchQueryTypes, + SomeOfSearchQuery, + TextSearch, + TextSearchQueryMatchTypes, + TextSearchQueryTypes, + ToDateSearch +} from './entities/SearchQueryDTO'; +import {Utils} from './Utils'; + + +export interface QueryKeywords { + portrait: string; + landscape: string; + orientation: string; + kmFrom: string; + maxResolution: string; + minResolution: string; + maxRating: string; + minRating: string; + NSomeOf: string; + someOf: string; + or: string; + and: string; + from: string; + to: string; + caption: string; + directory: string; + file_name: string; + keyword: string; + person: string; + position: string; +} + +export class SearchQueryParser { + constructor(private keywords: QueryKeywords) { + } + + public parse(str: string, implicitOR = true): SearchQueryDTO { + str = str.replace(/\s\s+/g, ' ') // remove double spaces + .replace(/:\s+/g, ':').replace(/\)(?=\S)/g, ') ').trim(); + + if (str.charAt(0) === '(' && str.charAt(str.length - 1) === ')') { + str = str.slice(1, str.length - 1); + } + const fistSpace = (start = 0) => { + const bracketIn = []; + let quotationMark = false; + for (let i = start; i < str.length; ++i) { + if (str.charAt(i) === '"') { + quotationMark = !quotationMark; + continue; + } + if (str.charAt(i) === '(') { + bracketIn.push(i); + continue; + } + if (str.charAt(i) === ')') { + bracketIn.pop(); + continue; + } + + if (quotationMark === false && + bracketIn.length === 0 && + str.charAt(i) === ' ') { + return i; + } + } + return str.length - 1; + }; + + // tokenize + const tokenEnd = fistSpace(); + + if (tokenEnd !== str.length - 1) { + if (str.startsWith(' ' + this.keywords.and, tokenEnd)) { + return { + type: SearchQueryTypes.AND, + list: [this.parse(str.slice(0, tokenEnd), implicitOR), // trim brackets + this.parse(str.slice(tokenEnd + (' ' + this.keywords.and).length), implicitOR)] + }; + } else if (str.startsWith(' ' + this.keywords.or, tokenEnd)) { + return { + type: SearchQueryTypes.OR, + list: [this.parse(str.slice(0, tokenEnd), implicitOR), // trim brackets + this.parse(str.slice(tokenEnd + (' ' + this.keywords.or).length), implicitOR)] + }; + } else { // Relation cannot be detected + return { + type: implicitOR === true ? SearchQueryTypes.OR : SearchQueryTypes.UNKNOWN_RELATION, + list: [this.parse(str.slice(0, tokenEnd), implicitOR), // trim brackets + this.parse(str.slice(tokenEnd), implicitOR)] + }; + } + } + if (str.startsWith(this.keywords.someOf + ':') || + new RegExp(/^\d*-of:/).test(str)) { + const prefix = str.startsWith(this.keywords.someOf + ':') ? + this.keywords.someOf + ':' : + new RegExp(/^\d*-of:/).exec(str)[0]; + let tmpList: any = this.parse(str.slice(prefix.length + 1, -1), false); // trim brackets + // console.log(JSON.stringify(tmpList, null, 4)); + const unfoldList = (q: SearchListQuery): SearchQueryDTO[] => { + if (q.list) { + if (q.type === SearchQueryTypes.UNKNOWN_RELATION) { + return [].concat.apply([], q.list.map(e => unfoldList(e))); // flatten array + } else { + q.list.forEach(e => unfoldList(e)); + } + } + return [q]; + }; + tmpList = unfoldList(tmpList); + const ret = { + type: SearchQueryTypes.SOME_OF, + list: tmpList + }; + if (new RegExp('/^\d*-' + this.keywords.NSomeOf + ':/').test(str)) { + ret.min = parseInt(new RegExp(/^\d*/).exec(str)[0], 10); + } + return ret; + } + + if (str.startsWith(this.keywords.from + ':')) { + return { + type: SearchQueryTypes.from_date, + value: Date.parse(str.slice((this.keywords.from + ':').length + 1, str.length - 1)) + }; + } + if (str.startsWith(this.keywords.to + ':')) { + return { + type: SearchQueryTypes.to_date, + value: Date.parse(str.slice((this.keywords.to + ':').length + 1, str.length - 1)) + }; + } + + if (str.startsWith(this.keywords.minRating + ':')) { + return { + type: SearchQueryTypes.min_rating, + value: parseInt(str.slice((this.keywords.minRating + ':').length), 10) + }; + } + if (str.startsWith(this.keywords.maxRating + ':')) { + return { + type: SearchQueryTypes.max_rating, + value: parseInt(str.slice((this.keywords.maxRating + ':').length), 10) + }; + } + if (str.startsWith(this.keywords.minResolution + ':')) { + return { + type: SearchQueryTypes.min_resolution, + value: parseInt(str.slice((this.keywords.minResolution + ':').length), 10) + }; + } + if (str.startsWith(this.keywords.maxResolution + ':')) { + return { + type: SearchQueryTypes.max_resolution, + value: parseInt(str.slice((this.keywords.maxResolution + ':').length), 10) + }; + } + if (new RegExp('/^\d*-' + this.keywords.kmFrom + ':/').test(str)) { + let from = str.slice(new RegExp('/^\d*-' + this.keywords.kmFrom + ':/').exec(str)[0].length); + if (from.charAt(0) === '(' && from.charAt(from.length - 1) === ')') { + from = from.slice(1, from.length - 1); + } + return { + type: SearchQueryTypes.distance, + distance: parseInt(new RegExp(/^\d*/).exec(str)[0], 10), + from: {text: from} + }; + } + + if (str.startsWith(this.keywords.orientation + ':')) { + return { + type: SearchQueryTypes.orientation, + landscape: str.slice((this.keywords.orientation + ':').length) === this.keywords.landscape + }; + } + + // parse text search + const tmp = TextSearchQueryTypes.map(type => ({ + key: (this.keywords)[SearchQueryTypes[type]] + ':', + queryTemplate: {type: type, text: ''} + })); + for (let i = 0; i < tmp.length; ++i) { + if (str.startsWith(tmp[i].key)) { + const ret: TextSearch = Utils.clone(tmp[i].queryTemplate); + if (str.charAt(tmp[i].key.length) === '"' && str.charAt(str.length - 1) === '"') { + ret.text = str.slice(tmp[i].key.length + 1, str.length - 1); + ret.matchType = TextSearchQueryMatchTypes.exact_match; + } else if (str.charAt(tmp[i].key.length) === '(' && str.charAt(str.length - 1) === ')') { + ret.text = str.slice(tmp[i].key.length + 1, str.length - 1); + } else { + ret.text = str.slice(tmp[i].key.length); + } + return ret; + } + } + + + return {type: SearchQueryTypes.any_text, text: str}; + } + + public stringify(query: SearchQueryDTO): string { + if (!query || !query.type) { + return ''; + } + switch (query.type) { + case SearchQueryTypes.AND: + return '(' + (query).list.map(q => this.stringify(q)).join(' ' + this.keywords.and + ' ') + ')'; + + case SearchQueryTypes.OR: + return '(' + (query).list.map(q => this.stringify(q)).join(' ' + this.keywords.or + ' ') + ')'; + + case SearchQueryTypes.SOME_OF: + if ((query).min) { + return (query).min + '-' + this.keywords.NSomeOf + ':(' + + (query).list.map(q => this.stringify(q)).join(' ') + ')'; + } + return this.keywords.someOf + ':(' + + (query).list.map(q => this.stringify(q)).join(' ') + ')'; + + + case SearchQueryTypes.orientation: + return this.keywords.orientation + ':' + ((query).landscape ? this.keywords.landscape : this.keywords.portrait); + + case SearchQueryTypes.from_date: + if (!(query).value) { + return ''; + } + return this.keywords.from + ':(' + new Date((query).value).toLocaleDateString() + ')'.trim(); + case SearchQueryTypes.to_date: + if (!(query).value) { + return ''; + } + return this.keywords.to + ':(' + new Date((query).value).toLocaleDateString() + ')'.trim(); + case SearchQueryTypes.min_rating: + return this.keywords.minRating + ':' + (isNaN((query).value) ? '' : (query).value); + case SearchQueryTypes.max_rating: + return this.keywords.maxRating + ':' + (isNaN((query).value) ? '' : (query).value); + case SearchQueryTypes.min_resolution: + return this.keywords.minResolution + ':' + (isNaN((query).value) ? '' : (query).value); + case SearchQueryTypes.max_resolution: + return this.keywords.maxResolution + ':' + (isNaN((query).value) ? '' : (query).value); + case SearchQueryTypes.distance: + if ((query).from.text.indexOf(' ') !== -1) { + return (query).distance + '-' + this.keywords.kmFrom + ':(' + (query).from.text + ')'; + } + return (query).distance + '-' + this.keywords.kmFrom + ':' + (query).from.text; + + case SearchQueryTypes.any_text: + if ((query).matchType === TextSearchQueryMatchTypes.exact_match) { + return '"' + (query).text + '"'; + + } else if ((query).text.indexOf(' ') !== -1) { + return '(' + (query).text + ')'; + } + return (query).text; + + case SearchQueryTypes.person: + case SearchQueryTypes.position: + case SearchQueryTypes.keyword: + case SearchQueryTypes.caption: + case SearchQueryTypes.file_name: + case SearchQueryTypes.directory: + if (!(query).text) { + return ''; + } + if ((query).matchType === TextSearchQueryMatchTypes.exact_match) { + return SearchQueryTypes[query.type] + ':"' + (query).text + '"'; + + } else if ((query).text.indexOf(' ') !== -1) { + return SearchQueryTypes[query.type] + ':(' + (query).text + ')'; + } + return SearchQueryTypes[query.type] + ':' + (query).text; + + default: + throw new Error('Unknown type: ' + query.type); + } + } +} diff --git a/src/common/entities/SearchQueryDTO.ts b/src/common/entities/SearchQueryDTO.ts index fc564824..432068a7 100644 --- a/src/common/entities/SearchQueryDTO.ts +++ b/src/common/entities/SearchQueryDTO.ts @@ -1,5 +1,4 @@ import {GPSMetadata} from './PhotoDTO'; -import {Utils} from '../Utils'; export enum SearchQueryTypes { AND = 1, OR, SOME_OF, UNKNOWN_RELATION = 99999, @@ -119,247 +118,6 @@ export namespace SearchQueryDTO { throw new Error('Unknown type' + query.type); } }; - - export const parse = (str: string, implicitOR = true): SearchQueryDTO => { - str = str.replace(/\s\s+/g, ' ') // remove double spaces - .replace(/:\s+/g, ':').replace(/\)(?=\S)/g, ') ').trim(); - - if (str.charAt(0) === '(' && str.charAt(str.length - 1) === ')') { - str = str.slice(1, str.length - 1); - } - const fistSpace = (start = 0) => { - const bracketIn = []; - let quotationMark = false; - for (let i = start; i < str.length; ++i) { - if (str.charAt(i) === '"') { - quotationMark = !quotationMark; - continue; - } - if (str.charAt(i) === '(') { - bracketIn.push(i); - continue; - } - if (str.charAt(i) === ')') { - bracketIn.pop(); - continue; - } - - if (quotationMark === false && - bracketIn.length === 0 && - str.charAt(i) === ' ') { - return i; - } - } - return str.length - 1; - }; - - // tokenize - const tokenEnd = fistSpace(); - - if (tokenEnd !== str.length - 1) { - if (str.startsWith(' and', tokenEnd)) { - return { - type: SearchQueryTypes.AND, - list: [SearchQueryDTO.parse(str.slice(0, tokenEnd), implicitOR), // trim brackets - SearchQueryDTO.parse(str.slice(tokenEnd + 4), implicitOR)] - }; - } else if (str.startsWith(' or', tokenEnd)) { - return { - type: SearchQueryTypes.OR, - list: [SearchQueryDTO.parse(str.slice(0, tokenEnd), implicitOR), // trim brackets - SearchQueryDTO.parse(str.slice(tokenEnd + 3), implicitOR)] - }; - } else { // Relation cannot be detected - return { - type: implicitOR === true ? SearchQueryTypes.OR : SearchQueryTypes.UNKNOWN_RELATION, - list: [SearchQueryDTO.parse(str.slice(0, tokenEnd), implicitOR), // trim brackets - SearchQueryDTO.parse(str.slice(tokenEnd), implicitOR)] - }; - } - } - if (str.startsWith('some-of:') || - new RegExp(/^\d*-of:/).test(str)) { - const prefix = str.startsWith('some-of:') ? 'some-of:' : new RegExp(/^\d*-of:/).exec(str)[0]; - let tmpList: any = SearchQueryDTO.parse(str.slice(prefix.length + 1, -1), false); // trim brackets - // console.log(JSON.stringify(tmpList, null, 4)); - const unfoldList = (q: SearchListQuery): SearchQueryDTO[] => { - if (q.list) { - if (q.type === SearchQueryTypes.UNKNOWN_RELATION) { - return [].concat.apply([], q.list.map(e => unfoldList(e))); // flatten array - } else { - q.list.forEach(e => unfoldList(e)); - } - } - return [q]; - }; - tmpList = unfoldList(tmpList); - const ret = { - type: SearchQueryTypes.SOME_OF, - list: tmpList - }; - if (new RegExp(/^\d*-of:/).test(str)) { - ret.min = parseInt(new RegExp(/^\d*/).exec(str)[0], 10); - } - return ret; - } - - if (str.startsWith('from:')) { - return { - type: SearchQueryTypes.from_date, - value: Date.parse(str.slice('from:'.length + 1, str.length - 1)) - }; - } - if (str.startsWith('to:')) { - return { - type: SearchQueryTypes.to_date, - value: Date.parse(str.slice('to:'.length + 1, str.length - 1)) - }; - } - - if (str.startsWith('min-rating:')) { - return { - type: SearchQueryTypes.min_rating, - value: parseInt(str.slice('min-rating:'.length), 10) - }; - } - if (str.startsWith('max-rating:')) { - return { - type: SearchQueryTypes.max_rating, - value: parseInt(str.slice('max-rating:'.length), 10) - }; - } - if (str.startsWith('min-resolution:')) { - return { - type: SearchQueryTypes.min_resolution, - value: parseInt(str.slice('min-resolution:'.length), 10) - }; - } - if (str.startsWith('max-resolution:')) { - return { - type: SearchQueryTypes.max_resolution, - value: parseInt(str.slice('max-resolution:'.length), 10) - }; - } - if (new RegExp(/^\d*-km-from:/).test(str)) { - let from = str.slice(new RegExp(/^\d*-km-from:/).exec(str)[0].length); - if (from.charAt(0) === '(' && from.charAt(from.length - 1) === ')') { - from = from.slice(1, from.length - 1); - } - return { - type: SearchQueryTypes.distance, - distance: parseInt(new RegExp(/^\d*/).exec(str)[0], 10), - from: {text: from} - }; - } - - if (str.startsWith('orientation:')) { - return { - type: SearchQueryTypes.orientation, - landscape: str.slice('orientation:'.length) === 'landscape' - }; - } - - // parse text search - const tmp = TextSearchQueryTypes.map(type => ({ - key: SearchQueryTypes[type] + ':', - queryTemplate: {type: type, text: ''} - })); - for (let i = 0; i < tmp.length; ++i) { - if (str.startsWith(tmp[i].key)) { - const ret: TextSearch = Utils.clone(tmp[i].queryTemplate); - if (str.charAt(tmp[i].key.length) === '"' && str.charAt(str.length - 1) === '"') { - ret.text = str.slice(tmp[i].key.length + 1, str.length - 1); - ret.matchType = TextSearchQueryMatchTypes.exact_match; - } else if (str.charAt(tmp[i].key.length) === '(' && str.charAt(str.length - 1) === ')') { - ret.text = str.slice(tmp[i].key.length + 1, str.length - 1); - } else { - ret.text = str.slice(tmp[i].key.length); - } - return ret; - } - } - - - return {type: SearchQueryTypes.any_text, text: str}; - }; - - export const stringify = (query: SearchQueryDTO): string => { - if (!query || !query.type) { - return ''; - } - switch (query.type) { - case SearchQueryTypes.AND: - return '(' + (query).list.map(q => SearchQueryDTO.stringify(q)).join(' and ') + ')'; - - case SearchQueryTypes.OR: - return '(' + (query).list.map(q => SearchQueryDTO.stringify(q)).join(' or ') + ')'; - - case SearchQueryTypes.SOME_OF: - if ((query).min) { - return (query).min + '-of:(' + - (query).list.map(q => SearchQueryDTO.stringify(q)).join(' ') + ')'; - } - return 'some-of:(' + - (query).list.map(q => SearchQueryDTO.stringify(q)).join(' ') + ')'; - - - case SearchQueryTypes.orientation: - return 'orientation:' + ((query).landscape ? 'landscape' : 'portrait'); - - case SearchQueryTypes.from_date: - if (!(query).value) { - return ''; - } - return 'from:(' + new Date((query).value).toLocaleDateString() + ')'.trim(); - case SearchQueryTypes.to_date: - if (!(query).value) { - return ''; - } - return 'to:(' + new Date((query).value).toLocaleDateString() + ')'.trim(); - case SearchQueryTypes.min_rating: - return 'min-rating:' + (isNaN((query).value) ? '' : (query).value); - case SearchQueryTypes.max_rating: - return 'max-rating:' + (isNaN((query).value) ? '' : (query).value); - case SearchQueryTypes.min_resolution: - return 'min-resolution:' + (isNaN((query).value) ? '' : (query).value); - case SearchQueryTypes.max_resolution: - return 'max-resolution:' + (isNaN((query).value) ? '' : (query).value); - case SearchQueryTypes.distance: - if ((query).from.text.indexOf(' ') !== -1) { - return (query).distance + '-km-from:(' + (query).from.text + ')'; - } - return (query).distance + '-km-from:' + (query).from.text; - - case SearchQueryTypes.any_text: - if ((query).matchType === TextSearchQueryMatchTypes.exact_match) { - return '"' + (query).text + '"'; - - } else if ((query).text.indexOf(' ') !== -1) { - return '(' + (query).text + ')'; - } - return (query).text; - - case SearchQueryTypes.person: - case SearchQueryTypes.position: - case SearchQueryTypes.keyword: - case SearchQueryTypes.caption: - case SearchQueryTypes.file_name: - case SearchQueryTypes.directory: - if (!(query).text) { - return ''; - } - if ((query).matchType === TextSearchQueryMatchTypes.exact_match) { - return SearchQueryTypes[query.type] + ':"' + (query).text + '"'; - - } else if ((query).text.indexOf(' ') !== -1) { - return SearchQueryTypes[query.type] + ':(' + (query).text + ')'; - } - return SearchQueryTypes[query.type] + ':' + (query).text; - - default: - throw new Error('Unknown type: ' + query.type); - } - }; } export interface SearchQueryDTO { diff --git a/test/common/unit/SearchQueryDTO.ts b/test/common/unit/SearchQueryParser.ts similarity index 89% rename from test/common/unit/SearchQueryDTO.ts rename to test/common/unit/SearchQueryParser.ts index d6716680..d2de5a36 100644 --- a/test/common/unit/SearchQueryDTO.ts +++ b/test/common/unit/SearchQueryParser.ts @@ -16,12 +16,36 @@ import { TextSearchQueryMatchTypes, ToDateSearch } from '../../../src/common/entities/SearchQueryDTO'; +import {QueryKeywords, SearchQueryParser} from '../../../src/common/SearchQueryParser'; describe('SearchQueryDTO', () => { + const keywords: QueryKeywords = { + NSomeOf: '-of', + and: 'and', + caption: 'caption', + directory: 'directory', + file_name: 'file-name', + from: 'from', + keyword: 'keyword', + landscape: 'landscape', + maxRating: 'max-rating', + maxResolution: 'max-resolution', + minRating: 'min-rating', + minResolution: 'min-resolution', + or: 'or', + orientation: 'orientation', + person: 'person', + portrait: 'portrait', + position: 'position', + someOf: 'some-of', + to: 'to', + kmFrom: 'km-from' + }; const check = (query: SearchQueryDTO) => { - expect(SearchQueryDTO.parse(SearchQueryDTO.stringify(query))).to.deep.equals(query, SearchQueryDTO.stringify(query)); + const parser = new SearchQueryParser(keywords); + expect(parser.parse(parser.stringify(query))).to.deep.equals(query, parser.stringify(query)); };