1
0
mirror of https://github.com/xuthus83/pigallery2.git synced 2024-11-03 21:04:03 +08:00
pigallery2/src/common/SearchQueryParser.ts

544 lines
16 KiB
TypeScript
Raw Normal View History

2021-03-20 18:20:53 +08:00
import {
ANDSearchQuery,
DistanceSearch,
FromDateSearch,
MaxRatingSearch,
MaxResolutionSearch,
MinRatingSearch,
MinResolutionSearch,
NegatableSearchQuery,
2021-03-20 18:20:53 +08:00
OrientationSearch,
ORSearchQuery,
RangeSearch,
SearchListQuery,
SearchQueryDTO,
SearchQueryTypes,
SomeOfSearchQuery,
TextSearch,
TextSearchQueryMatchTypes,
TextSearchQueryTypes,
2022-04-05 01:37:31 +08:00
ToDateSearch,
2021-03-20 18:20:53 +08:00
} from './entities/SearchQueryDTO';
import {Utils} from './Utils';
2021-03-20 18:20:53 +08:00
/**
 * The words of the textual search syntax understood by `SearchQueryParser`.
 *
 * Every member holds the literal keyword that the parser looks for in the
 * query string (and that it emits again when stringifying), so the syntax can
 * be swapped out by passing a different keyword set to the parser.
 */
export interface QueryKeywords {
  /** Value of the orientation query that stands for portrait media. */
  portrait: string;
  /** Value of the orientation query that stands for landscape media. */
  landscape: string;
  /** Prefix of the orientation query: `<orientation>:<landscape|portrait>`. */
  orientation: string;
  /** Suffix of the distance query: `<number>-<kmFrom>:<place>`. */
  kmFrom: string;
  /** Prefix of the maximum resolution query: `<maxResolution>:<number>`. */
  maxResolution: string;
  /** Prefix of the minimum resolution query: `<minResolution>:<number>`. */
  minResolution: string;
  /** Prefix of the maximum rating query: `<maxRating>:<number>`. */
  maxRating: string;
  /** Prefix of the minimum rating query: `<minRating>:<number>`. */
  minRating: string;
  /** Suffix of the counted "some of" query: `<number>-<NSomeOf>:(<list>)`. */
  NSomeOf: string;
  /** Prefix of the plain "some of" query: `<someOf>:(<list>)`. */
  someOf: string;
  /** Word that joins two terms with a logical OR. */
  or: string;
  /** Word that joins two terms with a logical AND. */
  and: string;
  /** Prefix of the "from date" query: `<from>:<date>`. */
  from: string;
  /** Prefix of the "to date" query: `<to>:<date>`. */
  to: string;
  /** Prefix of a text query of type `any_text`. */
  any_text: string;
  /** Prefix of a text query of type `caption`. */
  caption: string;
  /** Prefix of a text query of type `directory`. */
  directory: string;
  /** Prefix of a text query of type `file_name`. */
  file_name: string;
  /** Prefix of a text query of type `keyword`. */
  keyword: string;
  /** Prefix of a text query of type `person`. */
  person: string;
  /** Prefix of a text query of type `position`. */
  position: string;
}
2021-05-31 22:44:37 +08:00
/**
 * Keyword set that `SearchQueryParser` falls back to when it is constructed
 * without an explicit `QueryKeywords` argument.
 */
export const defaultQueryKeywords: QueryKeywords = {
  // logical relations
  NSomeOf: 'of',
  someOf: 'some-of',
  and: 'and',
  or: 'or',

  // date range
  from: 'after',
  to: 'before',

  // numeric ranges
  maxRating: 'max-rating',
  minRating: 'min-rating',
  maxResolution: 'max-resolution',
  minResolution: 'min-resolution',

  // orientation and distance
  orientation: 'orientation',
  landscape: 'landscape',
  portrait: 'portrait',
  kmFrom: 'km-from',

  // text searches
  any_text: 'any-text',
  keyword: 'keyword',
  caption: 'caption',
  directory: 'directory',
  file_name: 'file-name',
  person: 'person',
  position: 'position',
};
2021-03-20 18:20:53 +08:00
/**
 * Converts between the textual search syntax (e.g. `person:bob and after:2020`)
 * and `SearchQueryDTO` trees.
 *
 * The words of the syntax come from the `QueryKeywords` passed to the
 * constructor, `defaultQueryKeywords` when none is given.
 */
export class SearchQueryParser {
  constructor(private keywords: QueryKeywords = defaultQueryKeywords) {
  }

  /**
   * Renders the text of a text query.
   *
   * @param text the text to render
   * @param matchType exact matches are wrapped in quotation marks; for any
   *   other match type the text is wrapped in brackets when it contains a space
   * @returns the (possibly wrapped) text
   */
  public static stringifyText(
    text: string,
    matchType = TextSearchQueryMatchTypes.like
  ): string {
    if (matchType === TextSearchQueryMatchTypes.exact_match) {
      return '"' + text + '"';
    }
    if (text.indexOf(' ') !== -1) {
      return '(' + text + ')';
    }
    return text;
  }

  /**
   * Renders a timestamp as `YYYY-MM-DD`, or as the year only when the date is
   * the first of January.
   *
   * All date parts are read in UTC: `parseDate` builds its timestamps with
   * `Date.UTC` and `toISOString` is UTC as well, so mixing in local-time
   * getters would make the result depend on the timezone of the host.
   *
   * @param time timestamp in milliseconds
   */
  public static stringifyDate(time: number): string {
    const date = new Date(time);
    // simplify the date to the year only if it is the first of January
    if (date.getUTCMonth() === 0 && date.getUTCDate() === 1) {
      return date.getUTCFullYear().toString();
    }
    return this.stringifyText(date.toISOString().substring(0, 10));
  }

  /**
   * Parses the date part of a date query.
   *
   * One leading `"`/`(` and one trailing `"`/`)` are dropped first. Accepted
   * forms are `YYYY`, `YYYY-MM` and `YYYY-MM-DD` (all interpreted as UTC);
   * anything else is handed to `Date.parse`.
   *
   * @returns timestamp in milliseconds
   * @throws Error when the text cannot be parsed as a date
   */
  private static parseDate(text: string): number {
    const first = text.charAt(0);
    if (first === '"' || first === '(') {
      text = text.substring(1);
    }
    const last = text.charAt(text.length - 1);
    if (last === '"' || last === ')') {
      text = text.substring(0, text.length - 1);
    }

    // It is the year only. Checking the digits (not just the length) keeps
    // arbitrary four character texts from being returned as NaN.
    if (/^\d{4}$/.test(text)) {
      return Date.UTC(parseInt(text, 10), 0, 1, 0, 0, 0, 0);
    }

    let timestamp: number | null = null;
    // Parsing ISO-like strings by hand, see
    // https://stackoverflow.com/questions/2587345/why-does-date-parse-give-incorrect-results
    const parts = text.split('-').map((t) => parseInt(t, 10));
    if (parts.length === 2) {
      timestamp = Date.UTC(parts[0], parts[1] - 1, 1, 0, 0, 0, 0); // Note: months are 0-based
    }
    if (parts.length === 3) {
      timestamp = Date.UTC(parts[0], parts[1] - 1, parts[2], 0, 0, 0, 0); // Note: months are 0-based
    }

    // If it was not an ISO-like string, try our luck with Date.parse
    if (timestamp === null) {
      timestamp = Date.parse(text);
    }
    if (timestamp === null || isNaN(timestamp)) {
      throw new Error('Cannot parse date: ' + text);
    }
    return timestamp;
  }

  /**
   * Tells whether the whole string is enclosed by ONE pair of brackets, i.e.
   * it starts with `(`, ends with `)` and the opening bracket is not closed
   * before the last character. `(a) and (b)` is therefore not wrapped.
   * Unbalanced input whose first bracket never closes early counts as wrapped.
   */
  private static isWrappedInBrackets(str: string): boolean {
    if (str.charAt(0) !== '(' || str.charAt(str.length - 1) !== ')') {
      return false;
    }
    let depth = 0;
    for (let i = 0; i < str.length - 1; ++i) {
      const c = str.charAt(i);
      if (c === '(') {
        depth++;
      } else if (c === ')') {
        depth--;
        if (depth === 0) {
          return false; // the first bracket is closed before the end
        }
      }
    }
    return true;
  }

  /** Renders `<prefix><value>`, leaving the value out when it is NaN. */
  private static stringifyRange(prefix: string, value: number): string {
    return prefix + (isNaN(value) ? '' : value);
  }

  /**
   * Parses a textual query into a query tree.
   *
   * @param str the query text
   * @param implicitAND when true, terms that are separated by a space without
   *   an explicit relation are joined with AND; otherwise they are joined with
   *   `UNKNOWN_RELATION` (used for the list of a "some of" query)
   * @returns the parsed query; text that matches no keyword becomes an
   *   `any_text` query
   * @throws Error when the date of a date query cannot be parsed
   */
  public parse(str: string, implicitAND = true): SearchQueryDTO {
    str = str
      .replace(/\s\s+/g, ' ') // remove double spaces
      .replace(/:\s+/g, ':') // glue "keyword: value" together
      .trim();

    // Only drop brackets that really enclose the whole query.
    if (SearchQueryParser.isWrappedInBrackets(str)) {
      str = str.slice(1, str.length - 1).trim();
    }

    // Index of the first space that is outside of brackets and quotation
    // marks, -1 if there is none.
    const firstSpace = (): number => {
      let depth = 0;
      let inQuotes = false;
      for (let i = 0; i < str.length; ++i) {
        const c = str.charAt(i);
        if (c === '"') {
          inQuotes = !inQuotes;
        } else if (c === '(') {
          depth++;
        } else if (c === ')') {
          if (depth > 0) {
            depth--;
          }
        } else if (c === ' ' && !inQuotes && depth === 0) {
          return i;
        }
      }
      return -1;
    };

    // tokenize
    const tokenEnd = firstSpace();
    if (tokenEnd !== -1) {
      const head = str.slice(0, tokenEnd);
      // A relation keyword has to be a whole word. A plain prefix check would
      // take the "or" of "orientation" or the "and" of "android" for one.
      const relationFollows = (keyword: string): boolean => {
        const marker = ' ' + keyword;
        if (!str.startsWith(marker, tokenEnd)) {
          return false;
        }
        const next = str.charAt(tokenEnd + marker.length);
        return next === '' || next === ' ' || next === '(' || next === '"';
      };

      if (relationFollows(this.keywords.and)) {
        return this.parseList(
          SearchQueryTypes.AND,
          head,
          str.slice(tokenEnd + (' ' + this.keywords.and).length),
          implicitAND
        ) as ANDSearchQuery;
      }
      if (relationFollows(this.keywords.or)) {
        return this.parseList(
          SearchQueryTypes.OR,
          head,
          str.slice(tokenEnd + (' ' + this.keywords.or).length),
          implicitAND
        ) as ORSearchQuery;
      }
      // Relation cannot be detected
      return this.parseList(
        implicitAND === true
          ? SearchQueryTypes.AND
          : SearchQueryTypes.UNKNOWN_RELATION,
        head,
        str.slice(tokenEnd),
        implicitAND
      );
    }

    // "some of" queries: `<someOf>:(...)` or `<number>-<NSomeOf>:(...)`
    const someOfPrefix = this.keywords.someOf + ':';
    const nSomeOfMatch = new RegExp(
      '^\\d*-' + this.keywords.NSomeOf + ':'
    ).exec(str);
    if (str.startsWith(someOfPrefix) || nSomeOfMatch !== null) {
      const prefix = str.startsWith(someOfPrefix)
        ? someOfPrefix
        : (nSomeOfMatch as RegExpExecArray)[0];
      // the +1 / -1 trims the brackets around the list
      const parsedList = this.parse(str.slice(prefix.length + 1, -1), false);

      // Lists with an unknown relation are the items of the "some of" query;
      // everything else is one item on its own.
      const unfoldList = (q: SearchListQuery): SearchQueryDTO[] => {
        if (q.list && q.type === SearchQueryTypes.UNKNOWN_RELATION) {
          return q.list.map((e) => unfoldList(e as SearchListQuery)).flat(); // flatten array
        }
        return [q];
      };

      const ret = {
        type: SearchQueryTypes.SOME_OF,
        list: unfoldList(parsedList as SearchListQuery),
      } as SomeOfSearchQuery;
      if (nSomeOfMatch !== null) {
        ret.min = parseInt(nSomeOfMatch[0], 10);
      }
      return ret;
    }

    const kwStartsWith = (s: string, kw: string): boolean => {
      return s.startsWith(kw + ':') || s.startsWith(kw + '!:');
    };
    // everything after the first colon
    const value = (): string => str.substring(str.indexOf(':') + 1);
    // only adds the negate flag if it is true
    const negation = (kw: string): { negate?: boolean } =>
      str.startsWith(kw + '!:') ? {negate: true} : {};

    if (kwStartsWith(str, this.keywords.from)) {
      return {
        type: SearchQueryTypes.from_date,
        value: SearchQueryParser.parseDate(value()),
        ...negation(this.keywords.from),
      } as FromDateSearch;
    }
    if (kwStartsWith(str, this.keywords.to)) {
      return {
        type: SearchQueryTypes.to_date,
        value: SearchQueryParser.parseDate(value()),
        ...negation(this.keywords.to),
      } as ToDateSearch;
    }
    if (kwStartsWith(str, this.keywords.minRating)) {
      return {
        type: SearchQueryTypes.min_rating,
        value: parseInt(value(), 10),
        ...negation(this.keywords.minRating),
      } as MinRatingSearch;
    }
    if (kwStartsWith(str, this.keywords.maxRating)) {
      return {
        type: SearchQueryTypes.max_rating,
        value: parseInt(value(), 10),
        ...negation(this.keywords.maxRating),
      } as MaxRatingSearch;
    }
    if (kwStartsWith(str, this.keywords.minResolution)) {
      return {
        type: SearchQueryTypes.min_resolution,
        value: parseInt(value(), 10),
        ...negation(this.keywords.minResolution),
      } as MinResolutionSearch;
    }
    if (kwStartsWith(str, this.keywords.maxResolution)) {
      return {
        type: SearchQueryTypes.max_resolution,
        value: parseInt(value(), 10),
        ...negation(this.keywords.maxResolution),
      } as MaxResolutionSearch;
    }

    // distance query: `<number>-<kmFrom>:<place>`
    const kmFromMatch = new RegExp(
      '^\\d*-' + this.keywords.kmFrom + '!?:'
    ).exec(str);
    if (kmFromMatch !== null) {
      let from = str.slice(kmFromMatch[0].length);
      const fromFirst = from.charAt(0);
      const fromLast = from.charAt(from.length - 1);
      if (
        (fromFirst === '(' && fromLast === ')') ||
        (fromFirst === '"' && fromLast === '"')
      ) {
        from = from.slice(1, from.length - 1);
      }
      return {
        type: SearchQueryTypes.distance,
        distance: parseInt(kmFromMatch[0], 10), // the leading digits of the match
        from: {text: from},
        // only adds the negate flag if it is true
        ...(kmFromMatch[0].endsWith('!:') ? {negate: true} : {}),
      } as DistanceSearch;
    }

    if (str.startsWith(this.keywords.orientation + ':')) {
      return {
        type: SearchQueryTypes.orientation,
        landscape:
          str.slice((this.keywords.orientation + ':').length) ===
          this.keywords.landscape,
      } as OrientationSearch;
    }

    // parse text search: all the plain prefixes first, then the negated ones
    const textPrefixes = TextSearchQueryTypes.map((type) => ({
      key: this.keywordOf(type) + ':',
      queryTemplate: {type, text: ''} as TextSearch,
    })).concat(
      TextSearchQueryTypes.map((type) => ({
        key: this.keywordOf(type) + '!:',
        queryTemplate: {type, text: '', negate: true} as TextSearch,
      }))
    );
    for (const candidate of textPrefixes) {
      if (!str.startsWith(candidate.key)) {
        continue;
      }
      const ret: TextSearch = Utils.clone(candidate.queryTemplate);
      const opening = str.charAt(candidate.key.length);
      const closing = str.charAt(str.length - 1);
      if (opening === '"' && closing === '"') {
        // exact match
        ret.text = str.slice(candidate.key.length + 1, str.length - 1);
        ret.matchType = TextSearchQueryMatchTypes.exact_match;
      } else if (opening === '(' && closing === ')') {
        // like match
        ret.text = str.slice(candidate.key.length + 1, str.length - 1);
      } else {
        ret.text = str.slice(candidate.key.length);
      }
      return ret;
    }

    return {type: SearchQueryTypes.any_text, text: str} as TextSearch;
  }

  /**
   * Renders a query tree as text. Brackets that enclose the whole result are
   * left out.
   *
   * @throws Error when the query (or one of its descendants) has an unknown type
   */
  public stringify(query: SearchQueryDTO): string {
    const ret = this.stringifyOnEntry(query);
    // Only drop brackets that really enclose the whole text.
    if (SearchQueryParser.isWrappedInBrackets(ret)) {
      return ret.slice(1, ret.length - 1);
    }
    return ret;
  }

  /** Looks up the keyword that belongs to a query type by the name of the type. */
  private keywordOf(type: SearchQueryTypes): string {
    return this.keywords[SearchQueryTypes[type] as keyof QueryKeywords];
  }

  /**
   * Builds the list query of `<head> <relation> <tail>`. A tail that parses
   * into a list of the same type is merged into the result instead of nested.
   */
  private parseList(
    type: SearchQueryTypes,
    head: string,
    tail: string,
    implicitAND: boolean
  ): SearchListQuery {
    const rest = this.parse(tail, implicitAND);
    return {
      type,
      list: [
        this.parse(head, implicitAND), // trims brackets
        ...(rest.type === type ? (rest as SearchListQuery).list : [rest]),
      ],
    } as SearchListQuery;
  }

  /** Renders the items of a list query, joined by the separator, in brackets. */
  private stringifyList(query: SearchListQuery, separator: string): string {
    return (
      '(' +
      query.list.map((q) => this.stringifyOnEntry(q)).join(separator) +
      ')'
    );
  }

  /**
   * Renders one node of the query tree, list queries included in brackets.
   *
   * @returns the text of the query; empty for a missing query, for a date
   *   query without a value and for a keyword-prefixed text query without text
   * @throws Error when the type of the query is unknown
   */
  private stringifyOnEntry(query: SearchQueryDTO): string {
    if (!query || !query.type) {
      return '';
    }
    const colon = (query as NegatableSearchQuery).negate === true ? '!:' : ':';

    switch (query.type) {
      case SearchQueryTypes.AND:
        return this.stringifyList(
          query as SearchListQuery,
          ' ' + this.keywords.and + ' '
        );

      case SearchQueryTypes.OR:
        return this.stringifyList(
          query as SearchListQuery,
          ' ' + this.keywords.or + ' '
        );

      case SearchQueryTypes.SOME_OF: {
        const min = (query as SomeOfSearchQuery).min;
        const prefix = min
          ? min + '-' + this.keywords.NSomeOf
          : this.keywords.someOf;
        return prefix + ':' + this.stringifyList(query as SearchListQuery, ' ');
      }

      case SearchQueryTypes.orientation:
        return (
          this.keywords.orientation +
          ':' +
          ((query as OrientationSearch).landscape
            ? this.keywords.landscape
            : this.keywords.portrait)
        );

      case SearchQueryTypes.from_date: {
        const from = (query as FromDateSearch).value;
        // 0 is a valid timestamp (1970-01-01 UTC), only skip missing values
        if (!from && from !== 0) {
          return '';
        }
        return (
          this.keywords.from + colon + SearchQueryParser.stringifyDate(from)
        );
      }

      case SearchQueryTypes.to_date: {
        const to = (query as ToDateSearch).value;
        // 0 is a valid timestamp (1970-01-01 UTC), only skip missing values
        if (!to && to !== 0) {
          return '';
        }
        return this.keywords.to + colon + SearchQueryParser.stringifyDate(to);
      }

      case SearchQueryTypes.min_rating:
        return SearchQueryParser.stringifyRange(
          this.keywords.minRating + colon,
          (query as RangeSearch).value
        );

      case SearchQueryTypes.max_rating:
        return SearchQueryParser.stringifyRange(
          this.keywords.maxRating + colon,
          (query as RangeSearch).value
        );

      case SearchQueryTypes.min_resolution:
        return SearchQueryParser.stringifyRange(
          this.keywords.minResolution + colon,
          (query as RangeSearch).value
        );

      case SearchQueryTypes.max_resolution:
        return SearchQueryParser.stringifyRange(
          this.keywords.maxResolution + colon,
          (query as RangeSearch).value
        );

      case SearchQueryTypes.distance: {
        const place = (query as DistanceSearch).from.text;
        return (
          (query as DistanceSearch).distance +
          '-' +
          this.keywords.kmFrom +
          colon +
          // a place with a space in it has to be kept together by brackets
          (place.indexOf(' ') !== -1 ? '(' + place + ')' : place)
        );
      }

      case SearchQueryTypes.any_text: {
        const text = SearchQueryParser.stringifyText(
          (query as TextSearch).text,
          (query as TextSearch).matchType
        );
        // the keyword is only needed to carry the negation
        if (!(query as TextSearch).negate) {
          return text;
        }
        return this.keywordOf(query.type) + colon + text;
      }

      case SearchQueryTypes.person:
      case SearchQueryTypes.position:
      case SearchQueryTypes.keyword:
      case SearchQueryTypes.caption:
      case SearchQueryTypes.file_name:
      case SearchQueryTypes.directory:
        if (!(query as TextSearch).text) {
          return '';
        }
        return (
          this.keywordOf(query.type) +
          colon +
          SearchQueryParser.stringifyText(
            (query as TextSearch).text,
            (query as TextSearch).matchType
          )
        );

      default:
        throw new Error('Unknown type: ' + query.type);
    }
  }
}