// pigallery2/src/common/SearchQueryParser.ts
// Mirror of https://github.com/xuthus83/pigallery2.git (synced 2025-01-14 14:43:17 +08:00)

import {
  ANDSearchQuery,
  DatePatternFrequency,
  DatePatternSearch,
  DistanceSearch,
  FromDateSearch,
  MaxRatingSearch,
  MaxResolutionSearch,
  MinRatingSearch,
  MinResolutionSearch,
  NegatableSearchQuery,
  OrientationSearch,
  ORSearchQuery,
  RangeSearch,
  SearchListQuery,
  SearchQueryDTO,
  SearchQueryTypes,
  SomeOfSearchQuery,
  TextSearch,
  TextSearchQueryMatchTypes,
  TextSearchQueryTypes,
  ToDateSearch,
} from './entities/SearchQueryDTO';
import {Utils} from './Utils';

/**
 * The words that make up the textual search syntax.
 *
 * Every entry is the literal text a user types for the given concept, so the
 * syntax can be swapped out (e.g. localized) by supplying a different object.
 * Entries whose value contains `%d` are templates: the parser/stringifier
 * substitutes the number for `%d`.
 */
export interface QueryKeywords {
  // Relative date templates, `%d` is the amount (previously typed as `any`).
  days_ago: string;
  years_ago: string;
  months_ago: string;
  weeks_ago: string;

  // Recurring date pattern frequencies.
  every_year: string;
  every_month: string;
  every_week: string;

  // Date pattern prefixes; `lastNDays` is a `%d` template.
  lastNDays: string;
  sameDay: string;

  // Orientation prefix and its two values.
  portrait: string;
  landscape: string;
  orientation: string;

  // Distance search, used as `<number>-<kmFrom>:<place>`.
  kmFrom: string;

  // Numeric range prefixes.
  maxResolution: string;
  minResolution: string;
  maxRating: string;
  minRating: string;

  // List operators; `NSomeOf` is used as `<number>-<NSomeOf>:(...)`.
  NSomeOf: string;
  someOf: string;
  or: string;
  and: string;

  // Date range prefixes.
  from: string;
  to: string;

  // Text search prefixes; the property names match the SearchQueryTypes names
  // because the parser looks them up by that name.
  any_text: string;
  caption: string;
  directory: string;
  file_name: string;
  keyword: string;
  person: string;
  position: string;
}

/**
 * The built-in (English) keyword set, used by SearchQueryParser when no
 * custom keywords are supplied. `%d` marks where a number is substituted.
 */
export const defaultQueryKeywords: QueryKeywords = {
  // list operators
  NSomeOf: 'of',
  and: 'and',
  or: 'or',

  // date range
  from: 'after',
  to: 'before',

  // numeric ranges and distance
  maxRating: 'max-rating',
  maxResolution: 'max-resolution',
  minRating: 'min-rating',
  minResolution: 'min-resolution',
  kmFrom: 'km-from',

  // orientation
  orientation: 'orientation',
  landscape: 'landscape',
  portrait: 'portrait',

  // date patterns
  years_ago: '%d-years-ago',
  months_ago: '%d-months-ago',
  weeks_ago: '%d-weeks-ago',
  days_ago: '%d-days-ago',
  every_year: 'every-year',
  every_month: 'every-month',
  every_week: 'every-week',
  lastNDays: 'last-%d-days',
  sameDay: 'same-day',

  // text searches
  any_text: 'any-text',
  keyword: 'keyword',
  caption: 'caption',
  directory: 'directory',
  file_name: 'file-name',
  person: 'person',
  position: 'position',
  someOf: 'some-of',
};

/**
 * Converts between the textual search syntax (for example
 * `keyword:cat and after:2020`) and the SearchQueryDTO tree.
 *
 * The words of the syntax come from the QueryKeywords object passed to the
 * constructor, so `parse` and `stringify` of the same instance are meant to be
 * each other's inverse.
 */
export class SearchQueryParser {
  constructor(private keywords: QueryKeywords = defaultQueryKeywords) {
  }

  /**
   * Renders a search text so that it stays a single token:
   * exact matches are wrapped in quotation marks, texts that contain a space
   * are wrapped in brackets, everything else is returned untouched.
   */
  public static stringifyText(
    text: string,
    matchType = TextSearchQueryMatchTypes.like
  ): string {
    if (matchType === TextSearchQueryMatchTypes.exact_match) {
      return '"' + text + '"';
    }
    if (text.indexOf(' ') !== -1) {
      return '(' + text + ')';
    }
    return text;
  }

  /**
   * Renders a timestamp as `YYYY` when it falls on the 1st of January,
   * as `YYYY-MM-DD` otherwise.
   *
   * Works in UTC throughout: parseDate builds its timestamps with Date.UTC
   * and toISOString is UTC as well, so local-time getters would make the
   * result depend on the time zone of the machine.
   */
  public static stringifyDate(time: number): string {
    const date = new Date(time);
    // simplify the date to the year only if it is the first of January
    if (date.getUTCMonth() === 0 && date.getUTCDate() === 1) {
      return date.getUTCFullYear().toString();
    }
    return this.stringifyText(date.toISOString().substring(0, 10));
  }

  /**
   * Parses `YYYY`, `YYYY-MM` or `YYYY-MM-DD` (optionally wrapped in quotes or
   * brackets) into a UTC timestamp; anything else is handed to Date.parse.
   *
   * @throws Error if the text cannot be interpreted as a date
   */
  private static parseDate(text: string): number {
    if (text.charAt(0) === '"' || text.charAt(0) === '(') {
      text = text.substring(1);
    }
    if (
      text.charAt(text.length - 1) === '"' ||
      text.charAt(text.length - 1) === ')'
    ) {
      text = text.substring(0, text.length - 1);
    }

    let timestamp: number | null = null;
    if (/^\d{4}$/.test(text)) {
      // it is the year only
      timestamp = Date.UTC(parseInt(text, 10), 0, 1, 0, 0, 0, 0);
    } else {
      // ISO-like string; only trusted when every part is a number, otherwise
      // a NaN part would block the Date.parse fallback below
      const parts = text.split('-').map((t) => parseInt(t, 10));
      const allNumeric = parts.every((p) => !isNaN(p));
      if (allNumeric && parts.length === 2) {
        timestamp = Date.UTC(parts[0], parts[1] - 1, 1, 0, 0, 0, 0); // Note: months are 0-based
      } else if (allNumeric && parts.length === 3) {
        timestamp = Date.UTC(parts[0], parts[1] - 1, parts[2], 0, 0, 0, 0); // Note: months are 0-based
      }
    }

    // If it could not be parsed as ISO string, try our luck with Date.parse
    // https://stackoverflow.com/questions/2587345/why-does-date-parse-give-incorrect-results
    if (timestamp === null) {
      timestamp = Date.parse(text);
    }
    if (timestamp === null || isNaN(timestamp)) {
      throw new Error('Cannot parse date: ' + text);
    }
    return timestamp;
  }

  /**
   * Tells whether the whole text is enclosed by ONE pair of brackets, i.e. the
   * bracket opened at the first character is the one closed at the last.
   * `(a b)` is wrapped, `(a) and (b)` is not. Brackets inside quotation marks
   * are ignored.
   */
  private static isWrappedInBrackets(text: string): boolean {
    if (text.charAt(0) !== '(' || text.charAt(text.length - 1) !== ')') {
      return false;
    }
    let depth = 0;
    let inQuotes = false;
    for (let i = 0; i < text.length; ++i) {
      const c = text.charAt(i);
      if (c === '"') {
        inQuotes = !inQuotes;
        continue;
      }
      if (inQuotes) {
        continue;
      }
      if (c === '(') {
        ++depth;
      } else if (c === ')') {
        --depth;
        // the opening bracket got closed before the end of the text
        if (depth <= 0 && i < text.length - 1) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Parses a textual query into a SearchQueryDTO tree.
   *
   * @param str the query text
   * @param implicitAND when true, tokens without an explicit relation are
   *   combined with AND; when false they are collected under
   *   UNKNOWN_RELATION (used for the content of a some-of list)
   * @throws Error if a date value cannot be parsed
   */
  public parse(str: string, implicitAND = true): SearchQueryDTO {
    str = str
      .replace(/\s\s+/g, ' ') // remove double spaces
      .replace(/:\s+/g, ':')
      .trim();

    // `%d` has to stand for at least one digit, otherwise the number
    // extraction below would have nothing to read
    const humanToRegexpStr = (template: string): string => {
      return template.replace(/%d/g, '\\d+');
    };
    const intFromRegexp = (text: string): number => {
      const digits = /\d+/.exec(text);
      return digits === null ? NaN : parseInt(digits[0], 10);
    };

    // only drop the outer brackets if they really belong together
    if (SearchQueryParser.isWrappedInBrackets(str)) {
      str = str.slice(1, str.length - 1).trim();
    }

    // index of the first space that is outside of brackets and quotation
    // marks, or str.length - 1 if there is none
    const firstSpace = (start = 0): number => {
      const bracketIn: number[] = [];
      let quotationMark = false;
      for (let i = start; i < str.length; ++i) {
        if (str.charAt(i) === '"') {
          quotationMark = !quotationMark;
          continue;
        }
        if (str.charAt(i) === '(') {
          bracketIn.push(i);
          continue;
        }
        if (str.charAt(i) === ')') {
          bracketIn.pop();
          continue;
        }
        if (
          quotationMark === false &&
          bracketIn.length === 0 &&
          str.charAt(i) === ' '
        ) {
          return i;
        }
      }
      return str.length - 1;
    };

    // tokenize
    const tokenEnd = firstSpace();
    if (tokenEnd !== str.length - 1) {
      // The relation keyword has to be a whole word: 'cat android' must not
      // be read as 'cat' AND 'roid'.
      const relationAt = (keyword: string): boolean => {
        const marker = ' ' + keyword;
        if (!str.startsWith(marker, tokenEnd)) {
          return false;
        }
        const next = str.charAt(tokenEnd + marker.length);
        return next === '' || next === ' ' || next === '(' || next === '"';
      };
      // first token + the rest; a rest with the same relation is merged
      // into one flat list
      const buildList = (type: SearchQueryTypes, restStart: number): SearchListQuery => {
        const rest = this.parse(str.slice(restStart), implicitAND);
        return {
          type,
          list: [
            this.parse(str.slice(0, tokenEnd), implicitAND), // trim brackets
            ...(rest.type === type ? (rest as SearchListQuery).list : [rest]),
          ],
        } as SearchListQuery;
      };

      if (relationAt(this.keywords.and)) {
        return buildList(
          SearchQueryTypes.AND,
          tokenEnd + (' ' + this.keywords.and).length
        ) as ANDSearchQuery;
      }
      if (relationAt(this.keywords.or)) {
        return buildList(
          SearchQueryTypes.OR,
          tokenEnd + (' ' + this.keywords.or).length
        ) as ORSearchQuery;
      }
      // Relation cannot be detected
      return buildList(
        implicitAND === true
          ? SearchQueryTypes.AND
          : SearchQueryTypes.UNKNOWN_RELATION,
        tokenEnd
      );
    }

    // some-of:(...) and N-of:(...)
    const nSomeOfPrefix = new RegExp('^\\d+-' + this.keywords.NSomeOf + ':').exec(str);
    if (str.startsWith(this.keywords.someOf + ':') || nSomeOfPrefix !== null) {
      const prefix = str.startsWith(this.keywords.someOf + ':')
        ? this.keywords.someOf + ':'
        : nSomeOfPrefix[0];
      const inner = this.parse(str.slice(prefix.length + 1, -1), false); // trim brackets

      // lists with unknown relation are only grouping, their items belong
      // directly to the some-of list
      const unfoldList = (q: SearchListQuery): SearchQueryDTO[] => {
        if (q.list && q.type === SearchQueryTypes.UNKNOWN_RELATION) {
          return q.list.map((e) => unfoldList(e as SearchListQuery)).flat(); // flatten array
        }
        return [q];
      };

      const ret = {
        type: SearchQueryTypes.SOME_OF,
        list: unfoldList(inner as SearchListQuery),
      } as SomeOfSearchQuery;
      if (nSomeOfPrefix !== null) {
        ret.min = parseInt(nSomeOfPrefix[0], 10);
      }
      return ret;
    }

    const kwStartsWith = (s: string, kw: string): boolean => {
      return s.startsWith(kw + ':') || s.startsWith(kw + '!:');
    };
    const valueAfterColon = (): string => str.substring(str.indexOf(':') + 1);
    // spread into the result: adds `negate` only if the value is true
    const negation = (kw: string) => str.startsWith(kw + '!:') && {negate: true};

    if (kwStartsWith(str, this.keywords.from)) {
      return {
        type: SearchQueryTypes.from_date,
        value: SearchQueryParser.parseDate(valueAfterColon()),
        ...negation(this.keywords.from),
      } as FromDateSearch;
    }

    if (kwStartsWith(str, this.keywords.to)) {
      return {
        type: SearchQueryTypes.to_date,
        value: SearchQueryParser.parseDate(valueAfterColon()),
        ...negation(this.keywords.to),
      } as ToDateSearch;
    }

    if (kwStartsWith(str, this.keywords.minRating)) {
      return {
        type: SearchQueryTypes.min_rating,
        value: parseInt(valueAfterColon(), 10),
        ...negation(this.keywords.minRating),
      } as MinRatingSearch;
    }

    if (kwStartsWith(str, this.keywords.maxRating)) {
      return {
        type: SearchQueryTypes.max_rating,
        value: parseInt(valueAfterColon(), 10),
        ...negation(this.keywords.maxRating),
      } as MaxRatingSearch;
    }

    if (kwStartsWith(str, this.keywords.minResolution)) {
      return {
        type: SearchQueryTypes.min_resolution,
        value: parseInt(valueAfterColon(), 10),
        ...negation(this.keywords.minResolution),
      } as MinResolutionSearch;
    }

    if (kwStartsWith(str, this.keywords.maxResolution)) {
      return {
        type: SearchQueryTypes.max_resolution,
        value: parseInt(valueAfterColon(), 10),
        ...negation(this.keywords.maxResolution),
      } as MaxResolutionSearch;
    }

    // N-km-from:place
    const kmFromPrefix = new RegExp('^\\d+-' + this.keywords.kmFrom + '!?:').exec(str);
    if (kmFromPrefix !== null) {
      let from = str.slice(kmFromPrefix[0].length);
      if (
        (from.charAt(0) === '(' && from.charAt(from.length - 1) === ')') ||
        (from.charAt(0) === '"' && from.charAt(from.length - 1) === '"')
      ) {
        from = from.slice(1, from.length - 1);
      }
      return {
        type: SearchQueryTypes.distance,
        distance: intFromRegexp(str),
        from: {text: from},
        ...(kmFromPrefix[0].endsWith('!:') && {negate: true}), // only add if the value is true
      } as DistanceSearch;
    }

    if (str.startsWith(this.keywords.orientation + ':')) {
      return {
        type: SearchQueryTypes.orientation,
        landscape:
          str.slice((this.keywords.orientation + ':').length) ===
          this.keywords.landscape,
      } as OrientationSearch;
    }

    // same-day:frequency and last-N-days:frequency
    const isSameDay = kwStartsWith(str, this.keywords.sameDay);
    if (isSameDay ||
      new RegExp('^' + humanToRegexpStr(this.keywords.lastNDays) + '!?:').test(str)) {
      const colonIdx = str.indexOf(':');
      const freqStr = str.slice(colonIdx + 1);
      let freq: DatePatternFrequency | null = null;
      let ago: number | undefined;

      if (freqStr === this.keywords.every_week) {
        freq = DatePatternFrequency.every_week;
      } else if (freqStr === this.keywords.every_month) {
        freq = DatePatternFrequency.every_month;
      } else if (freqStr === this.keywords.every_year) {
        freq = DatePatternFrequency.every_year;
      } else {
        const agoFrequencies: Array<[string, DatePatternFrequency]> = [
          [this.keywords.days_ago, DatePatternFrequency.days_ago],
          [this.keywords.weeks_ago, DatePatternFrequency.weeks_ago],
          [this.keywords.months_ago, DatePatternFrequency.months_ago],
          [this.keywords.years_ago, DatePatternFrequency.years_ago],
        ];
        for (const [template, frequency] of agoFrequencies) {
          if (new RegExp('^' + humanToRegexpStr(template) + '$').test(freqStr)) {
            freq = frequency;
            ago = intFromRegexp(freqStr);
            break;
          }
        }
      }

      // Explicit checks: a frequency or an "ago" number of 0 is a valid value.
      // An unknown frequency falls through to the text search below.
      if (freq !== null) {
        return {
          type: SearchQueryTypes.date_pattern,
          daysLength: isSameDay ? 0 : intFromRegexp(str),
          frequency: freq,
          ...(ago !== undefined && {agoNumber: ago}),
          // the keyword is accepted with '!:' too, so keep the negation
          ...(str.charAt(colonIdx - 1) === '!' && {negate: true}),
        } as DatePatternSearch;
      }
    }

    // parse text search
    const tmp = TextSearchQueryTypes.map((type) => ({
      key: (this.keywords as any)[SearchQueryTypes[type]] + ':',
      queryTemplate: {type, text: ''} as TextSearch,
    })).concat(
      TextSearchQueryTypes.map((type) => ({
        key: (this.keywords as any)[SearchQueryTypes[type]] + '!:',
        queryTemplate: {type, text: '', negate: true} as TextSearch,
      }))
    );
    for (const typeTmp of tmp) {
      if (str.startsWith(typeTmp.key)) {
        const ret: TextSearch = Utils.clone(typeTmp.queryTemplate);
        if (
          str.charAt(typeTmp.key.length) === '"' &&
          str.charAt(str.length - 1) === '"'
        ) {
          // exact match
          ret.text = str.slice(typeTmp.key.length + 1, str.length - 1);
          ret.matchType = TextSearchQueryMatchTypes.exact_match;
        } else if (
          str.charAt(typeTmp.key.length) === '(' &&
          str.charAt(str.length - 1) === ')'
        ) {
          // like match, in brackets
          ret.text = str.slice(typeTmp.key.length + 1, str.length - 1);
        } else {
          ret.text = str.slice(typeTmp.key.length);
        }
        return ret;
      }
    }

    // no keyword matched: the whole token is a free text search
    return {type: SearchQueryTypes.any_text, text: str} as TextSearch;
  }

  /**
   * Renders a query tree as text. The brackets around the outermost
   * expression are dropped as they carry no information.
   */
  public stringify(query: SearchQueryDTO): string {
    const ret = this.stringifyOnEntry(query);
    // only drop the outer brackets if they really belong together
    if (SearchQueryParser.isWrappedInBrackets(ret)) {
      return ret.slice(1, ret.length - 1);
    }
    return ret;
  }

  /**
   * Renders one node of the query tree (recursively for lists).
   * Returns an empty string for a missing query and for date/text searches
   * without a value.
   *
   * @throws Error if the query type is not known
   */
  private stringifyOnEntry(query: SearchQueryDTO): string {
    if (!query || !query.type) {
      return '';
    }
    const colon = (query as NegatableSearchQuery).negate === true ? '!:' : ':';
    const joinList = (separator: string): string =>
      (query as SearchListQuery).list
        .map((q) => this.stringifyOnEntry(q))
        .join(separator);
    // an unset (NaN) range value is rendered as empty
    const rangeValue = (): string | number => {
      const value = (query as RangeSearch).value;
      return isNaN(value) ? '' : value;
    };
    const textValue = (): string =>
      SearchQueryParser.stringifyText(
        (query as TextSearch).text,
        (query as TextSearch).matchType
      );

    switch (query.type) {
      case SearchQueryTypes.AND:
        return '(' + joinList(' ' + this.keywords.and + ' ') + ')';

      case SearchQueryTypes.OR:
        return '(' + joinList(' ' + this.keywords.or + ' ') + ')';

      case SearchQueryTypes.SOME_OF:
        if ((query as SomeOfSearchQuery).min) {
          return (
            (query as SomeOfSearchQuery).min +
            '-' +
            this.keywords.NSomeOf +
            ':(' +
            joinList(' ') +
            ')'
          );
        }
        return this.keywords.someOf + ':(' + joinList(' ') + ')';

      case SearchQueryTypes.from_date:
        if (!(query as FromDateSearch).value) {
          return '';
        }
        return (
          this.keywords.from +
          colon +
          SearchQueryParser.stringifyDate((query as FromDateSearch).value)
        );

      case SearchQueryTypes.to_date:
        if (!(query as ToDateSearch).value) {
          return '';
        }
        return (
          this.keywords.to +
          colon +
          SearchQueryParser.stringifyDate((query as ToDateSearch).value)
        );

      case SearchQueryTypes.min_rating:
        return this.keywords.minRating + colon + rangeValue();

      case SearchQueryTypes.max_rating:
        return this.keywords.maxRating + colon + rangeValue();

      case SearchQueryTypes.min_resolution:
        return this.keywords.minResolution + colon + rangeValue();

      case SearchQueryTypes.max_resolution:
        return this.keywords.maxResolution + colon + rangeValue();

      case SearchQueryTypes.distance:
        // a place name with a space in it gets wrapped in brackets
        return (
          (query as DistanceSearch).distance +
          '-' +
          this.keywords.kmFrom +
          colon +
          SearchQueryParser.stringifyText((query as DistanceSearch).from.text)
        );

      case SearchQueryTypes.orientation:
        return (
          this.keywords.orientation +
          ':' +
          ((query as OrientationSearch).landscape
            ? this.keywords.landscape
            : this.keywords.portrait)
        );

      case SearchQueryTypes.date_pattern: {
        const q = (query as DatePatternSearch);
        let strBuilder = '';
        if (q.daysLength <= 0) {
          strBuilder += this.keywords.sameDay;
        } else {
          strBuilder += this.keywords.lastNDays.replace(/%d/g, q.daysLength.toString());
        }
        // parse() reads the negation of date patterns, so it is written too
        strBuilder += colon;
        switch (q.frequency) {
          case DatePatternFrequency.every_week:
            strBuilder += this.keywords.every_week;
            break;
          case DatePatternFrequency.every_month:
            strBuilder += this.keywords.every_month;
            break;
          case DatePatternFrequency.every_year:
            strBuilder += this.keywords.every_year;
            break;
          case DatePatternFrequency.days_ago:
            strBuilder += this.keywords.days_ago.replace(/%d/g, q.agoNumber.toString());
            break;
          case DatePatternFrequency.weeks_ago:
            strBuilder += this.keywords.weeks_ago.replace(/%d/g, q.agoNumber.toString());
            break;
          case DatePatternFrequency.months_ago:
            strBuilder += this.keywords.months_ago.replace(/%d/g, q.agoNumber.toString());
            break;
          case DatePatternFrequency.years_ago:
            strBuilder += this.keywords.years_ago.replace(/%d/g, q.agoNumber.toString());
            break;
        }
        return strBuilder;
      }

      case SearchQueryTypes.any_text:
        // the keyword is only needed to carry the negation
        if (!(query as TextSearch).negate) {
          return textValue();
        }
        return (
          (this.keywords as any)[SearchQueryTypes[query.type]] +
          colon +
          textValue()
        );

      case SearchQueryTypes.person:
      case SearchQueryTypes.position:
      case SearchQueryTypes.keyword:
      case SearchQueryTypes.caption:
      case SearchQueryTypes.file_name:
      case SearchQueryTypes.directory:
        if (!(query as TextSearch).text) {
          return '';
        }
        return (
          (this.keywords as any)[SearchQueryTypes[query.type]] +
          colon +
          textValue()
        );

      default:
        throw new Error('Unknown type: ' + query.type);
    }
  }
}