Clamp dates when serializing to Elasticsearch API ()

This commit is contained in:
Claire 2023-11-27 14:25:54 +01:00 committed by GitHub
parent 7b56085f11
commit f1657e6d62
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 26 additions and 4 deletions

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class AccountsIndex < Chewy::Index
include DatetimeClampingConcern
settings index: index_preset(refresh_interval: '30s'), analysis: {
filter: {
english_stop: {
@ -60,7 +62,7 @@ class AccountsIndex < Chewy::Index
field(:following_count, type: 'long')
field(:followers_count, type: 'long')
field(:properties, type: 'keyword', value: ->(account) { account.searchable_properties })
field(:last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at })
field(:last_status_at, type: 'date', value: ->(account) { clamp_date(account.last_status_at || account.created_at) })
field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' }
field(:text, type: 'text', analyzer: 'verbatim', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' }

View file

@ -0,0 +1,14 @@
# frozen_string_literal: true
module DatetimeClampingConcern
extend ActiveSupport::Concern
MIN_ISO8601_DATETIME = '0000-01-01T00:00:00Z'.to_datetime.freeze
MAX_ISO8601_DATETIME = '9999-12-31T23:59:59Z'.to_datetime.freeze
class_methods do
def clamp_date(datetime)
datetime.clamp(MIN_ISO8601_DATETIME, MAX_ISO8601_DATETIME)
end
end
end

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class PublicStatusesIndex < Chewy::Index
include DatetimeClampingConcern
settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: {
filter: {
english_stop: {
@ -62,6 +64,6 @@ class PublicStatusesIndex < Chewy::Index
field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
field(:language, type: 'keyword')
field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
field(:created_at, type: 'date')
field(:created_at, type: 'date', value: ->(status) { clamp_date(status.created_at) })
end
end

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class StatusesIndex < Chewy::Index
include DatetimeClampingConcern
settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: {
filter: {
english_stop: {
@ -60,6 +62,6 @@ class StatusesIndex < Chewy::Index
field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by })
field(:language, type: 'keyword')
field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
field(:created_at, type: 'date')
field(:created_at, type: 'date', value: ->(status) { clamp_date(status.created_at) })
end
end

View file

@ -1,6 +1,8 @@
# frozen_string_literal: true
class TagsIndex < Chewy::Index
include DatetimeClampingConcern
settings index: index_preset(refresh_interval: '30s'), analysis: {
analyzer: {
content: {
@ -42,6 +44,6 @@ class TagsIndex < Chewy::Index
field(:name, type: 'text', analyzer: 'content', value: :display_name) { field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content') }
field(:reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? })
field(:usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts })
field(:last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at })
field(:last_status_at, type: 'date', value: ->(tag) { clamp_date(tag.last_status_at || tag.created_at) })
end
end