2022-08-07 14:04:42 +00:00
//! Module defining a search engine to find [Card]s.
2022-08-04 20:06:50 +00:00
2022-08-08 21:23:26 +00:00
use tantivy ::{ Document , Index , IndexReader , IndexWriter , Score } ;
2022-08-05 01:03:46 +00:00
use tantivy ::collector ::TopDocs ;
use tantivy ::query ::{ QueryParser , QueryParserError } ;
2022-08-07 03:37:43 +00:00
use tantivy ::schema ::{ Field , NumericOptions , Schema , TextOptions } ;
2022-08-05 01:03:46 +00:00
use tantivy ::tokenizer ::TextAnalyzer ;
use itertools ::Itertools ;
2022-08-06 17:44:44 +00:00
use crate ::data ::corebundle ::globals ::LocalizedGlobalsIndexes ;
2022-08-07 03:37:43 +00:00
use crate ::data ::setbundle ::card ::{ Card , CardIndex } ;
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// The search engine.
2022-08-04 20:06:50 +00:00
///
2022-08-07 03:37:43 +00:00
/// To create a new engine, use [CardSearchEngine::new].
///
/// A separate search engine should be created for every locale.
pub struct CardSearchEngine {
/// The index of the search engine.
index : Index ,
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// Struct to read documents from the search engine.
reader : IndexReader ,
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// Struct to parse queries input by the user.
parser : QueryParser ,
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// Localization of game globals used by the search engine.
pub globals : LocalizedGlobalsIndexes ,
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// Cards searchable in the search engine.
pub cards : CardIndex
2022-08-04 20:06:50 +00:00
}
2022-08-07 03:37:43 +00:00
impl CardSearchEngine {
/// Create the [tantivy::tokenizer::TextAnalyzer] for card text.
///
/// It should not alter text significantly, as it may contain important game vocabulary terms.
fn tokenizer ( ) -> TextAnalyzer {
use tantivy ::tokenizer ::* ;
TextAnalyzer ::from ( SimpleTokenizer )
. filter ( LowerCaser )
}
/// Create the [tantivy::schema::TextOptions] for card codes.
///
/// Card codes should:
2022-08-08 02:10:59 +00:00
/// - TODO: be tokenized without alterations;
/// - ignore positioning;
2022-08-07 03:37:43 +00:00
/// - be retrievable (what [tantivy] calls "stored").
fn options_code ( ) -> TextOptions {
use tantivy ::schema ::* ;
TextOptions ::default ( )
2022-08-08 02:10:59 +00:00
. set_indexing_options ( TextFieldIndexing ::default ( )
. set_tokenizer ( " card " )
. set_index_option ( IndexRecordOption ::Basic )
)
2022-08-07 03:37:43 +00:00
. set_stored ( )
. set_fast ( )
}
/// Create the [tantivy::schema::TextOptions] for card keywords.
///
/// Card keywords should:
/// - be tokenized with the [CardSearchEngine::tokenizer];
/// - ignore positioning.
fn options_keyword ( ) -> TextOptions {
use tantivy ::schema ::* ;
TextOptions ::default ( )
. set_indexing_options ( TextFieldIndexing ::default ( )
. set_tokenizer ( " card " )
. set_index_option ( IndexRecordOption ::Basic )
)
}
/// Create the [tantivy::schema::TextOptions] for card text fields.
///
/// Card text should:
/// - TODO: be tokenized with the tokenizer for the locale language;
/// - consider both frequency and positioning.
fn options_text ( ) -> TextOptions {
use tantivy ::schema ::* ;
TextOptions ::default ( )
. set_indexing_options ( TextFieldIndexing ::default ( )
. set_tokenizer ( " card " )
. set_index_option ( IndexRecordOption ::WithFreqsAndPositions )
)
}
/// Create the [tantivy::schema::NumericOptions] for card numeric fields.
///
/// Card numbers should:
/// - be indexed.
fn options_number ( ) -> NumericOptions {
use tantivy ::schema ::* ;
NumericOptions ::default ( )
. set_indexed ( )
}
/// Create the [Schema] for the search engine.
///
/// It will contain [Field]s with the following names:
///
2022-08-08 19:06:52 +00:00
/// | Name | Type | Description |
/// |---------------|----------------------------------|-------------|
/// | `code` | [code](Self::options_code) | The internal [card code](Card::code), such as `01IO012`. |
/// | `name` | [text](Self::options_text) | The [name of the card](Card::name). |
/// | `type` | [keyword](Self::options_keyword) | The [type of the card](Card::type), such as `Unit`. |
/// | `set` | [keyword](Self::options_keyword) | The [set the card belongs to](Card::set), such as `Beyond the Bandlewood`. |
/// | `rarity` | [keyword](Self::options_keyword) | The [rarity of the card](patched_porobot::data::setbundle::card::Card::rarity), such as `Rare`, or `Champion`. |
/// | `collectible` | [number](Self::options_number) | `1` if the [card is collectible](Card::collectible), `0` otherwise. |
/// | `regions` | [keyword](Self::options_keyword) | The [regions of the card](Card::regions), separated by spaces. |
/// | `attack` | [number](Self::options_number) | The [attack of the unit](Card::attack); always `0` for non-units. |
/// | `cost` | [number](Self::options_number) | The [mana cost of the card](Card::cost); always `0` for non-playable cards. |
/// | `health` | [number](Self::options_number) | The [health of the unit](Card::health); always `0` for non-units. |
/// | `spellspeed` | [keyword](Self::options_keyword) | The [speed of the spell](Card::spell_speed), or an empty string for non-spells. Note that [`Focus`](crate::data::setbundle::keyword::CardKeyword::Focus) is a keyword, and not a spell speed. |
/// | `keywords` | [keyword](Self::options_keyword) | The [keywords of the card](Card::keywords), separated by spaces. |
/// | `description` | [text](Self::options_text) | The [description of the card](Card::localized_description_text). |
/// | `levelup` | [text](Self::options_text) | The [level up text of the champion](Card::localized_levelup_text). |
/// | `flavor` | [text](Self::options_text) | The [flavor text of the card](Card::localized_flavor_text). |
/// | `artist` | [text](Self::options_text) | The [artist(s) of the card's illustration](Card::artist_name). |
2022-08-07 03:37:43 +00:00
///
/// Use [Self::schema_fields] to create the [CardSchemaFields] object containing all of them.
///
fn schema ( ) -> Schema {
use tantivy ::schema ::* ;
let mut schema_builder = Schema ::builder ( ) ;
let options_code = Self ::options_code ( ) ;
let options_keyword = Self ::options_keyword ( ) ;
let options_text = Self ::options_text ( ) ;
let options_number = Self ::options_number ( ) ;
schema_builder . add_text_field ( " code " , options_code ) ;
schema_builder . add_text_field ( " name " , options_text . clone ( ) ) ;
schema_builder . add_text_field ( " type " , options_keyword . clone ( ) ) ;
schema_builder . add_text_field ( " set " , options_keyword . clone ( ) ) ;
schema_builder . add_text_field ( " rarity " , options_keyword . clone ( ) ) ;
schema_builder . add_u64_field ( " collectible " , options_number . clone ( ) ) ;
schema_builder . add_text_field ( " regions " , options_keyword . clone ( ) ) ;
schema_builder . add_u64_field ( " attack " , options_number . clone ( ) ) ;
schema_builder . add_u64_field ( " cost " , options_number . clone ( ) ) ;
schema_builder . add_u64_field ( " health " , options_number ) ;
schema_builder . add_text_field ( " spellspeed " , options_keyword . clone ( ) ) ;
schema_builder . add_text_field ( " keywords " , options_keyword . clone ( ) ) ;
schema_builder . add_text_field ( " description " , options_text . clone ( ) ) ;
schema_builder . add_text_field ( " levelup " , options_text . clone ( ) ) ;
schema_builder . add_text_field ( " flavor " , options_text . clone ( ) ) ;
schema_builder . add_text_field ( " artist " , options_text ) ;
schema_builder . add_text_field ( " subtypes " , options_keyword . clone ( ) ) ;
schema_builder . add_text_field ( " supertype " , options_keyword ) ;
schema_builder . build ( )
}
/// Create a [CardSchemaFields] object from the given schema.
fn schema_fields ( schema : & Schema ) -> CardSchemaFields {
CardSchemaFields {
code : schema . get_field ( " code " ) . expect ( " schema to have a 'code' field " ) ,
name : schema . get_field ( " name " ) . expect ( " schema to have a 'name' field " ) ,
r#type : schema . get_field ( " type " ) . expect ( " schema to have a 'type' field " ) ,
set : schema . get_field ( " set " ) . expect ( " schema to have a 'set' field " ) ,
rarity : schema . get_field ( " rarity " ) . expect ( " schema to have a 'rarity' field " ) ,
collectible : schema . get_field ( " collectible " ) . expect ( " schema to have a 'collectible' field " ) ,
regions : schema . get_field ( " regions " ) . expect ( " schema to have a 'regions' field " ) ,
attack : schema . get_field ( " attack " ) . expect ( " schema to have a 'attack' field " ) ,
cost : schema . get_field ( " cost " ) . expect ( " schema to have a 'cost' field " ) ,
health : schema . get_field ( " health " ) . expect ( " schema to have a 'health' field " ) ,
spellspeed : schema . get_field ( " spellspeed " ) . expect ( " schema to have a 'spellspeed' field " ) ,
keywords : schema . get_field ( " keywords " ) . expect ( " schema to have a 'keywords' field " ) ,
description : schema . get_field ( " description " ) . expect ( " schema to have a 'description' field " ) ,
levelup : schema . get_field ( " levelup " ) . expect ( " schema to have a 'levelup' field " ) ,
flavor : schema . get_field ( " flavor " ) . expect ( " schema to have a 'flavor' field " ) ,
artist : schema . get_field ( " artist " ) . expect ( " schema to have a 'artist' field " ) ,
subtypes : schema . get_field ( " subtypes " ) . expect ( " schema to have a 'subtypes' field " ) ,
supertype : schema . get_field ( " supertype " ) . expect ( " schema to have a 'supertype' field " ) ,
}
}
/// Build [in RAM](Index::create_in_ram) the [Index] of the search engine.
fn index ( ) -> Index {
Index ::create_in_ram (
Self ::schema ( )
2022-08-04 20:06:50 +00:00
)
2022-08-07 03:37:43 +00:00
}
/// Build a [IndexWriter] with the optimal configuration for the search engine.
///
/// Uses 12 MB of RAM; do not lower below 3 MB, or it will panic!
fn writer ( index : & Index ) -> IndexWriter {
index
. writer ( 12_000_000 )
. expect ( " to be able to create a IndexWriter " )
}
/// Build a [IndexReader] with the optimal configuration for the search engine.
fn reader ( index : & Index ) -> IndexReader {
index
. reader_builder ( )
. reload_policy ( tantivy ::ReloadPolicy ::Manual )
. try_into ( )
. expect ( " to be able to create a IndexReader " )
}
/// Create a [Document] from a [Card].
fn document ( fields : & CardSchemaFields , globals : & LocalizedGlobalsIndexes , card : Card ) -> Document {
use tantivy ::doc ;
doc! (
fields . code = > card . code ,
fields . name = > card . name ,
2022-08-07 15:32:40 +00:00
fields . r#type = > String ::from ( & card . r#type ) ,
2022-08-07 03:37:43 +00:00
fields . set = > card . set
. localized ( & globals . sets )
. map ( | cs | cs . name . to_owned ( ) )
. unwrap_or_else ( String ::new ) ,
fields . rarity = > card . rarity
. localized ( & globals . rarities )
. map ( | cr | cr . name . to_owned ( ) )
. unwrap_or_else ( String ::new ) ,
fields . collectible = > if card . collectible { 1 u64 } else { 0 u64 } ,
fields . regions = > card . regions . iter ( )
. map ( | region | region
. localized ( & globals . regions )
. map ( | cr | cr . name . to_owned ( ) )
. unwrap_or_else ( String ::new )
) . join ( " " ) ,
fields . attack = > card . attack ,
fields . cost = > card . cost ,
fields . health = > card . health ,
fields . spellspeed = > card . spell_speed
. localized ( & globals . spell_speeds )
. map ( | ss | ss . name . to_owned ( ) )
. unwrap_or_else ( String ::new ) ,
fields . keywords = > card . keywords . iter ( )
. map ( | keyword | keyword
. localized ( & globals . keywords )
. map ( | ck | ck . name . to_owned ( ) )
. unwrap_or_else ( String ::new ) )
. join ( " " ) ,
fields . description = > card . localized_description_text ,
fields . levelup = > card . localized_levelup_text ,
fields . flavor = > card . localized_flavor_text ,
fields . artist = > card . artist_name ,
fields . subtypes = > card . subtypes . join ( " " ) ,
fields . supertype = > card . supertype ,
)
}
2022-08-04 20:06:50 +00:00
2022-08-07 03:41:33 +00:00
/// Build the [QueryParser] of the search engine.
2022-08-07 03:37:43 +00:00
fn parser ( index : & Index , fields : CardSchemaFields ) -> QueryParser {
2022-08-08 17:58:57 +00:00
let mut parser = QueryParser ::for_index (
2022-08-07 03:37:43 +00:00
& index ,
2022-08-08 02:10:59 +00:00
vec! [
fields . code ,
fields . name ,
fields . description ,
2022-08-08 17:58:57 +00:00
fields . set ,
fields . regions ,
2022-08-08 02:10:59 +00:00
fields . flavor ,
2022-08-08 17:58:57 +00:00
fields . artist ,
2022-08-08 02:10:59 +00:00
fields . subtypes ,
fields . supertype ,
]
2022-08-08 17:58:57 +00:00
) ;
parser . set_conjunction_by_default ( ) ;
2022-08-08 21:23:26 +00:00
parser . set_field_boost ( fields . code , 3.0 ) ;
parser . set_field_boost ( fields . name , 3.0 ) ;
parser . set_field_boost ( fields . flavor , 0.7 ) ;
2022-08-08 17:58:57 +00:00
parser
2022-08-07 03:37:43 +00:00
}
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// Create a new [CardSearchEngine].
pub fn new ( globals : LocalizedGlobalsIndexes , cards : CardIndex ) -> Self {
let index = Self ::index ( ) ;
let schema = index . schema ( ) ;
let fields = Self ::schema_fields ( & schema ) ;
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
index . tokenizers ( ) . register ( " card " , Self ::tokenizer ( ) ) ;
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
let mut writer = Self ::writer ( & index ) ;
for card in cards . values ( ) {
let document = Self ::document ( & fields , & globals , card . clone ( ) ) ;
writer . add_document ( document )
. expect ( " IndexWriter threads to not panic or die before adding a document " ) ;
} ;
writer . commit ( )
. expect ( " IndexWriter threads to not panic or die before commit " ) ;
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
let parser = Self ::parser ( & index , fields ) ;
let reader = Self ::reader ( & index ) ;
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
Self { index , reader , parser , globals , cards }
}
2022-08-04 20:06:50 +00:00
2022-08-07 03:37:43 +00:00
/// Perform a query on the search engine.
pub fn query ( & self , input : & str , top : usize ) -> Result < Vec < & Card > , QueryParserError > {
let searcher = self . reader . searcher ( ) ;
2022-08-05 01:03:46 +00:00
2022-08-07 03:37:43 +00:00
let query = self . parser . parse_query ( input ) ? ;
2022-08-05 01:03:46 +00:00
2022-08-07 03:37:43 +00:00
let search = searcher . search ( & * query , & TopDocs ::with_limit ( top ) )
. expect ( " Searcher::search to never fail " ) ;
2022-08-05 01:03:46 +00:00
2022-08-07 03:37:43 +00:00
let f_code = self . index . schema ( ) . get_field ( " code " )
. expect ( " schema to have a 'code' field " ) ;
2022-08-05 01:03:46 +00:00
2022-08-07 03:37:43 +00:00
let results = search . iter ( )
. filter_map ( | ( _score , address ) | searcher . doc ( address . to_owned ( ) ) . ok ( ) )
. filter_map ( | doc | doc . get_first ( f_code ) . cloned ( ) )
. filter_map ( | field | field . as_text ( ) . map ( String ::from ) )
. filter_map ( | code | self . cards . get ( & * code ) )
. collect_vec ( ) ;
Ok ( results )
}
2022-08-05 01:03:46 +00:00
}
2022-08-07 03:37:43 +00:00
/// Struct containing all retrieved [CardSearchEngine] [Field]s.
///
/// This makes it easier to pass them around without having to re-fetch them every time they are used.
2022-08-07 03:42:21 +00:00
#[ derive(Clone, Debug) ]
2022-08-07 03:37:43 +00:00
struct CardSchemaFields {
/// [Card::code].
pub code : Field ,
/// [Card::name].
pub name : Field ,
/// English [Card::type].
pub r#type : Field ,
/// Localized [Card::set].
pub set : Field ,
/// Localized [Card::rarity].
pub rarity : Field ,
/// `0` if the card is not [Card::collectible], `1` otherwise.
pub collectible : Field ,
/// Space-separated localized [Card::regions].
pub regions : Field ,
/// [Card::attack].
pub attack : Field ,
/// [Card::cost].
pub cost : Field ,
/// [Card::health].
pub health : Field ,
/// [Card::spell_speed].
pub spellspeed : Field ,
/// Space-separated localized [Card::keywords].
pub keywords : Field ,
/// [Card::localized_description_text].
pub description : Field ,
/// [Card::localized_levelup_text].
pub levelup : Field ,
/// [Card::localized_flavor_text].
pub flavor : Field ,
/// [Card::artist_name].
pub artist : Field ,
/// Space-separated [Card::subtypes].
pub subtypes : Field ,
/// [Card::supertype].
pub supertype : Field ,
2022-08-05 01:03:46 +00:00
}