From eb43e6b2c861053c6a788d6316c1b798f60533ea Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Fri, 5 Aug 2022 03:03:46 +0200 Subject: [PATCH] Some more things --- src/search/botindex.rs | 30 ---------- src/search/botsearch.rs | 86 ---------------------------- src/search/card.rs | 124 +++++++++++++++++++++++++++++++++++++--- src/search/mod.rs | 2 - src/telegram/bin.rs | 11 +++- 5 files changed, 126 insertions(+), 127 deletions(-) delete mode 100644 src/search/botindex.rs delete mode 100644 src/search/botsearch.rs diff --git a/src/search/botindex.rs b/src/search/botindex.rs deleted file mode 100644 index 8393e91..0000000 --- a/src/search/botindex.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! This module provides functions to manage [tantivy] [Index]es for internal bot usage. - -use tantivy::{Index, IndexReader, IndexWriter, LeasedItem, ReloadPolicy, Searcher}; -use crate::search::card::card_schema; - - -/// Build a new [Index] for [crate::schena::setbundle::Card] documents, based on [card_schema]. -pub fn card_index() -> Index { - Index::create_in_ram( - card_schema() - ) -} - - -/// Build a [IndexWriter] with the optimal configuration for [crate::schena::setbundle::Card] documents. -pub fn card_writer(index: &Index) -> IndexWriter { - index - .writer(4_000_000) - .expect("to be able to allocate 4 MB for a IndexWriter") -} - - -/// Build a [IndexReader] with the optimal configuration for [crate::schena::setbundle::Card] documents. -pub fn card_reader(index: &Index) -> IndexReader { - index - .reader_builder() - .reload_policy(ReloadPolicy::Manual) - .try_into() - .expect("to be able to create a IndexReader") -} diff --git a/src/search/botsearch.rs b/src/search/botsearch.rs deleted file mode 100644 index 10caa74..0000000 --- a/src/search/botsearch.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! This module provides functions to perform queries for internal bot usage. - - -use tantivy::{Index, IndexReader, TantivyError}; -use tantivy::collector::TopDocs; -use tantivy::query::{QueryParser, QueryParserError}; -use tantivy::schema::Schema; -use itertools::Itertools; -use crate::search::botsearch::QueryError::Parsing; - - -pub fn card_query_parser(index: &Index) -> QueryParser { - let schema = index.schema(); - - let f_code = schema.get_field("code").expect("schema to have a 'code' field"); - let f_name = schema.get_field("name").expect("schema to have a 'name' field"); - let f_type = schema.get_field("type").expect("schema to have a 'type' field"); - let f_set = schema.get_field("set").expect("schema to have a 'set' field"); - let f_rarity = schema.get_field("rarity").expect("schema to have a 'rarity' field"); - let f_collectible = schema.get_field("collectible").expect("schema to have a 'collectible' field"); - let f_regions = schema.get_field("regions").expect("schema to have a 'regions' field"); - let f_attack = schema.get_field("attack").expect("schema to have a 'attack' field"); - let f_cost = schema.get_field("cost").expect("schema to have a 'cost' field"); - let f_health = schema.get_field("health").expect("schema to have a 'health' field"); - let f_spellspeed = schema.get_field("spellspeed").expect("schema to have a 'spellspeed' field"); - let f_keywords = schema.get_field("keywords").expect("schema to have a 'keywords' field"); - let f_description = schema.get_field("description").expect("schema to have a 'description' field"); - let f_levelup = schema.get_field("levelup").expect("schema to have a 'levelup' field"); - let f_associated = schema.get_field("associated").expect("schema to have a 'associated' field"); - let f_flavor = schema.get_field("flavor").expect("schema to have a 'flavor' field"); - let f_artist = schema.get_field("artist").expect("schema to have a 'artist' field"); - let f_subtypes = schema.get_field("subtypes").expect("schema to have a 'subtypes' field"); - let f_supertype = schema.get_field("supertype").expect("schema to have a 'supertype' field"); - - QueryParser::for_index( - &index, - vec![ - f_code, - f_name, - f_type, - f_set, - f_rarity, - f_collectible, - f_regions, - f_attack, - f_cost, - f_health, - f_spellspeed, - f_keywords, - f_description, - f_levelup, - f_associated, - f_flavor, - f_artist, - f_subtypes, - f_supertype, - ] - ) -} - - -pub enum QueryError { - Parsing(QueryParserError), - Search(TantivyError), -} - - -pub fn card_query(schema: &Schema, reader: &IndexReader, parser: &QueryParser, query: &str, amount: usize) -> Result, QueryError> { - log::debug!("Searching for `{}`...", &query); - - let searcher = reader.searcher(); - let query = parser.parse_query(query) - .map_err(QueryError::Parsing)?; - let search = searcher.search(&*query, &TopDocs::with_limit(amount)) - .map_err(QueryError::Search)?; - - let f_code = schema.get_field("code").expect("schema to have a 'code' field"); - - let results = search.iter() - .filter_map(|(_score, address)| searcher.doc(address.to_owned()).ok()) - .filter_map(|doc| doc.get_first(f_code).cloned()) - .filter_map(|field| field.as_text().map(String::from)) - .collect_vec(); - - Ok(results) -} diff --git a/src/search/card.rs b/src/search/card.rs index 492ec92..ca7ac65 100644 --- a/src/search/card.rs +++ b/src/search/card.rs @@ -1,5 +1,11 @@ //! This module configures [tantivy] structs for [Card] search. +use tantivy::{Document, Index, IndexReader, IndexWriter, TantivyError}; +use tantivy::collector::TopDocs; +use tantivy::query::{QueryParser, QueryParserError}; +use tantivy::schema::{Schema, TextOptions}; +use tantivy::tokenizer::TextAnalyzer; +use itertools::Itertools; use crate::load::corebundle::MappedGlobals; use crate::schema::setbundle::{Card, CardType}; @@ -7,7 +13,7 @@ use crate::schema::setbundle::{Card, CardType}; /// Create a new [tantivy::tokenizer::TextAnalyzer] for card text. /// /// It should not alter text significantly, as it may contain important game vocabulary terms. -pub fn card_tokenizer() -> tantivy::tokenizer::TextAnalyzer { +pub fn card_tokenizer() -> TextAnalyzer { use tantivy::tokenizer::*; TextAnalyzer::from(SimpleTokenizer) @@ -16,7 +22,7 @@ pub fn card_tokenizer() -> tantivy::tokenizer::TextAnalyzer { /// Create a new [tantivy::schema::TextOptions] for card codes, skipping tokenization. -pub fn cardcode_options() -> tantivy::schema::TextOptions { +pub fn cardcode_options() -> TextOptions { use tantivy::schema::*; TextOptions::default() @@ -26,7 +32,7 @@ pub fn cardcode_options() -> tantivy::schema::TextOptions { /// Create a new [tantivy::schema::TextOptions] for card keywords, using the given tokenizer. -pub fn cardkeyword_options() -> tantivy::schema::TextOptions { +pub fn cardkeyword_options() -> TextOptions { use tantivy::schema::*; TextOptions::default() @@ -39,7 +45,7 @@ pub fn cardkeyword_options() -> tantivy::schema::TextOptions { /// Create a new [tantivy::schema::TextOptions] for card text fields, using the given tokenizer. -pub fn cardtext_options() -> tantivy::schema::TextOptions { +pub fn cardtext_options() -> TextOptions { use tantivy::schema::*; TextOptions::default() @@ -52,7 +58,7 @@ pub fn cardtext_options() -> tantivy::schema::TextOptions { /// Create a new [tantivy::schema::Schema] using [Card]s as documents. -pub fn card_schema() -> tantivy::schema::Schema { +pub fn card_schema() -> Schema { use tantivy::schema::*; let mut schema_builder = Schema::builder(); @@ -86,7 +92,7 @@ pub fn card_schema() -> tantivy::schema::Schema { /// Create a new [tantivy::Document] using a [Card] in a specific [locale](MappedGlobals] as base. -pub fn card_to_document(schema: &tantivy::schema::Schema, locale: &MappedGlobals, card: Card) -> tantivy::Document { +pub fn card_to_document(schema: &Schema, locale: &MappedGlobals, card: Card) -> Document { use tantivy::*; use itertools::Itertools; @@ -163,9 +169,113 @@ pub fn card_to_document(schema: &tantivy::schema::Schema, locale: &MappedGlobals /// Stage all [tantivy::Document]s generated from [Card]s contained in the passed [Vec] for write on a [tantivy::Index] via the given [tantivy::IndexWriter]. -pub fn cards_to_index(writer: tantivy::IndexWriter, schema: tantivy::schema::Schema, locale: MappedGlobals, cards: Vec) -> tantivy::Result<()> { +pub fn cards_to_index(writer: IndexWriter, schema: Schema, locale: MappedGlobals, cards: Vec) -> tantivy::Result<()> { for card in cards { writer.add_document(card_to_document(&schema, &locale, card))?; }; Ok(()) } + + +/// Build a new [tantivy::Index] for [crate::schena::setbundle::Card] documents, based on [card_schema]. +pub(crate) fn card_index() -> Index { + Index::create_in_ram( + card_schema() + ) +} + + +/// Build a [tantivy::IndexWriter] with the optimal configuration for [crate::schena::setbundle::Card] documents. +pub(crate) fn card_writer(index: &Index) -> IndexWriter { + index + .writer(4_000_000) + .expect("to be able to allocate 4 MB for a IndexWriter") +} + + +/// Build a [tantivy::IndexReader] with the optimal configuration for [crate::schena::setbundle::Card] documents. +pub(crate) fn card_reader(index: &Index) -> IndexReader { + index + .reader_builder() + .reload_policy(tantivy::ReloadPolicy::Manual) + .try_into() + .expect("to be able to create a IndexReader") +} + + +/// Build a new [tantivy::QueryParser] for [Card] documents, based on [crate::search::card::card_schema] and the passed index. +pub(crate) fn card_query_parser(index: &Index) -> QueryParser { + let schema = index.schema(); + + let f_code = schema.get_field("code").expect("schema to have a 'code' field"); + let f_name = schema.get_field("name").expect("schema to have a 'name' field"); + let f_type = schema.get_field("type").expect("schema to have a 'type' field"); + let f_set = schema.get_field("set").expect("schema to have a 'set' field"); + let f_rarity = schema.get_field("rarity").expect("schema to have a 'rarity' field"); + let f_collectible = schema.get_field("collectible").expect("schema to have a 'collectible' field"); + let f_regions = schema.get_field("regions").expect("schema to have a 'regions' field"); + let f_attack = schema.get_field("attack").expect("schema to have a 'attack' field"); + let f_cost = schema.get_field("cost").expect("schema to have a 'cost' field"); + let f_health = schema.get_field("health").expect("schema to have a 'health' field"); + let f_spellspeed = schema.get_field("spellspeed").expect("schema to have a 'spellspeed' field"); + let f_keywords = schema.get_field("keywords").expect("schema to have a 'keywords' field"); + let f_description = schema.get_field("description").expect("schema to have a 'description' field"); + let f_levelup = schema.get_field("levelup").expect("schema to have a 'levelup' field"); + let f_associated = schema.get_field("associated").expect("schema to have a 'associated' field"); + let f_flavor = schema.get_field("flavor").expect("schema to have a 'flavor' field"); + let f_artist = schema.get_field("artist").expect("schema to have a 'artist' field"); + let f_subtypes = schema.get_field("subtypes").expect("schema to have a 'subtypes' field"); + let f_supertype = schema.get_field("supertype").expect("schema to have a 'supertype' field"); + + QueryParser::for_index( + &index, + vec![ + f_code, + f_name, + f_type, + f_set, + f_rarity, + f_collectible, + f_regions, + f_attack, + f_cost, + f_health, + f_spellspeed, + f_keywords, + f_description, + f_levelup, + f_associated, + f_flavor, + f_artist, + f_subtypes, + f_supertype, + ] + ) +} + + +pub(crate) enum CardQueryError { + Parsing(QueryParserError), + Search(TantivyError), +} + + +pub(crate) fn card_query(schema: &Schema, reader: &IndexReader, parser: &QueryParser, query: &str, amount: usize) -> Result, CardQueryError> { + log::debug!("Searching for `{}`...", &query); + + let searcher = reader.searcher(); + let query = parser.parse_query(query) + .map_err(CardQueryError::Parsing)?; + let search = searcher.search(&*query, &TopDocs::with_limit(amount)) + .map_err(CardQueryError::Search)?; + + let f_code = schema.get_field("code").expect("schema to have a 'code' field"); + + let results = search.iter() + .filter_map(|(_score, address)| searcher.doc(address.to_owned()).ok()) + .filter_map(|doc| doc.get_first(f_code).cloned()) + .filter_map(|field| field.as_text().map(String::from)) + .collect_vec(); + + Ok(results) +} diff --git a/src/search/mod.rs b/src/search/mod.rs index 74bb9d8..ff18471 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -1,5 +1,3 @@ //! This module implements full-text search on Legends of Runeterra data. pub mod card; -pub(crate) mod botindex; -pub(crate) mod botsearch; \ No newline at end of file diff --git a/src/telegram/bin.rs b/src/telegram/bin.rs index cf74f49..b684958 100644 --- a/src/telegram/bin.rs +++ b/src/telegram/bin.rs @@ -1,3 +1,10 @@ -fn main() { +use std::collections::HashMap; +use std::fs::File; +use std::path::Path; +use log::*; -} \ No newline at end of file + +#[tokio::main] +async fn main() { + pretty_env_logger::init(); +}