153 lines
4.8 KiB
Rust
153 lines
4.8 KiB
Rust
use std::path::Path;
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_with::{serde_as, DefaultOnError, DefaultOnNull};
|
|
use tantivy::{schema::*, store::Compressor, Index};
|
|
use tokenizer::{get_tokenizer, META_DATA_TOKENIZER};
|
|
|
|
pub mod index;
|
|
pub mod search;
|
|
mod tokenizer;
|
|
|
|
#[serde_as]
|
|
#[derive(Debug, Default, Serialize, Deserialize)]
|
|
pub struct Book {
|
|
pub id: u64,
|
|
|
|
pub title: String,
|
|
#[serde_as(deserialize_as = "DefaultOnNull")]
|
|
pub author: String,
|
|
#[serde_as(deserialize_as = "DefaultOnNull")]
|
|
pub publisher: String,
|
|
#[serde_as(deserialize_as = "DefaultOnNull")]
|
|
pub extension: String,
|
|
#[serde_as(deserialize_as = "DefaultOnError")]
|
|
pub filesize: u64,
|
|
#[serde_as(deserialize_as = "DefaultOnNull")]
|
|
pub language: String,
|
|
#[serde_as(deserialize_as = "DefaultOnError")]
|
|
pub year: u64,
|
|
#[serde_as(deserialize_as = "DefaultOnError")]
|
|
pub pages: u64,
|
|
#[serde_as(deserialize_as = "DefaultOnNull")]
|
|
pub isbn: String,
|
|
#[serde_as(deserialize_as = "DefaultOnNull")]
|
|
pub ipfs_cid: String,
|
|
}
|
|
|
|
impl From<(&Schema, Document)> for Book {
|
|
fn from((schema, doc): (&Schema, Document)) -> Self {
|
|
macro_rules! get_field_text {
|
|
($field:expr) => {
|
|
doc.get_first(schema.get_field($field).unwrap())
|
|
.unwrap()
|
|
.as_text()
|
|
.unwrap_or_default()
|
|
.to_owned()
|
|
};
|
|
}
|
|
|
|
macro_rules! get_field_u64 {
|
|
($field:expr) => {
|
|
doc.get_first(schema.get_field($field).unwrap())
|
|
.unwrap()
|
|
.as_u64()
|
|
.unwrap_or_default()
|
|
};
|
|
}
|
|
|
|
Book {
|
|
id: get_field_u64!("id"),
|
|
title: get_field_text!("title"),
|
|
author: get_field_text!("author"),
|
|
publisher: get_field_text!("publisher"),
|
|
extension: get_field_text!("extension"),
|
|
filesize: get_field_u64!("filesize"),
|
|
language: get_field_text!("language"),
|
|
year: get_field_u64!("year"),
|
|
pages: get_field_u64!("pages"),
|
|
isbn: get_field_text!("isbn"),
|
|
ipfs_cid: get_field_text!("ipfs_cid"),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct Searcher {
|
|
index: Index,
|
|
schema: Schema,
|
|
|
|
// fields
|
|
id: Field,
|
|
title: Field,
|
|
author: Field,
|
|
publisher: Field,
|
|
extension: Field,
|
|
filesize: Field,
|
|
language: Field,
|
|
year: Field,
|
|
pages: Field,
|
|
isbn: Field,
|
|
ipfs_cid: Field,
|
|
}
|
|
|
|
impl Searcher {
|
|
pub fn new(index_dir: impl AsRef<Path>) -> Self {
|
|
let text_indexing = TextFieldIndexing::default()
|
|
.set_tokenizer(META_DATA_TOKENIZER)
|
|
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
|
|
let text_options = TextOptions::default()
|
|
.set_indexing_options(text_indexing)
|
|
.set_stored();
|
|
|
|
let mut schema_builder = Schema::builder();
|
|
let id = schema_builder.add_u64_field("id", INDEXED | STORED);
|
|
let title = schema_builder.add_text_field("title", text_options.clone());
|
|
let author = schema_builder.add_text_field("author", text_options.clone());
|
|
let publisher = schema_builder.add_text_field("publisher", text_options);
|
|
let extension = schema_builder.add_text_field("extension", STRING | STORED);
|
|
let filesize = schema_builder.add_u64_field("filesize", STORED);
|
|
let language = schema_builder.add_text_field("language", TEXT | STORED);
|
|
let year = schema_builder.add_u64_field("year", STORED);
|
|
let pages = schema_builder.add_u64_field("pages", STORED);
|
|
let isbn = schema_builder.add_text_field("isbn", TEXT | STORED);
|
|
let ipfs_cid = schema_builder.add_text_field("ipfs_cid", STORED);
|
|
let schema = schema_builder.build();
|
|
|
|
// open or create index
|
|
let index_dir = index_dir.as_ref();
|
|
let mut index = Index::open_in_dir(index_dir).unwrap_or_else(|_| {
|
|
std::fs::create_dir_all(index_dir).expect("create index directory");
|
|
Index::create_in_dir(index_dir, schema.clone()).unwrap()
|
|
});
|
|
#[cfg(feature = "best-size")]
|
|
{
|
|
index.settings_mut().docstore_compression = Compressor::Brotli; // size: 2.1G, size is best
|
|
}
|
|
#[cfg(feature = "best-speed")]
|
|
{
|
|
index.settings_mut().docstore_compression = Compressor::Lz4; // size: 3.1G, speed is best
|
|
}
|
|
|
|
index
|
|
.tokenizers()
|
|
.register(META_DATA_TOKENIZER, get_tokenizer());
|
|
_ = index.set_default_multithread_executor();
|
|
|
|
Self {
|
|
index,
|
|
schema,
|
|
id,
|
|
title,
|
|
author,
|
|
publisher,
|
|
extension,
|
|
filesize,
|
|
language,
|
|
year,
|
|
pages,
|
|
isbn,
|
|
ipfs_cid,
|
|
}
|
|
}
|
|
}
|