@ -0,0 +1,9 @@
|
||||
index
|
||||
.github
|
||||
target
|
||||
*.csv
|
||||
.git
|
||||
|
||||
.vscode/*
|
||||
.idea
|
||||
.DS_Store
|
@ -0,0 +1,9 @@
|
||||
/target
|
||||
/index
|
||||
|
||||
*.csv
|
||||
/release
|
||||
/zlib-searcher
|
||||
|
||||
.vscode
|
||||
index_0.6.zip
|
@ -0,0 +1,33 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/zlib-searcher",
|
||||
"crates/zlib-searcher-core",
|
||||
"crates/zlib-searcher-desktop",
|
||||
]
|
||||
|
||||
|
||||
[workspace.package]
|
||||
edition = "2021"
|
||||
authors = ["zu1k <i@zu1k.com>"]
|
||||
description = "search z-library index."
|
||||
homepage = "https://github.com/zlib-searcher/zlib-searcher"
|
||||
repository = "https://github.com/zlib-searcher/zlib-searcher"
|
||||
license = "MIT"
|
||||
exclude = [".github/", "index/", "frontend/"]
|
||||
|
||||
|
||||
[profile.release]
|
||||
strip = true
|
||||
lto = true
|
||||
opt-level = 3
|
||||
codegen-units = 1
|
||||
|
||||
|
||||
[workspace.dependencies]
|
||||
anyhow = "1.0"
|
||||
env_logger = "0.10"
|
||||
log = "0.4"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "2.0"
|
||||
|
||||
zlib-searcher-core = { path = "crates/zlib-searcher-core" }
|
@ -0,0 +1,10 @@
|
||||
[build.env]
|
||||
passthrough = [
|
||||
"RUSTFLAGS"
|
||||
]
|
||||
|
||||
[target.mips-unknown-linux-musl]
|
||||
image = "rustembedded/cross:mips-unknown-linux-musl-0.2.1"
|
||||
|
||||
[target.mipsel-unknown-linux-musl]
|
||||
image = "rustembedded/cross:mipsel-unknown-linux-musl-0.2.1"
|
@ -0,0 +1,16 @@
|
||||
FROM node:19-bullseye as frontend
|
||||
|
||||
COPY . /source
|
||||
RUN cd /source/frontend && npm install && npm run build
|
||||
|
||||
FROM rust:1.65-buster as backend
|
||||
|
||||
COPY . /source
|
||||
COPY --from=frontend /source/frontend/dist /source/frontend/dist
|
||||
RUN cd /source && cargo build --release -p zlib-searcher
|
||||
|
||||
FROM ubuntu:22.04
|
||||
|
||||
COPY --from=backend /source/target/release/zlib-searcher /zlib-searcher
|
||||
|
||||
CMD ["/zlib-searcher", "run", "-b", "0.0.0.0:7070"]
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 zlib-searcher's authors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,27 @@
|
||||
NAME=zlib-searcher
|
||||
|
||||
PREFIX ?= /usr/local/bin
|
||||
TARGET ?= debug
|
||||
|
||||
.PHONY: all frontend_preinstall frontend build clean
|
||||
all: build
|
||||
|
||||
frontend_preinstall:
|
||||
pnpm -C frontend install
|
||||
|
||||
frontend:
|
||||
pnpm -C frontend run build
|
||||
|
||||
build: frontend
|
||||
ifeq (${TARGET}, release)
|
||||
cargo build -p zlib-searcher --release
|
||||
else
|
||||
cargo build -p zlib-searcher
|
||||
endif
|
||||
|
||||
clean:
|
||||
cargo clean
|
||||
rm -rf release
|
||||
|
||||
releases:
|
||||
cd scripts && ./build_release.sh -a a
|
@ -0,0 +1,131 @@
|
||||
# Millelibri project
|
||||
|
||||
This is a fork from zlib-searcher project. Future goals:
|
||||
|
||||
- improve search indexes (language)
|
||||
- add books
|
||||
- expand file types
|
||||
|
||||
# zlib(libgen) searcher
|
||||
|
||||
[](https://github.com/zlib-searcher/zlib-searcher/stargazers)
|
||||
[](https://github.com/zlib-searcher/zlib-searcher/network)
|
||||
[](https://github.com/zlib-searcher/zlib-searcher/releases)
|
||||
[](https://github.com/zlib-searcher/zlib-searcher/issues)
|
||||
[](https://github.com/zlib-searcher/zlib-searcher/blob/master/LICENSE)
|
||||
|
||||
Search `zlib`/`libgen` index to get `ipfs_cid`.
|
||||
|
||||
We don't save and provide files, we provide search.
|
||||
|
||||
I hope everyone has a copy of the index locally, so that there is no need to rely on any centralized service.
|
||||
|
||||
## Deploy with Docker
|
||||
|
||||
```
|
||||
git clone https://github.com/zlib-searcher/zlib-searcher.git && cd zlib-searcher
|
||||
wget https://github.com/zlib-searcher/zlib-searcher/releases/download/0.6.0/index_0.6.zip && unzip index_0.6.zip
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
Now `zlib-searcher` will listen on `0.0.0.0:7070`.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
### 1. Download the pre-compiled binary from [Release](https://github.com/zlib-searcher/zlib-searcher/releases).
|
||||
|
||||
Or you can compile by yourself. Refer to [Build from source](#build-from-source) for instructions.
|
||||
|
||||
### 2. Download the `index` file that has been created.
|
||||
|
||||
We will give the corresponding `index` download links for each version in the release page.
|
||||
|
||||
Or you can make your own via `zlib-searcher index`.
|
||||
|
||||
Extract the `index` folder to the same level as the program, it should look like the following:
|
||||
|
||||
```
|
||||
zlib_searcher_dir
├── index
│   ├── some index files...
│   └── meta.json
└── zlib-searcher
|
||||
```
|
||||
|
||||
### 3. Run `zlib-searcher run`, it will listen to `127.0.0.1:7070`.
|
||||
|
||||
Access http://127.0.0.1:7070/ to use webui, or you can use the original api.
|
||||
|
||||
#### original search api
|
||||
|
||||
You can search by the following fields:
|
||||
|
||||
- title
|
||||
- author
|
||||
- publisher
|
||||
- extension
|
||||
- language
|
||||
- isbn
|
||||
- zlib_id
|
||||
|
||||
Examples:
|
||||
|
||||
- `http://127.0.0.1:7070/search?limit=30&query=ไฝๅ`
|
||||
- `http://127.0.0.1:7070/search?limit=30&query=title:ๆบๅจๅญฆไน extension:azw3 publisher:ๆธ
ๅ`
|
||||
- `http://127.0.0.1:7070/search?limit=30&query=zlib_id:18557063`
|
||||
- `http://127.0.0.1:7070/search?limit=30&query=isbn:9787302423287`
|
||||
|
||||
## Build from source
|
||||
|
||||
### 1. Build `zlib-searcher`
|
||||
|
||||
First build frontend
|
||||
|
||||
```bash
|
||||
make frontend_preinstall frontend
|
||||
```
|
||||
|
||||
Then build zlib-searcher
|
||||
|
||||
```bash
|
||||
TARGET=release make
|
||||
|
||||
# move the compiled binary to the project root directory
|
||||
mv target/release/zlib-searcher .
|
||||
```
|
||||
|
||||
### 2. Build `index`
|
||||
|
||||
Download `zlib_index_books.csv.zip` and `libgen_index_books.csv.zip` and extract the `csv` files to the project root directory.
|
||||
|
||||
Then run `zlib-searcher index`. You may need to `rm index/*` first.
|
||||
|
||||
If you have other csv files, you can run `zlib-searcher index -f *.csv` to index them.
|
||||
|
||||
The final folder structure should look like this:
|
||||
|
||||
```
|
||||
zlib_searcher_dir // in the example above, it is project root directory.
├── index
│   ├── some index files...
│   └── meta.json
└── zlib-searcher
|
||||
```
|
||||
|
||||
## Raw data
|
||||
|
||||
We downloaded `libgen` sql and `zlib` sql and exported the necessary data from them.
|
||||
|
||||
```
|
||||
id, title, author, publisher, extension, filesize, language, year, pages, isbn, ipfs_cid
|
||||
```
|
||||
|
||||
This raw data is used to generate our `index`, you can download the raw data from here:
|
||||
|
||||
- [zlib_index_books.csv.zip](https://github.com/zlib-searcher/zlib-searcher/releases/download/0.4.0/zlib_index_books.csv.zip)
|
||||
- [libgen_index_books.csv.zip](https://github.com/zlib-searcher/zlib-searcher/releases/download/0.4.0/libgen_index_books.csv.zip)
|
||||
|
||||
## License
|
||||
|
||||
**zlib-searcher** ยฉ [zlib-searcher's authors](https://github.com/zlib-searcher/zlib-searcher/graphs/contributors), Released under the [MIT](./LICENSE) License.
|
@ -0,0 +1,31 @@
|
||||
[package]
|
||||
name = "zlib-searcher-core"
|
||||
version = "0.7.0"
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
env_logger = { workspace = true }
|
||||
log = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_with = { workspace = true }
|
||||
|
||||
tantivy = { version = "0.18", default-features = false, features = ["mmap"] }
|
||||
cang-jie = "0.14"
|
||||
jieba-rs = { version = "0.6", features = ["default-dict"] }
|
||||
|
||||
csv = "1.1"
|
||||
indicatif = "0.17"
|
||||
sysinfo = { version = "0.27", default-features = false }
|
||||
|
||||
|
||||
[features]
|
||||
default = ["best-size"]
|
||||
best-size = ["tantivy/brotli-compression"]
|
||||
best-speed = ["tantivy/lz4-compression"]
|
@ -0,0 +1,101 @@
|
||||
use crate::{Book, Searcher};
|
||||
use indicatif::{ProgressBar, ProgressIterator, ProgressStyle};
|
||||
use log::info;
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{BufRead, BufReader},
|
||||
path::Path,
|
||||
};
|
||||
use sysinfo::{System, SystemExt};
|
||||
use tantivy::doc;
|
||||
|
||||
fn get_memory_arena_num_bytes() -> usize {
|
||||
let sys = System::new_all();
|
||||
let available_memory = sys.available_memory() as usize;
|
||||
let cpu_num = sys.cpus().len();
|
||||
info!("Your system has cpu {cpu_num} cores and {available_memory} Bytes available");
|
||||
|
||||
let chunk_size = 1024 * 1024 * 1024; // 1GB
|
||||
let total_num_chunk = available_memory / chunk_size;
|
||||
|
||||
let s = if total_num_chunk < 2 {
|
||||
// <2G
|
||||
available_memory - 100 * 1024 * 1024 // available_memory-100MB
|
||||
} else {
|
||||
// >2G
|
||||
available_memory * (total_num_chunk - 1) // available_memory-1GB
|
||||
};
|
||||
|
||||
let num_threads = std::cmp::min(cpu_num, 8);
|
||||
let s = std::cmp::min(s, num_threads * 4293967294);
|
||||
|
||||
info!("Using {num_threads} threads and {s} Bytes to do index");
|
||||
s
|
||||
}
|
||||
|
||||
impl Searcher {
    /// Index every record of `csv_file` into the tantivy index.
    ///
    /// The CSV is expected to be header-less, with columns matching the
    /// `Book` struct order. Rows that fail to deserialize, or documents that
    /// fail to be added, are printed and skipped; the remainder is committed
    /// in one batch at the end.
    pub fn index(&mut self, csv_file: impl AsRef<Path>) {
        // Writer memory budget is sized from available system RAM.
        let mut writer = self.index.writer(get_memory_arena_num_bytes()).unwrap();

        let file = File::open(&csv_file).unwrap();
        let reader = BufReader::new(file);

        // The raw dumps carry no header row.
        let mut rdr = csv::ReaderBuilder::new()
            .has_headers(false)
            .from_reader(reader);

        // Second pass over the file solely to size the progress bar.
        let line_count = BufReader::new(File::open(&csv_file).unwrap())
            .lines()
            .count();
        let style = ProgressStyle::default_bar()
            .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")
            .unwrap();
        let bar = ProgressBar::new(line_count as u64)
            .with_message(format!("Indexing {}", csv_file.as_ref().to_str().unwrap()))
            .with_style(style);
        for result in rdr.deserialize::<Book>().progress_with(bar) {
            match result {
                Ok(item) => {
                    if let Err(err) = writer.add_document(doc!(
                        self.id => item.id,
                        self.title => item.title,
                        self.author => item.author,
                        self.publisher => item.publisher,
                        self.extension => item.extension,
                        self.filesize => item.filesize,
                        self.language => item.language,
                        self.year => item.year,
                        self.pages => item.pages,
                        self.isbn => item.isbn,
                        self.ipfs_cid => item.ipfs_cid,
                    )) {
                        // One bad document must not abort the whole run.
                        println!("{err}");
                    }
                }
                Err(err) => {
                    // Malformed CSV row: report and continue.
                    println!("{err}");
                }
            }
        }

        writer.commit().unwrap();
        // Block until segment merging finishes so the index is fully packed.
        writer.wait_merging_threads().expect("merge complete");
    }
}
|
||||
|
||||
/// Smoke test: scan `zlib_index_books.csv` record by record, print the first
/// parse error (if any) and the reader position reached.
/// Requires the CSV dump to be present in the working directory.
#[test]
fn test_csv_der() {
    let file = File::open("zlib_index_books.csv").unwrap();
    let reader = BufReader::new(file);

    // Same reader configuration as Searcher::index (no header row).
    let mut rdr = csv::ReaderBuilder::new()
        .has_headers(false)
        .from_reader(reader);
    for result in rdr.records() {
        if let Err(err) = result {
            println!("{err:?}");
            break;
        }
    }
    // Position pinpoints where parsing stopped (or the end of file).
    println!("{:?}", rdr.position());
}
|
@ -0,0 +1,152 @@
|
||||
use std::path::Path;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DefaultOnError, DefaultOnNull};
|
||||
use tantivy::{schema::*, store::Compressor, Index};
|
||||
use tokenizer::{get_tokenizer, META_DATA_TOKENIZER};
|
||||
|
||||
pub mod index;
|
||||
pub mod search;
|
||||
mod tokenizer;
|
||||
|
||||
/// One book record, as read from the raw CSV dumps and stored in the index.
///
/// `DefaultOnNull` / `DefaultOnError` keep deserialization going when a CSV
/// field is empty or malformed, substituting the type's default value
/// instead of failing the whole row.
#[serde_as]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Book {
    pub id: u64,

    pub title: String,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub author: String,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub publisher: String,
    // File extension, e.g. "epub"/"azw3" (lowercase not guaranteed here).
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub extension: String,
    #[serde_as(deserialize_as = "DefaultOnError")]
    pub filesize: u64,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub language: String,
    #[serde_as(deserialize_as = "DefaultOnError")]
    pub year: u64,
    #[serde_as(deserialize_as = "DefaultOnError")]
    pub pages: u64,
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub isbn: String,
    // IPFS content identifier used to fetch the actual file.
    #[serde_as(deserialize_as = "DefaultOnNull")]
    pub ipfs_cid: String,
}
|
||||
|
||||
impl From<(&Schema, Document)> for Book {
    /// Rebuild a `Book` from a stored tantivy document.
    ///
    /// Panics only when a schema field is entirely absent from the document;
    /// a value of the wrong kind falls back to the type's default instead.
    fn from((schema, doc): (&Schema, Document)) -> Self {
        // First stored value of a text field, or "" if it is not text.
        macro_rules! get_field_text {
            ($field:expr) => {
                doc.get_first(schema.get_field($field).unwrap())
                    .unwrap()
                    .as_text()
                    .unwrap_or_default()
                    .to_owned()
            };
        }

        // First stored value of a u64 field, or 0 if it is not u64.
        macro_rules! get_field_u64 {
            ($field:expr) => {
                doc.get_first(schema.get_field($field).unwrap())
                    .unwrap()
                    .as_u64()
                    .unwrap_or_default()
            };
        }

        Book {
            id: get_field_u64!("id"),
            title: get_field_text!("title"),
            author: get_field_text!("author"),
            publisher: get_field_text!("publisher"),
            extension: get_field_text!("extension"),
            filesize: get_field_u64!("filesize"),
            language: get_field_text!("language"),
            year: get_field_u64!("year"),
            pages: get_field_u64!("pages"),
            isbn: get_field_text!("isbn"),
            ipfs_cid: get_field_text!("ipfs_cid"),
        }
    }
}
|
||||
|
||||
/// Wraps the tantivy index together with cached `Field` handles so that
/// indexing and document-to-`Book` conversion avoid repeated schema lookups.
pub struct Searcher {
    index: Index,
    schema: Schema,

    // fields
    id: Field,
    title: Field,
    author: Field,
    publisher: Field,
    extension: Field,
    filesize: Field,
    language: Field,
    year: Field,
    pages: Field,
    isbn: Field,
    ipfs_cid: Field,
}
|
||||
|
||||
impl Searcher {
    /// Open the index at `index_dir`, creating the directory and a fresh
    /// index with the book schema when none exists yet.
    pub fn new(index_dir: impl AsRef<Path>) -> Self {
        // Tokenized text fields use the jieba-based tokenizer; positions are
        // kept so phrase queries work.
        let text_indexing = TextFieldIndexing::default()
            .set_tokenizer(META_DATA_TOKENIZER)
            .set_index_option(IndexRecordOption::WithFreqsAndPositions);
        let text_options = TextOptions::default()
            .set_indexing_options(text_indexing)
            .set_stored();

        // Schema: searchable fields are indexed; everything is stored so the
        // full Book can be reconstructed from a hit.
        let mut schema_builder = Schema::builder();
        let id = schema_builder.add_u64_field("id", INDEXED | STORED);
        let title = schema_builder.add_text_field("title", text_options.clone());
        let author = schema_builder.add_text_field("author", text_options.clone());
        let publisher = schema_builder.add_text_field("publisher", text_options);
        // STRING = untokenized, exact-match only — appropriate for extensions.
        let extension = schema_builder.add_text_field("extension", STRING | STORED);
        let filesize = schema_builder.add_u64_field("filesize", STORED);
        let language = schema_builder.add_text_field("language", TEXT | STORED);
        let year = schema_builder.add_u64_field("year", STORED);
        let pages = schema_builder.add_u64_field("pages", STORED);
        let isbn = schema_builder.add_text_field("isbn", TEXT | STORED);
        // Stored only — never searched directly.
        let ipfs_cid = schema_builder.add_text_field("ipfs_cid", STORED);
        let schema = schema_builder.build();

        // open or create index
        let index_dir = index_dir.as_ref();
        let mut index = Index::open_in_dir(index_dir).unwrap_or_else(|_| {
            std::fs::create_dir_all(index_dir).expect("create index directory");
            Index::create_in_dir(index_dir, schema.clone()).unwrap()
        });
        // Doc-store compression is selected by cargo feature flag.
        #[cfg(feature = "best-size")]
        {
            index.settings_mut().docstore_compression = Compressor::Brotli; // size: 2.1G, size is best
        }
        #[cfg(feature = "best-speed")]
        {
            index.settings_mut().docstore_compression = Compressor::Lz4; // size: 3.1G, speed is best
        }

        index
            .tokenizers()
            .register(META_DATA_TOKENIZER, get_tokenizer());
        // Best-effort: result deliberately ignored if the multithreaded
        // executor cannot be installed.
        _ = index.set_default_multithread_executor();

        Self {
            index,
            schema,
            id,
            title,
            author,
            publisher,
            extension,
            filesize,
            language,
            year,
            pages,
            isbn,
            ipfs_cid,
        }
    }
}
|
@ -0,0 +1,29 @@
|
||||
use crate::{Book, Searcher};
|
||||
use tantivy::{collector::TopDocs, query::QueryParser};
|
||||
|
||||
impl Searcher {
    /// Search title/author/publisher/isbn for `query`, returning at most
    /// `limit` books ranked by relevance score.
    pub fn search(&self, query: &str, limit: usize) -> Vec<Book> {
        let reader = self.index.reader().unwrap();
        let searcher = reader.searcher();

        let mut query_parser = QueryParser::for_index(
            &self.index,
            vec![self.title, self.author, self.publisher, self.isbn],
        );
        // AND terms together by default instead of OR.
        query_parser.set_conjunction_by_default();
        let query = query_parser.parse_query(query).unwrap();

        let top_docs = searcher
            .search(&query, &TopDocs::with_limit(limit))
            .unwrap();

        // Each hit is (score, doc_address); fetch the stored document and
        // convert it back into a Book.
        top_docs
            .iter()
            .map(|d| {
                let doc = searcher.doc(d.1).unwrap();
                let item: Book = (&self.schema, doc).into();
                item
            })
            .collect()
    }
}
|
@ -0,0 +1,18 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use cang_jie::{CangJieTokenizer, TokenizerOption};
|
||||
use jieba_rs::Jieba;
|
||||
use tantivy::tokenizer::{AsciiFoldingFilter, LowerCaser, RemoveLongFilter, TextAnalyzer};
|
||||
|
||||
pub const META_DATA_TOKENIZER: &str = "meta_data_tokenizer";
|
||||
|
||||
/// Build the analyzer used for all tokenized metadata fields:
/// jieba word segmentation (search mode, HMM disabled), then drop tokens
/// longer than 20 chars, fold accented ASCII, and lower-case everything.
pub fn get_tokenizer() -> TextAnalyzer {
    let cangjie = CangJieTokenizer {
        worker: Arc::new(Jieba::new()),
        option: TokenizerOption::ForSearch { hmm: false },
    };
    TextAnalyzer::from(cangjie)
        .filter(RemoveLongFilter::limit(20))
        .filter(AsciiFoldingFilter)
        .filter(LowerCaser)
}
|
@ -0,0 +1,4 @@
|
||||
# Generated by Cargo
|
||||
# will have compiled files and executables
|
||||
/target/
|
||||
/index/
|
@ -0,0 +1,38 @@
|
||||
[package]
|
||||
name = "zlib-searcher-desktop"
|
||||
version = "0.7.0"
|
||||
edition = "2021"
|
||||
authors = ["Wybxc <wybxc@qq.com>", "zu1k <i@zu1k.com>"]
|
||||
description = "search z-library index."
|
||||
homepage = "https://github.com/zlib-searcher/zlib-searcher"
|
||||
repository = "https://github.com/zlib-searcher/zlib-searcher"
|
||||
license = "MIT"
|
||||
|
||||
|
||||
[build-dependencies]
|
||||
tauri-build = { version = "1.2.1", features = [] }
|
||||
|
||||
[dependencies]
|
||||
zlib-searcher-core = { workspace = true }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
env_logger = { workspace = true }
|
||||
log = { workspace = true }
|
||||
|
||||
serde_json = "1.0"
|
||||
tauri = { version = "1.2.1", features = ["dialog-open", "shell-open"] }
|
||||
|
||||
tokio = { version = "1", features = ["sync", "parking_lot"] }
|
||||
|
||||
confy = "0.5"
|
||||
dunce = "1.0"
|
||||
|
||||
|
||||
[features]
|
||||
# by default Tauri runs in production mode
|
||||
# when `tauri dev` runs it is executed with `cargo run --no-default-features` if `devPath` is an URL
|
||||
default = ["custom-protocol"]
|
||||
# this feature is used for production builds where `devPath` points to the filesystem
|
||||
# DO NOT remove this
|
||||
custom-protocol = ["tauri/custom-protocol"]
|
@ -0,0 +1,3 @@
|
||||
/// Standard tauri build script: generates the context metadata consumed by
/// `tauri::generate_context!` in the application crate.
fn main() {
    tauri_build::build()
}
|
After Width: | Height: | Size: 3.4 KiB |
After Width: | Height: | Size: 5.7 KiB |
After Width: | Height: | Size: 1.4 KiB |
After Width: | Height: | Size: 3.4 KiB |
After Width: | Height: | Size: 4.7 KiB |
After Width: | Height: | Size: 4.9 KiB |
After Width: | Height: | Size: 10 KiB |
After Width: | Height: | Size: 1.3 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 1.7 KiB |
After Width: | Height: | Size: 2.3 KiB |
After Width: | Height: | Size: 2.8 KiB |
After Width: | Height: | Size: 1.8 KiB |
After Width: | Height: | Size: 14 KiB |
After Width: | Height: | Size: 6.2 KiB |
@ -0,0 +1,121 @@
|
||||
#![cfg_attr(
|
||||
all(not(debug_assertions), target_os = "windows"),
|
||||
windows_subsystem = "windows"
|
||||
)]
|
||||
|
||||
use log::info;
|
||||
use std::{error::Error, path::PathBuf};
|
||||
use zlib_searcher_core::{Book, Searcher};
|
||||
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tauri::State;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// User-editable desktop app settings, persisted via `confy`.
#[derive(Clone, Debug, Serialize, Deserialize)]
struct AppConfig {
    // Directory holding the tantivy index files.
    pub index_dir: PathBuf,
    // IPFS gateway URLs — presumably used by the frontend to fetch files
    // by ipfs_cid; confirm against the frontend code.
    pub ipfs_gateways: Vec<String>,
}
|
||||
|
||||
/// Resolve (creating it if missing) a directory named `name` next to the
/// running executable, returning its canonicalized path.
/// Returns `None` when the executable path cannot be determined or the
/// directory cannot be created/canonicalized.
fn get_dir(name: &str) -> Option<PathBuf> {
    let dir = std::env::current_exe().ok()?.parent()?.join(name);
    std::fs::create_dir_all(&dir).ok()?;
    // dunce avoids Windows `\\?\` verbatim paths in the result.
    let dir = dunce::canonicalize(dir).ok()?;
    Some(dir)
}
|
||||
|
||||
impl Default for AppConfig {
    /// Defaults: an `index` directory beside the executable (falling back to
    /// a relative `index` path) and no IPFS gateways configured.
    fn default() -> Self {
        let index_dir = get_dir("index").unwrap_or_else(|| PathBuf::from("index"));
        Self {
            index_dir,
            ipfs_gateways: vec![],
        }
    }
}
|
||||
|
||||
impl AppConfig {
    // Profile name confy uses to locate the config file on disk.
    const APP_NAME: &'static str = "zlib-searcher-desktop";

    /// Load the configuration from disk (confy falls back to `Default`
    /// on first run).
    pub fn load() -> Result<Self, Box<dyn std::error::Error>> {
        let config = confy::load(Self::APP_NAME, None)?;
        Ok(config)
    }

    /// Persist the configuration to disk.
    pub fn save(&self) -> Result<(), Box<dyn std::error::Error>> {
        confy::store(Self::APP_NAME, None, self)?;
        Ok(())
    }

    /// Absolute path of the backing configuration file.
    pub fn configuration_file_path() -> Result<PathBuf, Box<dyn std::error::Error>> {
        Ok(confy::get_configuration_file_path(Self::APP_NAME, None)?)
    }
}
|
||||
|
||||
/// Tauri command: return a snapshot of the current app configuration.
#[tauri::command]
async fn get_config(config: State<'_, Mutex<AppConfig>>) -> Result<AppConfig, String> {
    Ok(config.lock().await.clone())
}
|
||||
|
||||
/// Tauri command: replace the app configuration. Rebuilds the shared
/// `Searcher` when the index directory changed, then persists the new
/// config to disk (persistence errors are reported back as strings).
#[tauri::command]
async fn set_config(
    new_config: AppConfig,
    config: State<'_, Mutex<AppConfig>>,
    searcher: tauri::State<'_, Mutex<Searcher>>,
) -> Result<(), String> {
    let mut config = config.lock().await;

    // reload searcher if index_dir changed
    if config.index_dir != new_config.index_dir {
        info!("index_dir changed, reloading searcher");
        let mut searcher = searcher.lock().await;
        *searcher = Searcher::new(new_config.index_dir.clone());
    }

    *config = new_config;
    config.save().map_err(|e| e.to_string())?;

    info!("Config saved: {:?}", config);
    Ok(())
}
|
||||
|
||||
/// Tauri command: run a query against the shared `Searcher` and return at
/// most `limit` matching books.
#[tauri::command]
async fn search(
    searcher: tauri::State<'_, Mutex<Searcher>>,
    query: String,
    limit: usize,
) -> Result<Vec<Book>, ()> {
    info!("Search: {}", query);
    Ok(searcher.lock().await.search(&query, limit))
}
|
||||
|
||||
/// Tauri command: report the crate version baked in at compile time.
#[tauri::command]
fn version() -> String {
    VERSION.to_string()
}
|
||||
|
||||
/// Desktop entry point: load config, build the searcher from its index
/// directory, and launch the tauri application with both as managed state.
fn main() -> Result<(), Box<dyn Error>> {
    env_logger::init();

    let config = AppConfig::load()?;
    let searcher = Mutex::new(Searcher::new(&config.index_dir));
    let config = Mutex::new(config);

    info!(
        "load config from {:?}",
        AppConfig::configuration_file_path()?
    );

    // Config and searcher are shared with the command handlers above
    // through tauri managed state.
    tauri::Builder::default()
        .manage(config)
        .manage(searcher)
        .invoke_handler(tauri::generate_handler![
            version, search, get_config, set_config
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");

    Ok(())
}
|
@ -0,0 +1,71 @@
|
||||
{
|
||||
"build": {
|
||||
"beforeBuildCommand": "cd ../frontend && pnpm run build",
|
||||
"beforeDevCommand": "cd ../frontend && pnpm run dev",
|
||||
"devPath": "http://localhost:5173/",
|
||||
"distDir": "../../frontend/dist"
|
||||
},
|
||||
"package": {
|
||||
"productName": "zLib Searcher",
|
||||
"version": "0.1.0"
|
||||
},
|
||||
"tauri": {
|
||||
"allowlist": {
|
||||
"all": false,
|
||||
"shell": {
|
||||
"open": true
|
||||
},
|
||||
"dialog": {
|
||||
"open": true
|
||||
}
|
||||
},
|
||||
"bundle": {
|
||||
"active": true,
|
||||
"category": "DeveloperTool",
|
||||
"copyright": "",
|
||||
"deb": {
|
||||
"depends": []
|
||||
},
|
||||
"externalBin": [],
|
||||
"icon": [
|
||||
"icons/32x32.png",
|
||||
"icons/128x128.png",
|
||||
"icons/128x128@2x.png",
|
||||
"icons/icon.icns",
|
||||
"icons/icon.ico"
|
||||
],
|
||||
"identifier": "com.github.zlib-searcher",
|
||||
"longDescription": "",
|
||||
"macOS": {
|
||||
"entitlements": null,
|
||||
"exceptionDomain": "",
|
||||
"frameworks": [],
|
||||
"providerShortName": null,
|
||||
"signingIdentity": null
|
||||
},
|
||||
"resources": [],
|
||||
"shortDescription": "",
|
||||
"targets": "all",
|
||||
"windows": {
|
||||
"certificateThumbprint": null,
|
||||
"digestAlgorithm": "sha256",
|
||||
"timestampUrl": ""
|
||||
}
|
||||
},
|
||||
"security": {
|
||||
"csp": null
|
||||
},
|
||||
"updater": {
|
||||
"active": false
|
||||
},
|
||||
"windows": [
|
||||
{
|
||||
"fullscreen": false,
|
||||
"height": 900,
|
||||
"resizable": true,
|
||||
"title": "zLib Searcher",
|
||||
"width": 1500
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
[package]
|
||||
name = "zlib-searcher"
|
||||
version = "0.7.0"
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description.workspace = true
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
license.workspace = true
|
||||
default-run = "zlib-searcher"
|
||||
|
||||
[dependencies]
|
||||
zlib-searcher-core = { workspace = true }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
env_logger = { workspace = true }
|
||||
log = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_with = { workspace = true }
|
||||
|
||||
actix-web = "4"
|
||||
actix-web-static-files = "4.0"
|
||||
static-files = "0.2"
|
||||
|
||||
csv = "1.1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
|
||||
[build-dependencies]
|
||||
static-files = "0.2"
|
||||
|
||||
[features]
|
||||
default = ["best-size"]
|
||||
best-size = ["zlib-searcher-core/best-size"]
|
||||
best-speed = ["zlib-searcher-core/best-speed"]
|
@ -0,0 +1,6 @@
|
||||
use static_files::resource_dir;
|
||||
|
||||
fn main() -> std::io::Result<()> {
    // Re-run the build script when the compiled frontend changes.
    println!("cargo:rerun-if-changed=../../frontend/dist");
    // Embed every file under frontend/dist into the binary; the generated
    // code is included via OUT_DIR/generated.rs in src/main.rs.
    resource_dir("../../frontend/dist").build()
}
|
@ -0,0 +1,61 @@
|
||||
use std::{fs::File, io::BufReader};
|
||||
use zlib_searcher_core::Book;
|
||||
|
||||
/// Filter the zlib/libgen CSV dumps down to books whose titles look
/// Chinese, writing the kept rows to `zlib_libgen_chinese_books.csv`.
fn main() {
    let mut writer = csv::Writer::from_path("zlib_libgen_chinese_books.csv").unwrap();

    // Closure (not fn) so it can borrow the shared output writer.
    let mut filter_csv = |path: &str| {
        let file = File::open(path).unwrap();
        let reader = BufReader::new(file);

        // The dumps carry no header row.
        let mut rdr = csv::ReaderBuilder::new()
            .has_headers(false)
            .from_reader(reader);
        for result in rdr.deserialize::<Book>() {
            match result {
                Ok(ref book) => {
                    if is_chinese_title(book) {
                        if let Err(err) = writer.serialize(book) {
                            println!("err: {err}");
                        }
                    }
                }
                Err(err) => {
                    // Malformed row: report and continue.
                    println!("{err}");
                }
            }
        }
        // Final reader position — helps locate trailing parse problems.
        println!("{:?}", rdr.position());
    };

    filter_csv("zlib_index_books.csv");
    filter_csv("libgen_index_books.csv");
}
|
||||
|
||||
fn is_chinese_title(book: &Book) -> bool {
|
||||
let chinese_char_count = book.title.matches(is_chinese_char).count();
|
||||
chinese_char_count as f32 / book.title.len() as f32 > 0.3
|
||||
}
|
||||
|
||||
/// Returns `true` when `c` lies in one of the Unicode ranges used for
/// Chinese text: CJK Unified Ideographs and extensions A-D, Kangxi and CJK
/// radicals, compatibility ideographs, strokes, description characters,
/// bopomofo, and a few PUA ranges used by legacy Chinese encodings.
#[inline(always)]
const fn is_chinese_char(c: char) -> bool {
    let cp = c as u32;
    (cp >= 0x4E00 && cp <= 0x9FA5)        // CJK Unified Ideographs
        || (cp >= 0x9FA6 && cp <= 0x9FFF) // CJK Unified Ideographs (additions)
        || (cp >= 0x3400 && cp <= 0x4DB5) // Extension A
        || (cp >= 0x20000 && cp <= 0x2A6D6) // Extension B
        || (cp >= 0x2A700 && cp <= 0x2B734) // Extension C
        || (cp >= 0x2B740 && cp <= 0x2B81D) // Extension D
        || (cp >= 0x2F00 && cp <= 0x2FD5) // Kangxi radicals
        || (cp >= 0x2E80 && cp <= 0x2EF3) // CJK radicals supplement
        || (cp >= 0xF900 && cp <= 0xFAD9) // Compatibility ideographs
        || (cp >= 0x2F800 && cp <= 0x2FA1D) // Compatibility supplement
        || (cp >= 0xE815 && cp <= 0xE86F) // PUA (legacy encodings)
        || (cp >= 0xE400 && cp <= 0xE5E8) // PUA
        || (cp >= 0xE600 && cp <= 0xE6CF) // PUA
        || (cp >= 0x31C0 && cp <= 0x31E3) // CJK strokes
        || (cp >= 0x2FF0 && cp <= 0x2FFB) // Ideographic description chars
        || (cp >= 0x3105 && cp <= 0x3120) // Bopomofo
        || (cp >= 0x31A0 && cp <= 0x31BA) // Bopomofo extended
}
|
@ -0,0 +1,141 @@
|
||||
use actix_web::{
|
||||
get, http::header, middleware::Logger, web, App, HttpResponse, HttpServer, Responder,
|
||||
};
|
||||
use actix_web_static_files::ResourceFiles;
|
||||
use clap::Parser;
|
||||
use log::{info, LevelFilter};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
use zlib_searcher_core::{Book, Searcher};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/generated.rs"));
|
||||
|
||||
/// Shared web-server state. The searcher sits behind an `Arc` so each
/// actix worker holds a cheap clone of the same index.
#[derive(Clone)]
struct AppState {
    searcher: Arc<Searcher>,
}

impl AppState {
    /// Open (or create) the tantivy index at `index_dir`.
    pub fn init(index_dir: &str) -> Self {
        info!("AppState init!");
        AppState {
            searcher: Arc::new(Searcher::new(index_dir)),
        }
    }
}
|
||||
|
||||
/// Default page size for `/search` when the request omits `limit`
/// (referenced by the serde `default` attribute on `SearchQuery`).
fn default_limit() -> usize {
    const DEFAULT_SEARCH_LIMIT: usize = 30;
    DEFAULT_SEARCH_LIMIT
}
|
||||
|
||||
/// Query-string parameters of `GET /search`.
#[derive(Deserialize)]
struct SearchQuery {
    query: String,
    // Maximum number of results; defaults to 30 when omitted.
    #[serde(default = "default_limit")]
    limit: usize,
}

/// JSON envelope returned by `GET /search`.
#[derive(Serialize)]
struct SearchResult {
    books: Vec<Book>,
}
|
||||
|
||||
/// `GET /search?query=...&limit=...` — run the query and return matching
/// books as JSON. CORS is opened wide so third-party frontends can call it.
#[get("/search")]
async fn search(query: web::Query<SearchQuery>, state: web::Data<AppState>) -> impl Responder {
    let books = state.searcher.search(&query.query, query.limit);
    let result = SearchResult { books };

    return HttpResponse::Ok()
        .insert_header(header::ContentType::json())
        .insert_header((header::ACCESS_CONTROL_ALLOW_ORIGIN, "*"))
        .json(result);
}
|
||||
|
||||
/// Command-line interface: `zlib-searcher <run|index> ...`.
#[derive(Parser)]
#[clap(author, version, about, long_about)]
struct AppOpts {
    #[clap(subcommand)]
    subcmd: SubCommand,
}

#[derive(Parser)]
enum SubCommand {
    /// run search webserver
    Run(Run),
    /// index the raw data
    Index(Index),
}

/// Options for the `run` subcommand.
#[derive(Parser)]
struct Run {
    #[clap(
        short,
        long,
        default_value = "127.0.0.1:7070",
        help = "webserver bind address"
    )]
    bind: String,
}

/// Options for the `index` subcommand.
#[derive(Parser)]
struct Index {
    // Zero files means "use the default dump names" (see fn index below).
    #[clap(short, long, num_args=1.., help = "specify csv file to be indexed")]
    file: Vec<PathBuf>,
}
|
||||
|
||||
/// Server entry point: parse CLI args and dispatch to `run` or `index`.
fn main() {
    // Default log level is Info unless overridden via env_logger's env vars.
    env_logger::builder().filter_level(LevelFilter::Info).init();

    let args = AppOpts::parse();
    match args.subcmd {
        SubCommand::Run(opts) => run(opts).unwrap(),
        SubCommand::Index(opts) => index(opts),
    }
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn run(opts: Run) -> std::io::Result<()> {
|
||||
info!("zlib-searcher webserver started!");
|
||||
|
||||
let index_dir = std::env::current_exe()
|
||||
.unwrap()
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("index")
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let app_state = AppState::init(&index_dir);
|
||||
|
||||
HttpServer::new(move || {
|
||||
let generated = generate();
|
||||
App::new()
|
||||
.wrap(Logger::default())
|
||||
.app_data(web::Data::new(app_state.clone()))
|
||||
.service(search)
|
||||
.service(ResourceFiles::new("/", generated))
|
||||
})
|
||||
.bind(opts.bind)?
|
||||
.run()
|
||||
.await
|
||||
}
|
||||
|
||||
fn index(opts: Index) {
|
||||
let index_dir = std::env::current_exe()
|
||||
.unwrap()
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("index")
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let mut searcher = Searcher::new(&index_dir);
|
||||
|
||||
if opts.file.is_empty() {
|
||||
vec!["zlib_index_books.csv", "libgen_index_books.csv"]
|
||||
.iter()
|
||||
.for_each(|file| searcher.index(file));
|
||||
} else {
|
||||
opts.file.iter().for_each(|file| searcher.index(file));
|
||||
}
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
version: '3'
|
||||
|
||||
services:
|
||||
zlib:
|
||||
image: lamacchinadesiderante/millelibri:latest
|
||||
|
||||
# image: millelibri:v0.2
|
||||
|
||||
# image: millelibri
|
||||
|
||||
# build:
|
||||
# context: .
|
||||
# dockerfile: ./Dockerfile
|
||||
|
||||
restart: always
|
||||
|
||||
ports:
|
||||
- "7070:7070"
|
||||
|
||||
volumes:
|
||||
- ./index:/index
|
@ -0,0 +1,4 @@
|
||||
*.log
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
@ -0,0 +1,2 @@
|
||||
# .env.development
|
||||
VITE_BACKEND_BASE_API = 'http://127.0.0.1:7070/'
|
@ -0,0 +1,2 @@
|
||||
# .env.production
|
||||
VITE_BACKEND_BASE_API = ''
|
@ -0,0 +1,26 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
|
||||
components.d.ts
|
@ -0,0 +1,4 @@
|
||||
singleQuote: true
|
||||
semi: true
|
||||
printWidth: 100
|
||||
trailingComma: none
|
@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Millelibri / zlib searcher</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,49 @@
|
||||
{
|
||||
"name": "frontend",
|
||||
"private": true,
|
||||
"version": "0.2.0",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/lamacchinadesiderante/millelibri",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@chakra-ui/react": "^2.4.6",
|
||||
"@chakra-ui/skip-nav": "^2.0.13",
|
||||
"@chakra-ui/system": "^2.3.7",
|
||||
"@emotion/react": "^11.10.5",
|
||||
"@emotion/styled": "^11.10.5",
|
||||
"@tanstack/react-table": "^8.7.4",
|
||||
"@tanstack/table-core": "^8.7.4",
|
||||
"@tauri-apps/api": "^1.2.0",
|
||||
"ahooks": "^3.7.4",
|
||||
"axios": "^1.2.2",
|
||||
"filesize": "^10.0.6",
|
||||
"framer-motion": "^7.10.3",
|
||||
"i18next": "^22.4.6",
|
||||
"i18next-browser-languagedetector": "^7.0.1",
|
||||
"lodash": "^4.17.21",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-hook-form": "^7.41.3",
|
||||
"react-i18next": "^12.1.1",
|
||||
"react-icons": "^4.7.1",
|
||||
"react-intersection-observer": "^9.4.1",
|
||||
"react-responsive": "^9.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.20.7",
|
||||
"@darkobits/vite-plugin-favicons": "^0.1.8",
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/node": "^18.11.18",
|
||||
"@types/react": "^18.0.26",
|
||||
"@types/react-dom": "^18.0.10",
|
||||
"@vitejs/plugin-react": "^2.2.0",
|
||||
"prettier": "^2.8.1",
|
||||
"typescript": "^4.9.4",
|
||||
"vite": "^3.2.5",
|
||||
"vite-plugin-top-level-await": "^1.2.2"
|
||||
}
|
||||
}
|
@ -0,0 +1,67 @@
|
||||
import { Flex, HStack, Icon, IconButton, Spacer } from '@chakra-ui/react';
|
||||
import React, { Suspense, useState } from 'react';
|
||||
import { SkipNavContent, SkipNavLink } from '@chakra-ui/skip-nav';
|
||||
|
||||
import { Book } from './scripts/searcher';
|
||||
import BooksView from './components/BooksView';
|
||||
import ColorModeSwitch from './components/ColorModeSwitch';
|
||||
import ExternalLink from './components/ExternalLink';
|
||||
import { FaGithub } from 'react-icons/fa';
|
||||
import Footer from './components/Footer';
|
||||
import Header from './components/Header';
|
||||
import LanguageSwitch from './components/LanguageSwitch';
|
||||
import Search from './components/Search';
|
||||
import { repository } from '../package.json';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
// Main content area: <Search> writes its results into local state,
// which <BooksView> then renders as the results table.
const Main: React.FC = () => {
  const [books, setBooks] = useState<Book[]>([]);
  return (
    <>
      <SkipNavContent />
      <Search setBooks={setBooks} />
      <BooksView books={books} />
    </>
  );
};
|
||||
|
||||
// Lazily load the settings panel appropriate for the build target:
// the Tauri desktop variant or the plain web variant.
// NOTE(review): assumes VITE_TAURI === '1' is set by the desktop build
// pipeline — confirm against the Vite/Tauri config.
const Settings =
  import.meta.env.VITE_TAURI === '1'
    ? React.lazy(() => import('./components/Settings-tauri'))
    : React.lazy(() => import('./components/Settings'));
|
||||
|
||||
const App: React.FC = () => {
|
||||
const { t } = useTranslation();
|
||||
|
||||
return (
|
||||
<Flex direction="column" minH="100vh">
|
||||
<SkipNavLink>Skip to content</SkipNavLink>
|
||||
<Header title="Millelibri">
|
||||
<HStack spacing={{ base: 1, md: 2 }}>
|
||||
<IconButton
|
||||
as={ExternalLink}
|
||||
aria-label={t('nav.repository')}
|
||||
title={t('nav.repository') ?? ''}
|
||||