본문 바로가기
Rust

[Rust] Web Spider 예제 (57일차)

by 꾸압 2023. 2. 12.

 

<예제 코드_1>

# Cargo.toml

# ... 중략

[dependencies]
spider = "1.18.9"
tokio = { version = "1.25.0", features = ["full"] }

 

// src/main.rs
extern crate spider;

use spider::website::Website;
use spider::tokio;

#[tokio::main]
async fn main() {
    let url = "https://choosealicense.com";
    let mut website: Website = Website::new(&url);
    website.crawl().await;

    for page in website.get_pages() {
        println!("- {}", page.get_url());
    }
}

 


 

<예제 코드_2>

# Cargo.toml

# ... 중략

[dependencies]
tokio = { version = "1.25.0", features = ["full"] }
spider = { version = "1.18.9"}

 

// src/main.rs
extern crate spider;

use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    let mut website: Website = Website::new("https://rsseau.fr");
    website.on_link_find_callback = |s| {
        println!("link target: {}", s);
        s
    };
    website.crawl().await;
}

 

 


 

<예제 코드_3>

# Cargo.toml

# ... 중략

[dependencies]
tokio = { version = "1.25.0", features = ["full"] }
spider = { version = "1.18.9", features = ["regex", "ua_generator", "jemalloc"] }
log = "0.4.0"
env_logger = "0.9.0"

 

// src/main.rs
extern crate env_logger;
extern crate spider;

use env_logger::Env;
use spider::tokio;
use spider::website::Website;

#[tokio::main]
async fn main() {
    let env = Env::default()
        .filter_or("RUST_LOG", "info")
        .write_style("always");

    env_logger::init_from_env(env);

    let mut website: Website = Website::new("https://rsseau.fr");
    website.configuration.respect_robots_txt = true;
    website.configuration.delay = 15;
    website.configuration.user_agent = "SpiderBot".into();

    website.scrape().await;

    for page in website.get_pages() {
        println!("{}", page.get_html());
    }
}

 

 


 

<참조 1> https://crates.io/crates/spider

<참조 2> https://github.com/spider-rs/spider/blob/main/examples/scrape.rs

<참조 3> https://github.com/spider-rs/spider/blob/main/examples/callback.rs

<참조 4>

 

 

댓글