Reading time: 3 minutes
A couple of months ago, I started reading about Rust, and after purchasing access to a web-scraping API called RocketScrape, I decided to write my first program.
To begin, you will need to add the select, tokio, and reqwest crates to your dependencies:
[dependencies]
select = "0.5.0"
tokio = { version = "1", features = ["full"] }
reqwest = "0.11.4"
And here is the code:
use select::document::Document;
use select::predicate::{Attr, Name, Predicate};
// Placeholder emitted when a CSS-style selector matches nothing on the page.
const NO_VALUE: &str = "NOT FOUND";
// RocketScrape API key; replace the placeholder with your own key before running.
const APIKEY: &str = "write here your API number";
/// Scrapes one catalogue page of books.toscrape.com (page number `i`) through
/// the RocketScrape proxy and prints one CSV-like line per book:
/// page URL, title, price, relative link, and cover-image path.
///
/// Returns `Ok(())` even on a non-success HTTP status (it only logs the
/// failure); propagates transport and body-decoding errors via `?`.
async fn scrapebooks(i: &i32) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // Route the target URL through the scraping API.
    let rocketapi = format!("https://api.rocketscrap.io/?apiKey={}&url=", APIKEY);
    let site_url = format!("http://books.toscrape.com/catalogue/page-{}.html", i);
    let url = format!("{}{}", rocketapi, site_url);

    let response = reqwest::get(&url).await?;
    // Guard clause: log and bail out early on a failed response.
    if !response.status().is_success() {
        println!("Status: {:?} - Response failed from: {}", response.status(), &url);
        return Ok(());
    }

    let body = response.text().await?;
    let document = Document::from(body.as_str());

    for node in document.find(Name("article")) {
        // The <h3><a> element carries both the title and the href; query it once.
        let title_anchor = node.find(Name("h3").descendant(Name("a"))).next();
        let name = title_anchor.map_or(NO_VALUE.into(), |n| n.text());
        let link = title_anchor
            .and_then(|n| n.attr("href"))
            .map_or(NO_VALUE.into(), |href| href.to_string());
        let price = node
            .find(Attr("class", "price_color"))
            .next()
            .map_or(NO_VALUE.into(), |n| n.text());
        // BUG FIX: `node` already IS the <article>, and `find` only searches
        // descendants, so the old `Name("article").descendant(...)` predicate
        // could never match and `photo` was always NO_VALUE. Start the chain
        // at the inner <div> instead.
        let photo = node
            .find(Name("div").descendant(Name("a")).descendant(Name("img")))
            .filter_map(|n| n.attr("src"))
            .next()
            .map_or(NO_VALUE.into(), |src| src.to_string());
        println!("{:?},{:?},{:?},{:?},{:?}", site_url, name, price, link, photo);
    }
    Ok(())
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Spawn one task per catalogue page so all 40 requests run concurrently.
    let mut handles = Vec::with_capacity(40);
    for i in 1..=40 {
        handles.push(tokio::spawn(async move { scrapebooks(&i).await }));
    }
    // Await every task. The original collected the results into a vector it
    // never read, silently dropping both join errors (panics/cancellation)
    // and scraping errors — report them on stderr instead.
    for job in handles {
        match job.await {
            Ok(Ok(())) => {}
            Ok(Err(e)) => eprintln!("scrape failed: {}", e),
            Err(e) => eprintln!("task failed to complete: {}", e),
        }
    }
    Ok(())
}
After running the program, the results will be displayed on the screen, simulating a CSV output.
The program will run quite fast, taking less than 2 seconds to grab 40 pages of results.

This article has been heavily inspired by the work of Xavier Tao at https://able.bio/haixuanTao/web-scraper-python-vs-rust--d6176429


