Commit af10f4c4 authored by Ben Boeckel
Browse files

api/paged: simplify paginated queries

Factoring out the pagination options allows for treating paginated
queries as simple queries as well.
parent 5560898d
......@@ -15,5 +15,13 @@
//! API implications of adding new members for additional query parameters in future GitLab
//! releases.
mod paged;
pub mod projects;
pub mod users;
pub use self::paged::paged;
pub use self::paged::LinkHeaderParseError;
pub use self::paged::Pageable;
pub use self::paged::Paged;
pub use self::paged::Pagination;
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use reqwest::header::HeaderMap;
use reqwest::Method;
use serde::de::DeserializeOwned;
use thiserror::Error;
use url::Url;
use crate::gitlab::{GitlabError, PaginationError};
use crate::query::{GitlabClient, Query, SingleQuery};
/// A parsed `Link` header value, borrowing its pieces from the header text.
struct LinkHeader<'a> {
    /// The link target, without the surrounding `<` and `>`.
    url: &'a str,
    /// The `key=value` parameters that follow the URL, in header order.
    ///
    /// Values have one pair of surrounding double quotes removed when present.
    params: Vec<(&'a str, &'a str)>,
}
impl<'a> LinkHeader<'a> {
    /// Parse a single `Link` header value of the form `<url>; key=value; key="value"`.
    ///
    /// The text is split on `;`. The first piece must be the URL wrapped in angle brackets; every
    /// following piece must be a `key=value` parameter. One pair of surrounding double quotes is
    /// removed from a value when present.
    ///
    /// Only one link is handled per header value; a value carrying several comma-separated links
    /// is not split here.
    ///
    /// # Errors
    ///
    /// - `LinkHeaderParseError::NoBrackets` if the URL is not wrapped in `<` and `>`.
    /// - `LinkHeaderParseError::MissingParamValue` if a parameter has no `=`.
    fn parse(s: &'a str) -> Result<Self, LinkHeaderParseError> {
        let mut parts = s.split(';');

        let url_part = parts.next().expect("a split always has at least one part");
        let url = {
            let part = url_part.trim();
            // `<` and `>` are distinct characters, so matching both implies at least two bytes
            // and the slice below cannot be out of range.
            if part.starts_with('<') && part.ends_with('>') {
                &part[1..part.len() - 1]
            } else {
                return Err(LinkHeaderParseError::NoBrackets);
            }
        };

        let params = parts
            .map(|part| {
                let mut halves = part.trim().splitn(2, '=');
                let key = halves.next().expect("a split always has at least one part");
                let value = match halves.next() {
                    Some(value) => value,
                    None => return Err(LinkHeaderParseError::MissingParamValue),
                };

                // A lone `"` both starts and ends with a quote; require two bytes so that
                // stripping the quotes never produces an inverted slice range (which panics).
                let is_quoted =
                    value.len() >= 2 && value.starts_with('"') && value.ends_with('"');
                let value = if is_quoted {
                    &value[1..value.len() - 1]
                } else {
                    value
                };

                Ok((key, value))
            })
            .collect::<Result<Vec<_>, LinkHeaderParseError>>()?;

        Ok(Self {
            url,
            params,
        })
    }
}
/// An error which can occur when parsing a link header.
///
/// Covers both viewing the raw header value as a string and splitting that string into its URL
/// and parameters.
#[derive(Debug, Error)]
pub enum LinkHeaderParseError {
    /// An invalid HTTP header was found.
    ///
    /// Raised when the header value cannot be converted to a string.
    #[error("invalid header")]
    InvalidHeader {
        /// The source of the error.
        #[from]
        source: reqwest::header::ToStrError,
    },
    /// The `url` for a `Link` header was missing `<>` brackets.
    #[error("missing brackets around url")]
    NoBrackets,
    /// A parameter for a `Link` header was missing a value.
    ///
    /// Raised when a `;`-separated parameter contains no `=`.
    #[error("missing parameter value")]
    MissingParamValue,
}
impl LinkHeaderParseError {
fn invalid_header(source: reqwest::header::ToStrError) -> Self {
Self::InvalidHeader {
source,
}
}
}
/// Pagination options for GitLab.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Pagination {
    /// Return all results.
    ///
    /// Note that some endpoints may have a server-side limit to the number of results (e.g.,
    /// `/projects` is limited to 10000 results).
    All,
    /// Limit to a number of results.
    Limit(usize),
}

impl Default for Pagination {
    /// Without an explicit choice, every result is requested.
    fn default() -> Self {
        Self::All
    }
}

/// The largest page size that is ever requested.
const MAX_PAGE_SIZE: usize = 100;

impl Pagination {
    /// The number of items to request per page.
    ///
    /// This is `MAX_PAGE_SIZE` for `All`, and the requested limit capped at `MAX_PAGE_SIZE` for
    /// `Limit`.
    fn page_limit(self) -> usize {
        let wanted = match self {
            Self::All => MAX_PAGE_SIZE,
            Self::Limit(count) => count,
        };
        wanted.min(MAX_PAGE_SIZE)
    }

    /// Decide whether fetching should stop after the page that was just received.
    ///
    /// `last_page_size` is the number of items on the most recent page and `results` holds
    /// everything gathered so far, that page included.
    fn is_last_page<T>(self, last_page_size: usize, results: &[T]) -> bool {
        // A page that came back short means there is nothing further to fetch.
        let page_was_short = last_page_size < self.page_limit();

        match self {
            _ if page_was_short => true,
            // A full page: stop once enough results have been gathered to meet the limit.
            Self::Limit(limit) => results.len() >= limit,
            // A full page with no limit: keep going.
            Self::All => false,
        }
    }
}
/// A query modifier that paginates an endpoint.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Paged<E> {
    /// The endpoint whose results are fetched page by page.
    endpoint: E,
    /// How many results to gather before stopping.
    pagination: Pagination,
}
/// Collect data from a paged endpoint.
///
/// Bundles `endpoint` with the given `pagination` settings into a `Paged` value; no request is
/// made by this function itself.
pub fn paged<E>(endpoint: E, pagination: Pagination) -> Paged<E> {
    Paged {
        endpoint,
        pagination,
    }
}
/// A trait to indicate that an endpoint is pageable.
pub trait Pageable {
    /// Whether the endpoint uses keyset pagination or not.
    ///
    /// The provided implementation returns `false`; endpoints override this to opt in.
    fn use_keyset_pagination(&self) -> bool {
        false
    }
}
impl<E, T> Query<Vec<T>> for Paged<E>
where
E: SingleQuery<Vec<T>>,
E: Pageable,
T: DeserializeOwned,
{
fn query(&self, client: &dyn GitlabClient) -> Result<Vec<T>, GitlabError> {
let url = {
let mut url = client.rest_endpoint(&self.endpoint.endpoint())?;
self.endpoint.add_parameters(url.query_pairs_mut());
url
};
let mut page_num = 1;
let per_page = self.pagination.page_limit();
let per_page_str = format!("{}", per_page);
let mut results = Vec::new();
let mut next_url = None;
let use_keyset_pagination = self.endpoint.use_keyset_pagination();
loop {
let page_url = if let Some(url) = next_url.take() {
url
} else {
let page_str = format!("{}", page_num);
let mut page_url = url.clone();
{
let mut pairs = page_url.query_pairs_mut();
pairs.append_pair("per_page", &per_page_str);
if use_keyset_pagination {
pairs.append_pair("pagination", "keyset");
} else {
pairs.append_pair("page", &page_str);
}
}
page_url
};
let req = client.build_rest(Method::GET, page_url);
let rsp = client.rest(req)?;
let status = rsp.status();
if use_keyset_pagination {
next_url = next_page_from_headers(rsp.headers())?;
}
let v = serde_json::from_reader(rsp).map_err(GitlabError::json)?;
if !status.is_success() {
return Err(GitlabError::from_gitlab(v));
}
let page =
serde_json::from_value::<Vec<T>>(v).map_err(GitlabError::data_type::<Vec<T>>)?;
let page_len = page.len();
results.extend(page);
// Gitlab used to have issues returning paginated results; these have been fixed since,
// but if it is needed, the bug manifests as Gitlab returning *all* results instead of
// just the requested results. This can cause an infinite loop here if the number of
// total results is exactly equal to `per_page`.
if self.pagination.is_last_page(page_len, &results) {
break;
}
if use_keyset_pagination {
if next_url.is_none() {
break;
}
} else {
page_num += 1;
}
}
Ok(results)
}
}
/// Find the URL of the next page among the `Link` headers of a response.
///
/// Every `Link` header is parsed, so a malformed one is reported even when another header holds
/// the answer. The first header carrying a `rel` parameter equal to `next` supplies the URL;
/// `Ok(None)` is returned when there is no such header.
///
/// # Errors
///
/// Fails if a `Link` header is not a valid string, cannot be parsed, or if the chosen link target
/// is not a valid URL.
fn next_page_from_headers(headers: &HeaderMap) -> Result<Option<Url>, PaginationError> {
    let mut next_link: Option<&str> = None;

    for link in headers.get_all(reqwest::header::LINK).iter() {
        let value = link
            .to_str()
            .map_err(LinkHeaderParseError::invalid_header)?;
        let header = LinkHeader::parse(value)?;

        // Only the first `next` link counts, but keep looping so that later headers are still
        // validated.
        if next_link.is_some() {
            continue;
        }

        let is_next_link = header
            .params
            .iter()
            .any(|&(key, value)| key == "rel" && value == "next");
        if is_next_link {
            next_link = Some(header.url);
        }
    }

    // The URL itself is parsed only after all headers have been checked.
    match next_link {
        Some(target) => target
            .parse::<Url>()
            .map(Some)
            .map_err(PaginationError::from),
        None => Ok(None),
    }
}
......@@ -63,10 +63,6 @@ pub struct Jobs<'a> {
#[builder(setter(into))]
project: NameOrId<'a>,
/// Pagination to use for the results.
#[builder(default)]
pagination: Pagination,
/// The scopes to filter jobs by.
#[builder(setter(name = "_scopes"), default, private)]
scopes: HashSet<JobScope>,
......@@ -119,23 +115,7 @@ where
}
}
impl<'a, T> PagedQuery<T> for Jobs<'a>
where
T: DeserializeOwned,
{
fn pagination(&self) -> Pagination {
self.pagination
}
}
impl<'a, T> Query<Vec<T>> for Jobs<'a>
where
T: DeserializeOwned,
{
fn query(&self, client: &dyn GitlabClient) -> Result<Vec<T>, GitlabError> {
self.paged_query(client)
}
}
impl<'a> Pageable for Jobs<'a> {}
#[cfg(test)]
mod tests {
......
......@@ -22,10 +22,6 @@ pub struct Jobs<'a> {
/// The ID of the pipeline.
pipeline: u64,
/// Pagination to use for the results.
#[builder(default)]
pagination: Pagination,
/// The scopes to filter jobs by.
#[builder(setter(name = "_scopes"), default, private)]
scopes: HashSet<JobScope>,
......@@ -78,23 +74,7 @@ where
}
}
impl<'a, T> PagedQuery<T> for Jobs<'a>
where
T: DeserializeOwned,
{
fn pagination(&self) -> Pagination {
self.pagination
}
}
impl<'a, T> Query<Vec<T>> for Jobs<'a>
where
T: DeserializeOwned,
{
fn query(&self, client: &dyn GitlabClient) -> Result<Vec<T>, GitlabError> {
self.paged_query(client)
}
}
impl<'a> Pageable for Jobs<'a> {}
#[cfg(test)]
mod tests {
......
......@@ -138,10 +138,6 @@ pub struct Pipelines<'a> {
#[builder(setter(into))]
project: NameOrId<'a>,
/// Pagination to use for the results.
#[builder(default)]
pagination: Pagination,
/// Filter pipelines by its scope.
#[builder(default)]
scope: Option<PipelineScope>,
......@@ -246,23 +242,7 @@ where
}
}
impl<'a, T> PagedQuery<T> for Pipelines<'a>
where
T: DeserializeOwned,
{
fn pagination(&self) -> Pagination {
self.pagination
}
}
impl<'a, T> Query<Vec<T>> for Pipelines<'a>
where
T: DeserializeOwned,
{
fn query(&self, client: &dyn GitlabClient) -> Result<Vec<T>, GitlabError> {
self.paged_query(client)
}
}
impl<'a> Pageable for Pipelines<'a> {}
#[cfg(test)]
mod tests {
......
......@@ -64,10 +64,6 @@ impl fmt::Display for ProjectOrderBy {
#[derive(Debug, Builder)]
#[builder(setter(strip_option))]
pub struct Projects {
/// Pagination to use for the results.
#[builder(default)]
pagination: Pagination,
/// Search for projects using a query string.
///
/// The search query will be escaped automatically.
......@@ -275,29 +271,13 @@ where
}
}
impl<T> PagedQuery<T> for Projects
where
T: DeserializeOwned,
{
fn pagination(&self) -> Pagination {
self.pagination
}
impl Pageable for Projects {
fn use_keyset_pagination(&self) -> bool {
self.order_by
.map_or(false, |order_by| order_by.use_keyset_pagination())
}
}
impl<T> Query<Vec<T>> for Projects
where
T: DeserializeOwned,
{
fn query(&self, client: &dyn GitlabClient) -> Result<Vec<T>, GitlabError> {
self.paged_query(client)
}
}
#[cfg(test)]
mod tests {
use crate::api::projects::Projects;
......
......@@ -89,9 +89,6 @@ pub struct Users<'a> {
/// Return only blocked users.
#[builder(default)]
blocked: Option<()>,
/// Pagination to use for the results.
#[builder(default)]
pagination: Pagination,
/// Search for a user with a given external provider identity.
#[builder(default)]
......@@ -227,23 +224,7 @@ where
}
}
impl<'a, T> PagedQuery<T> for Users<'a>
where
T: DeserializeOwned,
{
fn pagination(&self) -> Pagination {
self.pagination
}
}
impl<'a, T> Query<Vec<T>> for Users<'a>
where
T: DeserializeOwned,
{
fn query(&self, client: &dyn GitlabClient) -> Result<Vec<T>, GitlabError> {
self.paged_query(client)
}
}
impl<'a> Pageable for Users<'a> {}
#[cfg(test)]
mod tests {
......
......@@ -20,11 +20,12 @@ use serde::ser::Serialize;
use serde::{Deserialize, Deserializer, Serializer};
use thiserror::Error;
use crate::api;
use crate::api::projects::pipelines;
use crate::api::projects::Projects;
use crate::api::users::{CurrentUser, User, Users};
use crate::auth::{Auth, AuthError};
use crate::query::{GitlabClient, LinkHeaderParseError, Query};
use crate::query::{GitlabClient, Query};
use crate::types::*;
macro_rules! query_param_slice {
......@@ -52,7 +53,7 @@ pub enum PaginationError {
#[error("failed to parse a Link HTTP header: {}", source)]
LinkHeader {
#[from]
source: LinkHeaderParseError,
source: api::LinkHeaderParseError,
},
#[error("failed to parse a Link HTTP header URL: {}", source)]
InvalidUrl {
......@@ -420,13 +421,13 @@ impl Gitlab {
T: UserResult,
N: AsRef<str>,
{
Users::builder()
.username(name.as_ref())
.build()
.unwrap()
.query(self)?
.pop()
.ok_or_else(|| GitlabError::no_such_user(name.as_ref()))
api::paged(
Users::builder().username(name.as_ref()).build().unwrap(),
api::Pagination::All,
)
.query(self)?
.pop()
.ok_or_else(|| GitlabError::no_such_user(name.as_ref()))
}
/// Create a project
......@@ -548,7 +549,11 @@ impl Gitlab {
note = "use `gitlab::api::projects::Projects.query()` instead"
)]
pub fn owned_projects(&self) -> GitlabResult<Vec<Project>> {
Projects::builder().owned(true).build().unwrap().query(self)
api::paged(
Projects::builder().owned(true).build().unwrap(),
api::Pagination::All,
)
.query(self)
}
/// Find a project by id.
......
......@@ -7,55 +7,15 @@
use std::borrow::Cow;
use reqwest::blocking::{RequestBuilder, Response};
use reqwest::header::HeaderMap;
use reqwest::Method;
use serde::de::DeserializeOwned;
use thiserror::Error;
use url::form_urlencoded::Serializer;
use url::{Url, UrlQuery};
use crate::gitlab::{GitlabError, PaginationError};
use crate::gitlab::GitlabError;
/// A form-urlencoded serializer that writes into a URL's query string.
pub type Pairs<'a> = Serializer<'a, UrlQuery<'a>>;
/// The largest page size that is ever requested.
const MAX_PAGE_SIZE: usize = 100;

/// How many results a paginated query should gather.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Pagination {
    /// Gather every result.
    All,
    /// Gather results until the given count is reached.
    Limit(usize),
}

impl Default for Pagination {
    /// Without an explicit choice, every result is requested.
    fn default() -> Self {
        Self::All
    }
}

impl Pagination {
    /// The number of items to request per page, never more than `MAX_PAGE_SIZE`.
    fn page_limit(self) -> usize {
        if let Self::Limit(count) = self {
            count.min(MAX_PAGE_SIZE)
        } else {
            MAX_PAGE_SIZE
        }
    }

    /// Decide whether fetching should stop after the page that was just received.
    ///
    /// `last_page_size` is the size of the most recent page; `results` holds everything gathered
    /// so far, that page included.
    fn is_last_page<T>(self, last_page_size: usize, results: &[T]) -> bool {
        // A short page means there is nothing further to fetch.
        let page_was_short = last_page_size < self.page_limit();
        // With a limit, stop as soon as enough results have been gathered.
        let limit_reached = match self {
            Self::Limit(limit) => results.len() >= limit,
            Self::All => false,
        };

        page_was_short || limit_reached
    }
}
pub trait GitlabClient {
/// Get the URL for the endpoint for the client.
///
......@@ -120,185 +80,3 @@ where
Ok(())
}
}
/// A parsed `Link` header value, borrowing its pieces from the header text.
struct LinkHeader<'a> {
    /// The link target, without the surrounding `<` and `>`.
    url: &'a str,
    /// The `key=value` parameters that follow the URL, in header order.
    params: Vec<(&'a str, &'a str)>,
}
impl<'a> LinkHeader<'a> {
fn parse(s: &'a str) -> Result<Self, LinkHeaderParseError> {
let mut parts = s.split(';');
let url_part = parts.next().expect("a split always has at least one part");
let url = {
let part = url_part.trim();
if part.starts_with('<') && part.ends_with('>') {
&part[1..part.len() - 1]
} else {
return Err(LinkHeaderParseError::NoBrackets);
}
};
let params = parts
.map(|part| {
let part = part.trim();
let mut halves = part.splitn(2, '=');
let key = halves.next().expect("a split always has at least one part");
let value = if let Some(value) = halves.next() {
if value.starts_with('"') && value.ends_with('"') {
&value[1..value.len() - 1]
} else {
value
}
} else {
return Err(LinkHeaderParseError::MissingParamValue);
};
Ok((key, value))
})
.collect::<Result<Vec<_>, LinkHeaderParseError>>()?;
Ok(Self {
url,