url/lib.rs
1// Copyright 2013-2015 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10
11rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12for the [Rust](http://rust-lang.org/) programming language.
13
14
15# URL parsing and data structures
16
17First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19```
20use url::{Url, ParseError};
21
22assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23```
24
25Let’s parse a valid URL and look at its components.
26
27```
28use url::{Url, Host, Position};
29# use url::ParseError;
30# fn run() -> Result<(), ParseError> {
31let issue_list_url = Url::parse(
32 "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33)?;
34
35
36assert!(issue_list_url.scheme() == "https");
37assert!(issue_list_url.username() == "");
38assert!(issue_list_url.password() == None);
39assert!(issue_list_url.host_str() == Some("github.com"));
40assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41assert!(issue_list_url.port() == None);
42assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44 Some(vec!["rust-lang", "rust", "issues"]));
45assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47assert!(issue_list_url.fragment() == None);
48assert!(!issue_list_url.cannot_be_a_base());
49# Ok(())
50# }
51# run().unwrap();
52```
53
54Some URLs are said to be *cannot-be-a-base*:
55they don’t have a username, password, host, or port,
56and their "path" is an arbitrary string rather than slash-separated segments:
57
58```
59use url::Url;
60# use url::ParseError;
61
62# fn run() -> Result<(), ParseError> {
63let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65assert!(data_url.cannot_be_a_base());
66assert!(data_url.scheme() == "data");
67assert!(data_url.path() == "text/plain,Hello");
68assert!(data_url.path_segments().is_none());
69assert!(data_url.query() == Some("World"));
70assert!(data_url.fragment() == Some(""));
71# Ok(())
72# }
73# run().unwrap();
74```
75
76## Default Features
77
78Versions `<= 2.5.2` of the crate have no default features. Versions `> 2.5.2` have the default feature 'std'.
79If you are upgrading across this boundary and you have specified `default-features = false`, then
80you will need to add the 'std' feature or the 'alloc' feature to your dependency.
81The 'std' feature has the same behavior as the previous versions. The 'alloc' feature
82provides no_std support.
83
84## Serde
85
86Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
87
88# Base URL
89
90Many contexts allow URL *references* that can be relative to a *base URL*:
91
92```html
93<link rel="stylesheet" href="../main.css">
94```
95
96Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
97
98```
99use url::{Url, ParseError};
100
101assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
102```
103
104Use the `join` method on an `Url` to use it as a base URL:
105
106```
107use url::Url;
108# use url::ParseError;
109
110# fn run() -> Result<(), ParseError> {
111let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
112let css_url = this_document.join("../main.css")?;
113assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
114# Ok(())
115# }
116# run().unwrap();
117```
118
119# Feature: `serde`
120
121If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
122[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
123[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
124See [serde documentation](https://serde.rs) for more information.
125
126```toml
127url = { version = "2", features = ["serde"] }
128```
129
130# Feature: `debugger_visualizer`
131
132If you enable the `debugger_visualizer` feature, the `url` crate will include
133a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
134for [Visual Studio](https://www.visualstudio.com/) that allows you to view
135[`Url`](struct.Url.html) objects in the debugger.
136
137This feature requires Rust 1.71 or later.
138
139```toml
140url = { version = "2", features = ["debugger_visualizer"] }
141```
142
143*/
144
145#![no_std]
146#![doc(html_root_url = "https://docs.rs/url/2.5.3")]
147#![cfg_attr(
148 feature = "debugger_visualizer",
149 debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
150)]
151
152pub use form_urlencoded;
153
154// For forwards compatibility
155#[cfg(feature = "std")]
156extern crate std;
157
158#[macro_use]
159extern crate alloc;
160
161#[cfg(feature = "serde")]
162extern crate serde;
163
164use crate::host::HostInternal;
165
166use crate::net::IpAddr;
167#[cfg(feature = "std")]
168#[cfg(any(
169 unix,
170 windows,
171 target_os = "redox",
172 target_os = "wasi",
173 target_os = "hermit"
174))]
175use crate::net::{SocketAddr, ToSocketAddrs};
176use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
177use alloc::borrow::ToOwned;
178use alloc::str;
179use alloc::string::{String, ToString};
180use core::borrow::Borrow;
181use core::convert::TryFrom;
182use core::fmt::Write;
183use core::ops::{Range, RangeFrom, RangeTo};
184use core::{cmp, fmt, hash, mem};
185use percent_encoding::utf8_percent_encode;
186#[cfg(feature = "std")]
187#[cfg(any(
188 unix,
189 windows,
190 target_os = "redox",
191 target_os = "wasi",
192 target_os = "hermit"
193))]
194use std::io;
195#[cfg(feature = "std")]
196use std::path::{Path, PathBuf};
197
/// `std` version of `net`.
///
/// Re-exports everything from `std::net` so the rest of the crate can refer to
/// `crate::net::*` regardless of whether the `std` feature is enabled.
#[cfg(feature = "std")]
pub(crate) mod net {
    pub use std::net::*;
}
/// `no_std` version of `net`.
///
/// Re-exports everything from `core::net` (stable since Rust 1.77; it was only
/// available on nightly before that) so the rest of the crate can refer to
/// `crate::net::*` when the `std` feature is disabled.
#[cfg(not(feature = "std"))]
pub(crate) mod net {
    pub use core::net::*;
}
208
209pub use crate::host::Host;
210pub use crate::origin::{OpaqueOrigin, Origin};
211pub use crate::parser::{ParseError, SyntaxViolation};
212pub use crate::path_segments::PathSegmentsMut;
213pub use crate::slicing::Position;
214pub use form_urlencoded::EncodingOverride;
215
216mod host;
217mod origin;
218mod parser;
219mod path_segments;
220mod slicing;
221
222#[doc(hidden)]
223pub mod quirks;
224
/// A parsed URL record.
///
/// The URL is stored as a single serialized string plus a set of byte offsets
/// into that string that delimit the individual components.
#[derive(Clone)]
pub struct Url {
    /// The full serialization of the URL; every offset below indexes into it.
    ///
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components (byte offsets into `serialization`)
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    host_start: u32,   // `serialization[host_start..host_end]` is the host string
    host_end: u32,     // Followed by ':' when a port is serialized
    host: HostInternal, // Parsed form of the host (none, domain, IPv4 or IPv6)
    port: Option<u16>, // `None` when no port is serialized between the host and the path
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
250
/// Full configuration for the URL parser.
///
/// Obtained from [`Url::options`], adjusted with the builder-style setters,
/// and consumed by [`ParseOptions::parse`].
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL that relative input is resolved against; `None` means only
    // absolute URLs can be parsed.
    base_url: Option<&'a Url>,
    // Legacy character-encoding override for query strings (HTML only).
    encoding_override: EncodingOverride<'a>,
    // Callback invoked for each non-fatal `SyntaxViolation` met while parsing.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
259
260impl<'a> ParseOptions<'a> {
261 /// Change the base URL
262 ///
263 /// See the notes of [`Url::join`] for more details about how this base is considered
264 /// when parsing.
265 pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
266 self.base_url = new;
267 self
268 }
269
270 /// Override the character encoding of query strings.
271 /// This is a legacy concept only relevant for HTML.
272 pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
273 self.encoding_override = new;
274 self
275 }
276
277 /// Call the provided function or closure for a non-fatal `SyntaxViolation`
278 /// when it occurs during parsing. Note that since the provided function is
279 /// `Fn`, the caller might need to utilize _interior mutability_, such as with
280 /// a `RefCell`, to collect the violations.
281 ///
282 /// ## Example
283 /// ```
284 /// use std::cell::RefCell;
285 /// use url::{Url, SyntaxViolation};
286 /// # use url::ParseError;
287 /// # fn run() -> Result<(), url::ParseError> {
288 /// let violations = RefCell::new(Vec::new());
289 /// let url = Url::options()
290 /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
291 /// .parse("https:////example.com")?;
292 /// assert_eq!(url.as_str(), "https://example.com/");
293 /// assert_eq!(violations.into_inner(),
294 /// vec!(SyntaxViolation::ExpectedDoubleSlash));
295 /// # Ok(())
296 /// # }
297 /// # run().unwrap();
298 /// ```
299 pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
300 self.violation_fn = new;
301 self
302 }
303
304 /// Parse an URL string with the configuration so far.
305 pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
306 Parser {
307 serialization: String::with_capacity(input.len()),
308 base_url: self.base_url,
309 query_encoding_override: self.encoding_override,
310 violation_fn: self.violation_fn,
311 context: Context::UrlParser,
312 }
313 .parse_url(input)
314 }
315}
316
317impl Url {
318 /// Parse an absolute URL from a string.
319 ///
320 /// # Examples
321 ///
322 /// ```rust
323 /// use url::Url;
324 /// # use url::ParseError;
325 ///
326 /// # fn run() -> Result<(), ParseError> {
327 /// let url = Url::parse("https://example.net")?;
328 /// # Ok(())
329 /// # }
330 /// # run().unwrap();
331 /// ```
332 ///
333 /// # Errors
334 ///
335 /// If the function can not parse an absolute URL from the given string,
336 /// a [`ParseError`] variant will be returned.
337 ///
338 /// [`ParseError`]: enum.ParseError.html
339 #[inline]
340 pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
341 Url::options().parse(input)
342 }
343
344 /// Parse an absolute URL from a string and add params to its query string.
345 ///
346 /// Existing params are not removed.
347 ///
348 /// # Examples
349 ///
350 /// ```rust
351 /// use url::Url;
352 /// # use url::ParseError;
353 ///
354 /// # fn run() -> Result<(), ParseError> {
355 /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
356 /// &[("lang", "rust"), ("browser", "servo")])?;
357 /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
358 /// # Ok(())
359 /// # }
360 /// # run().unwrap();
361 /// ```
362 ///
363 /// # Errors
364 ///
365 /// If the function can not parse an absolute URL from the given string,
366 /// a [`ParseError`] variant will be returned.
367 ///
368 /// [`ParseError`]: enum.ParseError.html
369 #[inline]
370 pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
371 where
372 I: IntoIterator,
373 I::Item: Borrow<(K, V)>,
374 K: AsRef<str>,
375 V: AsRef<str>,
376 {
377 let mut url = Url::options().parse(input);
378
379 if let Ok(ref mut url) = url {
380 url.query_pairs_mut().extend_pairs(iter);
381 }
382
383 url
384 }
385
386 /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
387 fn strip_trailing_spaces_from_opaque_path(&mut self) {
388 if !self.cannot_be_a_base() {
389 return;
390 }
391
392 if self.fragment_start.is_some() {
393 return;
394 }
395
396 if self.query_start.is_some() {
397 return;
398 }
399
400 let trailing_space_count = self
401 .serialization
402 .chars()
403 .rev()
404 .take_while(|c| *c == ' ')
405 .count();
406
407 let start = self.serialization.len() - trailing_space_count;
408
409 self.serialization.truncate(start);
410 }
411
412 /// Parse a string as an URL, with this URL as the base URL.
413 ///
414 /// The inverse of this is [`make_relative`].
415 ///
416 /// # Notes
417 ///
418 /// - A trailing slash is significant.
419 /// Without it, the last path component is considered to be a “file” name
420 /// to be removed to get at the “directory” that is used as the base.
421 /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
422 /// as input replaces everything in the base URL after the scheme.
423 /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
424 ///
425 /// # Examples
426 ///
427 /// ```rust
428 /// use url::Url;
429 /// # use url::ParseError;
430 ///
431 /// // Base without a trailing slash
432 /// # fn run() -> Result<(), ParseError> {
433 /// let base = Url::parse("https://example.net/a/b.html")?;
434 /// let url = base.join("c.png")?;
435 /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
436 ///
437 /// // Base with a trailing slash
438 /// let base = Url::parse("https://example.net/a/b/")?;
439 /// let url = base.join("c.png")?;
440 /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
441 ///
442 /// // Input as scheme relative special URL
443 /// let base = Url::parse("https://alice.com/a")?;
444 /// let url = base.join("//eve.com/b")?;
445 /// assert_eq!(url.as_str(), "https://eve.com/b");
446 ///
447 /// // Input as absolute URL
448 /// let base = Url::parse("https://alice.com/a")?;
449 /// let url = base.join("http://eve.com/b")?;
450 /// assert_eq!(url.as_str(), "http://eve.com/b"); // http instead of https
    ///
452 /// # Ok(())
453 /// # }
454 /// # run().unwrap();
455 /// ```
456 ///
457 /// # Errors
458 ///
459 /// If the function can not parse an URL from the given string
460 /// with this URL as the base URL, a [`ParseError`] variant will be returned.
461 ///
462 /// [`ParseError`]: enum.ParseError.html
463 /// [`make_relative`]: #method.make_relative
464 #[inline]
465 pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
466 Url::options().base_url(Some(self)).parse(input)
467 }
468
469 /// Creates a relative URL if possible, with this URL as the base URL.
470 ///
471 /// This is the inverse of [`join`].
472 ///
473 /// # Examples
474 ///
475 /// ```rust
476 /// use url::Url;
477 /// # use url::ParseError;
478 ///
479 /// # fn run() -> Result<(), ParseError> {
480 /// let base = Url::parse("https://example.net/a/b.html")?;
481 /// let url = Url::parse("https://example.net/a/c.png")?;
482 /// let relative = base.make_relative(&url);
483 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
484 ///
485 /// let base = Url::parse("https://example.net/a/b/")?;
486 /// let url = Url::parse("https://example.net/a/b/c.png")?;
487 /// let relative = base.make_relative(&url);
488 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
489 ///
490 /// let base = Url::parse("https://example.net/a/b/")?;
491 /// let url = Url::parse("https://example.net/a/d/c.png")?;
492 /// let relative = base.make_relative(&url);
493 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
494 ///
495 /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
496 /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
497 /// let relative = base.make_relative(&url);
498 /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
499 /// # Ok(())
500 /// # }
501 /// # run().unwrap();
502 /// ```
503 ///
504 /// # Errors
505 ///
506 /// If this URL can't be a base for the given URL, `None` is returned.
507 /// This is for example the case if the scheme, host or port are not the same.
508 ///
509 /// [`join`]: #method.join
510 pub fn make_relative(&self, url: &Url) -> Option<String> {
511 if self.cannot_be_a_base() {
512 return None;
513 }
514
515 // Scheme, host and port need to be the same
516 if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
517 return None;
518 }
519
520 // We ignore username/password at this point
521
522 // The path has to be transformed
523 let mut relative = String::new();
524
525 // Extract the filename of both URIs, these need to be handled separately
526 fn extract_path_filename(s: &str) -> (&str, &str) {
527 let last_slash_idx = s.rfind('/').unwrap_or(0);
528 let (path, filename) = s.split_at(last_slash_idx);
529 if filename.is_empty() {
530 (path, "")
531 } else {
532 (path, &filename[1..])
533 }
534 }
535
536 let (base_path, base_filename) = extract_path_filename(self.path());
537 let (url_path, url_filename) = extract_path_filename(url.path());
538
539 let mut base_path = base_path.split('/').peekable();
540 let mut url_path = url_path.split('/').peekable();
541
542 // Skip over the common prefix
543 while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
544 base_path.next();
545 url_path.next();
546 }
547
548 // Add `..` segments for the remainder of the base path
549 for base_path_segment in base_path {
550 // Skip empty last segments
551 if base_path_segment.is_empty() {
552 break;
553 }
554
555 if !relative.is_empty() {
556 relative.push('/');
557 }
558
559 relative.push_str("..");
560 }
561
562 // Append the remainder of the other URI
563 for url_path_segment in url_path {
564 if !relative.is_empty() {
565 relative.push('/');
566 }
567
568 relative.push_str(url_path_segment);
569 }
570
571 // Add the filename if they are not the same
572 if !relative.is_empty() || base_filename != url_filename {
573 // If the URIs filename is empty this means that it was a directory
574 // so we'll have to append a '/'.
575 //
576 // Otherwise append it directly as the new filename.
577 if url_filename.is_empty() {
578 relative.push('/');
579 } else {
580 if !relative.is_empty() {
581 relative.push('/');
582 }
583 relative.push_str(url_filename);
584 }
585 }
586
587 // Query and fragment are only taken from the other URI
588 if let Some(query) = url.query() {
589 relative.push('?');
590 relative.push_str(query);
591 }
592
593 if let Some(fragment) = url.fragment() {
594 relative.push('#');
595 relative.push_str(fragment);
596 }
597
598 Some(relative)
599 }
600
601 /// Return a default `ParseOptions` that can fully configure the URL parser.
602 ///
603 /// # Examples
604 ///
605 /// Get default `ParseOptions`, then change base url
606 ///
607 /// ```rust
608 /// use url::Url;
609 /// # use url::ParseError;
610 /// # fn run() -> Result<(), ParseError> {
611 /// let options = Url::options();
612 /// let api = Url::parse("https://api.example.com")?;
613 /// let base_url = options.base_url(Some(&api));
614 /// let version_url = base_url.parse("version.json")?;
615 /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
616 /// # Ok(())
617 /// # }
618 /// # run().unwrap();
619 /// ```
620 pub fn options<'a>() -> ParseOptions<'a> {
621 ParseOptions {
622 base_url: None,
623 encoding_override: None,
624 violation_fn: None,
625 }
626 }
627
628 /// Return the serialization of this URL.
629 ///
630 /// This is fast since that serialization is already stored in the `Url` struct.
631 ///
632 /// # Examples
633 ///
634 /// ```rust
635 /// use url::Url;
636 /// # use url::ParseError;
637 ///
638 /// # fn run() -> Result<(), ParseError> {
639 /// let url_str = "https://example.net/";
640 /// let url = Url::parse(url_str)?;
641 /// assert_eq!(url.as_str(), url_str);
642 /// # Ok(())
643 /// # }
644 /// # run().unwrap();
645 /// ```
646 #[inline]
647 pub fn as_str(&self) -> &str {
648 &self.serialization
649 }
650
651 /// Return the serialization of this URL.
652 ///
653 /// This consumes the `Url` and takes ownership of the `String` stored in it.
654 ///
655 /// # Examples
656 ///
657 /// ```rust
658 /// use url::Url;
659 /// # use url::ParseError;
660 ///
661 /// # fn run() -> Result<(), ParseError> {
662 /// let url_str = "https://example.net/";
663 /// let url = Url::parse(url_str)?;
664 /// assert_eq!(String::from(url), url_str);
665 /// # Ok(())
666 /// # }
667 /// # run().unwrap();
668 /// ```
669 #[inline]
670 #[deprecated(since = "2.3.0", note = "use Into<String>")]
671 pub fn into_string(self) -> String {
672 self.into()
673 }
674
    /// For internal testing, not part of the public API.
    ///
    /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns an `Err` describing the
    /// first one that is not met (the failed expression plus the serialization).
    /// This is for testing rust-url itself.
    ///
    /// # Panics
    ///
    /// Panics (through `expect`) if the serialized port is not a valid `u16`,
    /// or if the serialization cannot be parsed back into a `Url`.
    #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // Local replacement for `assert!`: returns an `Err` from the enclosing
        // function instead of panicking.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        // Local replacement for `assert_eq!`: returns an `Err` from the enclosing
        // function instead of panicking.
        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: a non-empty run starting with an ASCII letter, followed by ':'.
        assert!(self.scheme_end >= 1);
        assert!(self.byte_at(0).is_ascii_alphabetic());
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                match self.byte_at(self.username_end) {
                    // A password follows the username and ends with '@'.
                    b':' => {
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    // Username only: the host starts right after '@'.
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // No userinfo: `username_end` sits right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host must agree with the parsed `host` field.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            // Port: either absent (the path starts right at the end of the host)
            // or serialized as ":<port>" between the host and the path.
            if self.path_start == self.host_end {
                assert_eq!(self.port, None);
            } else {
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            // The path is either empty (end of string, or directly followed by
            // the query or fragment) or starts with '/'.
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            if self.path().starts_with("//") {
                // special case when first path segment is empty
                assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
                assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
                assert_eq!(self.path_start, self.scheme_end + 3);
            } else {
                assert_eq!(self.path_start, self.scheme_end + 1);
            }
        }
        // Query and fragment offsets point at their '?' / '#' delimiter, and a
        // fragment always comes after the query.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round trip: re-parsing the serialization must give back the same record.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
804
805 /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
806 ///
807 /// Note: this returns an opaque origin for `file:` URLs, which causes
808 /// `url.origin() != url.origin()`.
809 ///
810 /// # Examples
811 ///
812 /// URL with `ftp` scheme:
813 ///
814 /// ```rust
815 /// use url::{Host, Origin, Url};
816 /// # use url::ParseError;
817 ///
818 /// # fn run() -> Result<(), ParseError> {
819 /// let url = Url::parse("ftp://example.com/foo")?;
820 /// assert_eq!(url.origin(),
821 /// Origin::Tuple("ftp".into(),
822 /// Host::Domain("example.com".into()),
823 /// 21));
824 /// # Ok(())
825 /// # }
826 /// # run().unwrap();
827 /// ```
828 ///
829 /// URL with `blob` scheme:
830 ///
831 /// ```rust
832 /// use url::{Host, Origin, Url};
833 /// # use url::ParseError;
834 ///
835 /// # fn run() -> Result<(), ParseError> {
836 /// let url = Url::parse("blob:https://example.com/foo")?;
837 /// assert_eq!(url.origin(),
838 /// Origin::Tuple("https".into(),
839 /// Host::Domain("example.com".into()),
840 /// 443));
841 /// # Ok(())
842 /// # }
843 /// # run().unwrap();
844 /// ```
845 ///
846 /// URL with `file` scheme:
847 ///
848 /// ```rust
849 /// use url::{Host, Origin, Url};
850 /// # use url::ParseError;
851 ///
852 /// # fn run() -> Result<(), ParseError> {
853 /// let url = Url::parse("file:///tmp/foo")?;
854 /// assert!(!url.origin().is_tuple());
855 ///
856 /// let other_url = Url::parse("file:///tmp/foo")?;
857 /// assert!(url.origin() != other_url.origin());
858 /// # Ok(())
859 /// # }
860 /// # run().unwrap();
861 /// ```
862 ///
863 /// URL with other scheme:
864 ///
865 /// ```rust
866 /// use url::{Host, Origin, Url};
867 /// # use url::ParseError;
868 ///
869 /// # fn run() -> Result<(), ParseError> {
870 /// let url = Url::parse("foo:bar")?;
871 /// assert!(!url.origin().is_tuple());
872 /// # Ok(())
873 /// # }
874 /// # run().unwrap();
875 /// ```
876 #[inline]
877 pub fn origin(&self) -> Origin {
878 origin::url_origin(self)
879 }
880
881 /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
882 ///
883 /// # Examples
884 ///
885 /// ```
886 /// use url::Url;
887 /// # use url::ParseError;
888 ///
889 /// # fn run() -> Result<(), ParseError> {
890 /// let url = Url::parse("file:///tmp/foo")?;
891 /// assert_eq!(url.scheme(), "file");
892 /// # Ok(())
893 /// # }
894 /// # run().unwrap();
895 /// ```
896 #[inline]
897 pub fn scheme(&self) -> &str {
898 self.slice(..self.scheme_end)
899 }
900
901 /// Return whether the URL is special (has a special scheme)
902 ///
903 /// # Examples
904 ///
905 /// ```
906 /// use url::Url;
907 /// # use url::ParseError;
908 ///
909 /// # fn run() -> Result<(), ParseError> {
910 /// assert!(Url::parse("http:///tmp/foo")?.is_special());
911 /// assert!(Url::parse("file:///tmp/foo")?.is_special());
912 /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
913 /// # Ok(())
914 /// # }
915 /// # run().unwrap();
916 /// ```
917 pub fn is_special(&self) -> bool {
918 let scheme_type = SchemeType::from(self.scheme());
919 scheme_type.is_special()
920 }
921
922 /// Return whether the URL has an 'authority',
923 /// which can contain a username, password, host, and port number.
924 ///
925 /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
926 /// or cannot-be-a-base like `data:text/plain,Stuff`.
927 ///
928 /// See also the `authority` method.
929 ///
930 /// # Examples
931 ///
932 /// ```
933 /// use url::Url;
934 /// # use url::ParseError;
935 ///
936 /// # fn run() -> Result<(), ParseError> {
937 /// let url = Url::parse("ftp://rms@example.com")?;
938 /// assert!(url.has_authority());
939 ///
940 /// let url = Url::parse("unix:/run/foo.socket")?;
941 /// assert!(!url.has_authority());
942 ///
943 /// let url = Url::parse("data:text/plain,Stuff")?;
944 /// assert!(!url.has_authority());
945 /// # Ok(())
946 /// # }
947 /// # run().unwrap();
948 /// ```
949 #[inline]
950 pub fn has_authority(&self) -> bool {
951 debug_assert!(self.byte_at(self.scheme_end) == b':');
952 self.slice(self.scheme_end..).starts_with("://")
953 }
954
955 /// Return the authority of this URL as an ASCII string.
956 ///
957 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
958 /// of a special URL, or percent encoded for non-special URLs.
959 /// IPv6 addresses are given between `[` and `]` brackets.
960 /// Ports are omitted if they match the well known port of a special URL.
961 ///
962 /// Username and password are percent-encoded.
963 ///
964 /// See also the `has_authority` method.
965 ///
966 /// # Examples
967 ///
968 /// ```
969 /// use url::Url;
970 /// # use url::ParseError;
971 ///
972 /// # fn run() -> Result<(), ParseError> {
973 /// let url = Url::parse("unix:/run/foo.socket")?;
974 /// assert_eq!(url.authority(), "");
975 /// let url = Url::parse("file:///tmp/foo")?;
976 /// assert_eq!(url.authority(), "");
977 /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
978 /// assert_eq!(url.authority(), "user:password@example.com");
979 /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
980 /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
981 /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
982 /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
983 /// # Ok(())
984 /// # }
985 /// # run().unwrap();
986 /// ```
987 pub fn authority(&self) -> &str {
988 let scheme_separator_len = "://".len() as u32;
989 if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
990 self.slice(self.scheme_end + scheme_separator_len..self.path_start)
991 } else {
992 ""
993 }
994 }
995
996 /// Return whether this URL is a cannot-be-a-base URL,
997 /// meaning that parsing a relative URL string with this URL as the base will return an error.
998 ///
999 /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
1000 /// as is typically the case of `data:` and `mailto:` URLs.
1001 ///
1002 /// # Examples
1003 ///
1004 /// ```
1005 /// use url::Url;
1006 /// # use url::ParseError;
1007 ///
1008 /// # fn run() -> Result<(), ParseError> {
1009 /// let url = Url::parse("ftp://rms@example.com")?;
1010 /// assert!(!url.cannot_be_a_base());
1011 ///
1012 /// let url = Url::parse("unix:/run/foo.socket")?;
1013 /// assert!(!url.cannot_be_a_base());
1014 ///
1015 /// let url = Url::parse("data:text/plain,Stuff")?;
1016 /// assert!(url.cannot_be_a_base());
1017 /// # Ok(())
1018 /// # }
1019 /// # run().unwrap();
1020 /// ```
1021 #[inline]
1022 pub fn cannot_be_a_base(&self) -> bool {
1023 !self.slice(self.scheme_end + 1..).starts_with('/')
1024 }
1025
1026 /// Return the username for this URL (typically the empty string)
1027 /// as a percent-encoded ASCII string.
1028 ///
1029 /// # Examples
1030 ///
1031 /// ```
1032 /// use url::Url;
1033 /// # use url::ParseError;
1034 ///
1035 /// # fn run() -> Result<(), ParseError> {
1036 /// let url = Url::parse("ftp://rms@example.com")?;
1037 /// assert_eq!(url.username(), "rms");
1038 ///
1039 /// let url = Url::parse("ftp://:secret123@example.com")?;
1040 /// assert_eq!(url.username(), "");
1041 ///
1042 /// let url = Url::parse("https://example.com")?;
1043 /// assert_eq!(url.username(), "");
1044 /// # Ok(())
1045 /// # }
1046 /// # run().unwrap();
1047 /// ```
1048 pub fn username(&self) -> &str {
1049 let scheme_separator_len = "://".len() as u32;
1050 if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
1051 self.slice(self.scheme_end + scheme_separator_len..self.username_end)
1052 } else {
1053 ""
1054 }
1055 }
1056
1057 /// Return the password for this URL, if any, as a percent-encoded ASCII string.
1058 ///
1059 /// # Examples
1060 ///
1061 /// ```
1062 /// use url::Url;
1063 /// # use url::ParseError;
1064 ///
1065 /// # fn run() -> Result<(), ParseError> {
1066 /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1067 /// assert_eq!(url.password(), Some("secret123"));
1068 ///
1069 /// let url = Url::parse("ftp://:secret123@example.com")?;
1070 /// assert_eq!(url.password(), Some("secret123"));
1071 ///
1072 /// let url = Url::parse("ftp://rms@example.com")?;
1073 /// assert_eq!(url.password(), None);
1074 ///
1075 /// let url = Url::parse("https://example.com")?;
1076 /// assert_eq!(url.password(), None);
1077 /// # Ok(())
1078 /// # }
1079 /// # run().unwrap();
1080 /// ```
1081 pub fn password(&self) -> Option<&str> {
1082 // This ':' is not the one marking a port number since a host can not be empty.
1083 // (Except for file: URLs, which do not have port numbers.)
1084 if self.has_authority()
1085 && self.username_end != self.serialization.len() as u32
1086 && self.byte_at(self.username_end) == b':'
1087 {
1088 debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1089 Some(self.slice(self.username_end + 1..self.host_start - 1))
1090 } else {
1091 None
1092 }
1093 }
1094
1095 /// Equivalent to `url.host().is_some()`.
1096 ///
1097 /// # Examples
1098 ///
1099 /// ```
1100 /// use url::Url;
1101 /// # use url::ParseError;
1102 ///
1103 /// # fn run() -> Result<(), ParseError> {
1104 /// let url = Url::parse("ftp://rms@example.com")?;
1105 /// assert!(url.has_host());
1106 ///
1107 /// let url = Url::parse("unix:/run/foo.socket")?;
1108 /// assert!(!url.has_host());
1109 ///
1110 /// let url = Url::parse("data:text/plain,Stuff")?;
1111 /// assert!(!url.has_host());
1112 /// # Ok(())
1113 /// # }
1114 /// # run().unwrap();
1115 /// ```
1116 pub fn has_host(&self) -> bool {
1117 !matches!(self.host, HostInternal::None)
1118 }
1119
1120 /// Return the string representation of the host (domain or IP address) for this URL, if any.
1121 ///
1122 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1123 /// of a special URL, or percent encoded for non-special URLs.
1124 /// IPv6 addresses are given between `[` and `]` brackets.
1125 ///
1126 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1127 /// don’t have a host.
1128 ///
1129 /// See also the `host` method.
1130 ///
1131 /// # Examples
1132 ///
1133 /// ```
1134 /// use url::Url;
1135 /// # use url::ParseError;
1136 ///
1137 /// # fn run() -> Result<(), ParseError> {
1138 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1139 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1140 ///
1141 /// let url = Url::parse("ftp://rms@example.com")?;
1142 /// assert_eq!(url.host_str(), Some("example.com"));
1143 ///
1144 /// let url = Url::parse("unix:/run/foo.socket")?;
1145 /// assert_eq!(url.host_str(), None);
1146 ///
1147 /// let url = Url::parse("data:text/plain,Stuff")?;
1148 /// assert_eq!(url.host_str(), None);
1149 /// # Ok(())
1150 /// # }
1151 /// # run().unwrap();
1152 /// ```
1153 pub fn host_str(&self) -> Option<&str> {
1154 if self.has_host() {
1155 Some(self.slice(self.host_start..self.host_end))
1156 } else {
1157 None
1158 }
1159 }
1160
1161 /// Return the parsed representation of the host for this URL.
1162 /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1163 /// of a special URL, or percent encoded for non-special URLs.
1164 ///
1165 /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1166 /// don’t have a host.
1167 ///
1168 /// See also the `host_str` method.
1169 ///
1170 /// # Examples
1171 ///
1172 /// ```
1173 /// use url::Url;
1174 /// # use url::ParseError;
1175 ///
1176 /// # fn run() -> Result<(), ParseError> {
1177 /// let url = Url::parse("https://127.0.0.1/index.html")?;
1178 /// assert!(url.host().is_some());
1179 ///
1180 /// let url = Url::parse("ftp://rms@example.com")?;
1181 /// assert!(url.host().is_some());
1182 ///
1183 /// let url = Url::parse("unix:/run/foo.socket")?;
1184 /// assert!(url.host().is_none());
1185 ///
1186 /// let url = Url::parse("data:text/plain,Stuff")?;
1187 /// assert!(url.host().is_none());
1188 /// # Ok(())
1189 /// # }
1190 /// # run().unwrap();
1191 /// ```
1192 pub fn host(&self) -> Option<Host<&str>> {
1193 match self.host {
1194 HostInternal::None => None,
1195 HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1196 HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1197 HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1198 }
1199 }
1200
1201 /// If this URL has a host and it is a domain name (not an IP address), return it.
1202 /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1203 /// of a special URL, or percent encoded for non-special URLs.
1204 ///
1205 /// # Examples
1206 ///
1207 /// ```
1208 /// use url::Url;
1209 /// # use url::ParseError;
1210 ///
1211 /// # fn run() -> Result<(), ParseError> {
1212 /// let url = Url::parse("https://127.0.0.1/")?;
1213 /// assert_eq!(url.domain(), None);
1214 ///
1215 /// let url = Url::parse("mailto:rms@example.net")?;
1216 /// assert_eq!(url.domain(), None);
1217 ///
1218 /// let url = Url::parse("https://example.com/")?;
1219 /// assert_eq!(url.domain(), Some("example.com"));
1220 /// # Ok(())
1221 /// # }
1222 /// # run().unwrap();
1223 /// ```
1224 pub fn domain(&self) -> Option<&str> {
1225 match self.host {
1226 HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1227 _ => None,
1228 }
1229 }
1230
1231 /// Return the port number for this URL, if any.
1232 ///
1233 /// Note that default port numbers are never reflected by the serialization,
1234 /// use the `port_or_known_default()` method if you want a default port number returned.
1235 ///
1236 /// # Examples
1237 ///
1238 /// ```
1239 /// use url::Url;
1240 /// # use url::ParseError;
1241 ///
1242 /// # fn run() -> Result<(), ParseError> {
1243 /// let url = Url::parse("https://example.com")?;
1244 /// assert_eq!(url.port(), None);
1245 ///
1246 /// let url = Url::parse("https://example.com:443/")?;
1247 /// assert_eq!(url.port(), None);
1248 ///
1249 /// let url = Url::parse("ssh://example.com:22")?;
1250 /// assert_eq!(url.port(), Some(22));
1251 /// # Ok(())
1252 /// # }
1253 /// # run().unwrap();
1254 /// ```
    #[inline]
    pub fn port(&self) -> Option<u16> {
        // Plain accessor: hands back the stored `port` field unchanged.
        self.port
    }
1259
1260 /// Return the port number for this URL, or the default port number if it is known.
1261 ///
1262 /// This method only knows the default port number
1263 /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1264 ///
1265 /// For URLs in these schemes, this method always returns `Some(_)`.
1266 /// For other schemes, it is the same as `Url::port()`.
1267 ///
1268 /// # Examples
1269 ///
1270 /// ```
1271 /// use url::Url;
1272 /// # use url::ParseError;
1273 ///
1274 /// # fn run() -> Result<(), ParseError> {
1275 /// let url = Url::parse("foo://example.com")?;
1276 /// assert_eq!(url.port_or_known_default(), None);
1277 ///
1278 /// let url = Url::parse("foo://example.com:1456")?;
1279 /// assert_eq!(url.port_or_known_default(), Some(1456));
1280 ///
1281 /// let url = Url::parse("https://example.com")?;
1282 /// assert_eq!(url.port_or_known_default(), Some(443));
1283 /// # Ok(())
1284 /// # }
1285 /// # run().unwrap();
1286 /// ```
1287 #[inline]
1288 pub fn port_or_known_default(&self) -> Option<u16> {
1289 self.port.or_else(|| parser::default_port(self.scheme()))
1290 }
1291
1292 /// Resolve a URL’s host and port number to `SocketAddr`.
1293 ///
1294 /// If the URL has the default port number of a scheme that is unknown to this library,
1295 /// `default_port_number` provides an opportunity to provide the actual port number.
1296 /// In non-example code this should be implemented either simply as `|| None`,
1297 /// or by matching on the URL’s `.scheme()`.
1298 ///
1299 /// If the host is a domain, it is resolved using the standard library’s DNS support.
1300 ///
1301 /// # Examples
1302 ///
1303 /// ```no_run
1304 /// let url = url::Url::parse("https://example.net/").unwrap();
1305 /// let addrs = url.socket_addrs(|| None).unwrap();
1306 /// std::net::TcpStream::connect(&*addrs)
1307 /// # ;
1308 /// ```
1309 ///
1310 /// ```
1311 /// /// With application-specific known default port numbers
1312 /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1313 /// url.socket_addrs(|| match url.scheme() {
1314 /// "socks5" | "socks5h" => Some(1080),
1315 /// _ => None,
1316 /// })
1317 /// }
1318 /// ```
1319 #[cfg(feature = "std")]
1320 #[cfg(any(
1321 unix,
1322 windows,
1323 target_os = "redox",
1324 target_os = "wasi",
1325 target_os = "hermit"
1326 ))]
1327 pub fn socket_addrs(
1328 &self,
1329 default_port_number: impl Fn() -> Option<u16>,
1330 ) -> io::Result<alloc::vec::Vec<SocketAddr>> {
1331 // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1332 // causes borrowck issues because the return value borrows `default_port_number`:
1333 //
1334 // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1335 //
1336 // > This RFC proposes that *all* type parameters are considered in scope
1337 // > for `impl Trait` in return position
1338
1339 fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1340 opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1341 }
1342
1343 let host = io_result(self.host(), "No host name in the URL")?;
1344 let port = io_result(
1345 self.port_or_known_default().or_else(default_port_number),
1346 "No port number in the URL",
1347 )?;
1348 Ok(match host {
1349 Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1350 Host::Ipv4(ip) => vec![(ip, port).into()],
1351 Host::Ipv6(ip) => vec![(ip, port).into()],
1352 })
1353 }
1354
1355 /// Return the path for this URL, as a percent-encoded ASCII string.
1356 /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1357 /// For other URLs, this starts with a '/' slash
1358 /// and continues with slash-separated path segments.
1359 ///
1360 /// # Examples
1361 ///
1362 /// ```rust
1363 /// use url::{Url, ParseError};
1364 ///
1365 /// # fn run() -> Result<(), ParseError> {
1366 /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1367 /// assert_eq!(url.path(), "/api/versions");
1368 ///
1369 /// let url = Url::parse("https://example.com")?;
1370 /// assert_eq!(url.path(), "/");
1371 ///
1372 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1373 /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1374 /// # Ok(())
1375 /// # }
1376 /// # run().unwrap();
1377 /// ```
1378 pub fn path(&self) -> &str {
1379 match (self.query_start, self.fragment_start) {
1380 (None, None) => self.slice(self.path_start..),
1381 (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1382 self.slice(self.path_start..next_component_start)
1383 }
1384 }
1385 }
1386
1387 /// Unless this URL is cannot-be-a-base,
1388 /// return an iterator of '/' slash-separated path segments,
1389 /// each as a percent-encoded ASCII string.
1390 ///
1391 /// Return `None` for cannot-be-a-base URLs.
1392 ///
1393 /// When `Some` is returned, the iterator always contains at least one string
1394 /// (which may be empty).
1395 ///
1396 /// # Examples
1397 ///
1398 /// ```
1399 /// use url::Url;
1400 ///
1401 /// # #[cfg(feature = "std")]
1402 /// # use std::error::Error;
1403 /// # #[cfg(not(feature = "std"))]
1404 /// # use core::error::Error;
1405 ///
1406 /// # fn run() -> Result<(), Box<dyn Error>> {
1407 /// let url = Url::parse("https://example.com/foo/bar")?;
1408 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1409 /// assert_eq!(path_segments.next(), Some("foo"));
1410 /// assert_eq!(path_segments.next(), Some("bar"));
1411 /// assert_eq!(path_segments.next(), None);
1412 ///
1413 /// let url = Url::parse("https://example.com")?;
1414 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1415 /// assert_eq!(path_segments.next(), Some(""));
1416 /// assert_eq!(path_segments.next(), None);
1417 ///
1418 /// let url = Url::parse("data:text/plain,HelloWorld")?;
1419 /// assert!(url.path_segments().is_none());
1420 ///
1421 /// let url = Url::parse("https://example.com/countries/việt nam")?;
1422 /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1423 /// assert_eq!(path_segments.next(), Some("countries"));
1424 /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1425 /// # Ok(())
1426 /// # }
1427 /// # run().unwrap();
1428 /// ```
1429 pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1430 let path = self.path();
1431 path.strip_prefix('/').map(|remainder| remainder.split('/'))
1432 }
1433
1434 /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1435 ///
1436 /// # Examples
1437 ///
1438 /// ```rust
1439 /// use url::Url;
1440 /// # use url::ParseError;
1441 ///
    /// # fn run() -> Result<(), ParseError> {
1443 /// let url = Url::parse("https://example.com/products?page=2")?;
1444 /// let query = url.query();
1445 /// assert_eq!(query, Some("page=2"));
1446 ///
1447 /// let url = Url::parse("https://example.com/products")?;
1448 /// let query = url.query();
1449 /// assert!(query.is_none());
1450 ///
1451 /// let url = Url::parse("https://example.com/?country=español")?;
1452 /// let query = url.query();
1453 /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1454 /// # Ok(())
1455 /// # }
1456 /// # run().unwrap();
1457 /// ```
1458 pub fn query(&self) -> Option<&str> {
1459 match (self.query_start, self.fragment_start) {
1460 (None, _) => None,
1461 (Some(query_start), None) => {
1462 debug_assert!(self.byte_at(query_start) == b'?');
1463 Some(self.slice(query_start + 1..))
1464 }
1465 (Some(query_start), Some(fragment_start)) => {
1466 debug_assert!(self.byte_at(query_start) == b'?');
1467 Some(self.slice(query_start + 1..fragment_start))
1468 }
1469 }
1470 }
1471
1472 /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1473 /// and return an iterator of (key, value) pairs.
1474 ///
1475 /// # Examples
1476 ///
1477 /// ```rust
1478 /// use std::borrow::Cow;
1479 ///
1480 /// use url::Url;
1481 /// # use url::ParseError;
1482 ///
1483 /// # fn run() -> Result<(), ParseError> {
1484 /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1485 /// let mut pairs = url.query_pairs();
1486 ///
1487 /// assert_eq!(pairs.count(), 2);
1488 ///
1489 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1490 /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1491 /// # Ok(())
1492 /// # }
1493 /// # run().unwrap();
    /// ```
1496 #[inline]
1497 pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1498 form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1499 }
1500
1501 /// Return this URL’s fragment identifier, if any.
1502 ///
1503 /// A fragment is the part of the URL after the `#` symbol.
1504 /// The fragment is optional and, if present, contains a fragment identifier
1505 /// that identifies a secondary resource, such as a section heading
1506 /// of a document.
1507 ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
1509 /// that is scrolled to on load. Browsers typically will not send the fragment portion
1510 /// of a URL to the server.
1511 ///
1512 /// **Note:** the parser did *not* percent-encode this component,
1513 /// but the input may have been percent-encoded already.
1514 ///
1515 /// # Examples
1516 ///
1517 /// ```rust
1518 /// use url::Url;
1519 /// # use url::ParseError;
1520 ///
1521 /// # fn run() -> Result<(), ParseError> {
1522 /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1523 ///
1524 /// assert_eq!(url.fragment(), Some("row=4"));
1525 ///
1526 /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1527 ///
1528 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1529 /// # Ok(())
1530 /// # }
1531 /// # run().unwrap();
1532 /// ```
1533 pub fn fragment(&self) -> Option<&str> {
1534 self.fragment_start.map(|start| {
1535 debug_assert!(self.byte_at(start) == b'#');
1536 self.slice(start + 1..)
1537 })
1538 }
1539
1540 fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1541 let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1542 let result = f(&mut parser);
1543 self.serialization = parser.serialization;
1544 result
1545 }
1546
1547 /// Change this URL’s fragment identifier.
1548 ///
1549 /// # Examples
1550 ///
1551 /// ```rust
1552 /// use url::Url;
1553 /// # use url::ParseError;
1554 ///
1555 /// # fn run() -> Result<(), ParseError> {
1556 /// let mut url = Url::parse("https://example.com/data.csv")?;
1557 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
1559 /// url.set_fragment(Some("cell=4,1-6,2"));
1560 /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1561 /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1562 ///
1563 /// url.set_fragment(None);
1564 /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1565 /// assert!(url.fragment().is_none());
1566 /// # Ok(())
1567 /// # }
1568 /// # run().unwrap();
1569 /// ```
1570 pub fn set_fragment(&mut self, fragment: Option<&str>) {
1571 // Remove any previous fragment
1572 if let Some(start) = self.fragment_start {
1573 debug_assert!(self.byte_at(start) == b'#');
1574 self.serialization.truncate(start as usize);
1575 }
1576 // Write the new one
1577 if let Some(input) = fragment {
1578 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1579 self.serialization.push('#');
1580 self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1581 } else {
1582 self.fragment_start = None;
1583 self.strip_trailing_spaces_from_opaque_path();
1584 }
1585 }
1586
1587 fn take_fragment(&mut self) -> Option<String> {
1588 self.fragment_start.take().map(|start| {
1589 debug_assert!(self.byte_at(start) == b'#');
1590 let fragment = self.slice(start + 1..).to_owned();
1591 self.serialization.truncate(start as usize);
1592 fragment
1593 })
1594 }
1595
1596 fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1597 if let Some(ref fragment) = fragment {
1598 assert!(self.fragment_start.is_none());
1599 self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1600 self.serialization.push('#');
1601 self.serialization.push_str(fragment);
1602 }
1603 }
1604
1605 /// Change this URL’s query string. If `query` is `None`, this URL's
1606 /// query string will be cleared.
1607 ///
1608 /// # Examples
1609 ///
1610 /// ```rust
1611 /// use url::Url;
1612 /// # use url::ParseError;
1613 ///
1614 /// # fn run() -> Result<(), ParseError> {
1615 /// let mut url = Url::parse("https://example.com/products")?;
1616 /// assert_eq!(url.as_str(), "https://example.com/products");
1617 ///
1618 /// url.set_query(Some("page=2"));
1619 /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1620 /// assert_eq!(url.query(), Some("page=2"));
1621 /// # Ok(())
1622 /// # }
1623 /// # run().unwrap();
1624 /// ```
1625 pub fn set_query(&mut self, query: Option<&str>) {
1626 let fragment = self.take_fragment();
1627
1628 // Remove any previous query
1629 if let Some(start) = self.query_start.take() {
1630 debug_assert!(self.byte_at(start) == b'?');
1631 self.serialization.truncate(start as usize);
1632 }
1633 // Write the new query, if any
1634 if let Some(input) = query {
1635 self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1636 self.serialization.push('?');
1637 let scheme_type = SchemeType::from(self.scheme());
1638 let scheme_end = self.scheme_end;
1639 self.mutate(|parser| {
1640 let vfn = parser.violation_fn;
1641 parser.parse_query(
1642 scheme_type,
1643 scheme_end,
1644 parser::Input::new_trim_tab_and_newlines(input, vfn),
1645 )
1646 });
1647 } else {
1648 self.query_start = None;
1649 if fragment.is_none() {
1650 self.strip_trailing_spaces_from_opaque_path();
1651 }
1652 }
1653
1654 self.restore_already_parsed_fragment(fragment);
1655 }
1656
1657 /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1658 /// in `application/x-www-form-urlencoded` syntax.
1659 ///
1660 /// The return value has a method-chaining API:
1661 ///
1662 /// ```rust
1663 /// # use url::{Url, ParseError};
1664 ///
1665 /// # fn run() -> Result<(), ParseError> {
1666 /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1667 /// assert_eq!(url.query(), Some("lang=fr"));
1668 ///
1669 /// url.query_pairs_mut().append_pair("foo", "bar");
1670 /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1671 /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1672 ///
1673 /// url.query_pairs_mut()
1674 /// .clear()
1675 /// .append_pair("foo", "bar & baz")
1676 /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1677 /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1678 /// assert_eq!(url.as_str(),
1679 /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1680 /// # Ok(())
1681 /// # }
1682 /// # run().unwrap();
1683 /// ```
1684 ///
1685 /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1686 /// not `url.set_query(None)`.
1687 ///
1688 /// The state of `Url` is unspecified if this return value is leaked without being dropped.
1689 pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1690 let fragment = self.take_fragment();
1691
1692 let query_start;
1693 if let Some(start) = self.query_start {
1694 debug_assert!(self.byte_at(start) == b'?');
1695 query_start = start as usize;
1696 } else {
1697 query_start = self.serialization.len();
1698 self.query_start = Some(to_u32(query_start).unwrap());
1699 self.serialization.push('?');
1700 }
1701
1702 let query = UrlQuery {
1703 url: Some(self),
1704 fragment,
1705 };
1706 form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1707 }
1708
1709 fn take_after_path(&mut self) -> String {
1710 match (self.query_start, self.fragment_start) {
1711 (Some(i), _) | (None, Some(i)) => {
1712 let after_path = self.slice(i..).to_owned();
1713 self.serialization.truncate(i as usize);
1714 after_path
1715 }
1716 (None, None) => String::new(),
1717 }
1718 }
1719
1720 /// Change this URL’s path.
1721 ///
1722 /// # Examples
1723 ///
1724 /// ```rust
1725 /// use url::Url;
1726 /// # use url::ParseError;
1727 ///
1728 /// # fn run() -> Result<(), ParseError> {
1729 /// let mut url = Url::parse("https://example.com")?;
1730 /// url.set_path("api/comments");
1731 /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1732 /// assert_eq!(url.path(), "/api/comments");
1733 ///
1734 /// let mut url = Url::parse("https://example.com/api")?;
1735 /// url.set_path("data/report.csv");
1736 /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1737 /// assert_eq!(url.path(), "/data/report.csv");
1738 ///
1739 /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1740 /// let mut url = Url::parse("https://example.com")?;
1741 /// url.set_path("api/some comments");
1742 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1743 /// assert_eq!(url.path(), "/api/some%20comments");
1744 ///
1745 /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1746 /// let mut url = Url::parse("https://example.com")?;
1747 /// url.set_path("api/some%20comments");
1748 /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1749 /// assert_eq!(url.path(), "/api/some%20comments");
1750 ///
1751 /// # Ok(())
1752 /// # }
1753 /// # run().unwrap();
1754 /// ```
    pub fn set_path(&mut self, mut path: &str) {
        // Detach the query/fragment tail; it is re-appended by `restore_after_path` at the end.
        let after_path = self.take_after_path();
        // Length of the serialization without that tail, used later to re-base
        // `query_start` / `fragment_start`.
        let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
        let cannot_be_a_base = self.cannot_be_a_base();
        let scheme_type = SchemeType::from(self.scheme());
        // Set to true below when nothing at all follows the first ":/".
        let mut path_empty = false;

        // Check ':' and then see if the next character is '/'
        let mut has_host = if let Some(index) = self.serialization.find(":") {
            if self.serialization.len() > index + 1
                && self.serialization.as_bytes().get(index + 1) == Some(&b'/')
            {
                // `rest` is everything after the first ":/"; `host_part` is `rest`
                // up to its first '/'.
                let rest = &self.serialization[(index + ":/".len())..];
                let host_part = rest.split('/').next().unwrap_or("");
                path_empty = rest.is_empty();
                // Treated as a host only when non-empty and free of '@'.
                !host_part.is_empty() && !host_part.contains('@')
            } else {
                false
            }
        } else {
            false
        };

        // Ensure the path length is greater than 1 to account
        // for cases where "/." is already appended from serialization
        // If we set path, then we already checked the other two conditions:
        // https://url.spec.whatwg.org/#url-serializing
        // 1. The host is null
        // 2. the first segment of the URL's path is an empty string
        if path.len() > 1 {
            if let Some(index) = self.serialization.find(":") {
                let removal_start = index + ":".len();
                if self.serialization[removal_start..].starts_with("/.") {
                    // Move `path_start` back over the "/." so the truncation below removes it.
                    self.path_start -= "/.".len() as u32;
                }
            }
        }

        // Discard the old path, then let the parser append the new one.
        self.serialization.truncate(self.path_start as usize);
        self.mutate(|parser| {
            if cannot_be_a_base {
                // A leading '/' is written as "%2F" before the remainder is parsed
                // as a cannot-be-a-base path.
                if path.starts_with('/') {
                    parser.serialization.push_str("%2F");
                    path = &path[1..];
                }
                parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
            } else {
                parser.parse_path_start(
                    scheme_type,
                    &mut has_host,
                    parser::Input::new_no_trim(path),
                );
            }
        });

        // For cases where normalization is applied across both the serialization and the path.
        // Append "/." immediately after the scheme (up to ":")
        // This is done if three conditions are met.
        // https://url.spec.whatwg.org/#url-serializing
        // 1. The host is null
        // 2. The url's path length is greater than 1
        // 3. the first segment of the URL's path is an empty string
        if !has_host && path.len() > 1 && path_empty {
            if let Some(index) = self.serialization.find(":") {
                // Only insert when the serialization now has "//" right after the ':'.
                if self.serialization.len() > index + 2
                    && self.serialization.as_bytes().get(index + 1) == Some(&b'/')
                    && self.serialization.as_bytes().get(index + 2) == Some(&b'/')
                {
                    self.serialization.insert_str(index + ":".len(), "/.");
                    self.path_start += "/.".len() as u32;
                }
            }
        }

        // Re-attach the saved query/fragment and shift their start indices.
        self.restore_after_path(old_after_path_pos, &after_path);
    }
1831
1832 /// Return an object with methods to manipulate this URL’s path segments.
1833 ///
1834 /// Return `Err(())` if this URL is cannot-be-a-base.
1835 #[allow(clippy::result_unit_err)]
1836 pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1837 if self.cannot_be_a_base() {
1838 Err(())
1839 } else {
1840 Ok(path_segments::new(self))
1841 }
1842 }
1843
1844 fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1845 let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1846 let adjust = |index: &mut u32| {
1847 *index -= old_after_path_position;
1848 *index += new_after_path_position;
1849 };
1850 if let Some(ref mut index) = self.query_start {
1851 adjust(index)
1852 }
1853 if let Some(ref mut index) = self.fragment_start {
1854 adjust(index)
1855 }
1856 self.serialization.push_str(after_path)
1857 }
1858
1859 /// Change this URL’s port number.
1860 ///
1861 /// Note that default port numbers are not reflected in the serialization.
1862 ///
    /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme,
    /// this method does nothing and returns `Err`.
1865 ///
1866 /// # Examples
1867 ///
1868 /// ```
1869 /// use url::Url;
1870 ///
1871 /// # #[cfg(feature = "std")]
1872 /// # use std::error::Error;
1873 /// # #[cfg(not(feature = "std"))]
1874 /// # use core::error::Error;
1875 ///
1876 /// # fn run() -> Result<(), Box<dyn Error>> {
1877 /// let mut url = Url::parse("ssh://example.net:2048/")?;
1878 ///
1879 /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1880 /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1881 ///
1882 /// url.set_port(None).map_err(|_| "cannot be base")?;
1883 /// assert_eq!(url.as_str(), "ssh://example.net/");
1884 /// # Ok(())
1885 /// # }
1886 /// # run().unwrap();
1887 /// ```
1888 ///
1889 /// Known default port numbers are not reflected:
1890 ///
1891 /// ```rust
1892 /// use url::Url;
1893 ///
1894 /// # #[cfg(feature = "std")]
1895 /// # use std::error::Error;
1896 /// # #[cfg(not(feature = "std"))]
1897 /// # use core::error::Error;
1898 ///
1899 /// # fn run() -> Result<(), Box<dyn Error>> {
1900 /// let mut url = Url::parse("https://example.org/")?;
1901 ///
1902 /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1903 /// assert!(url.port().is_none());
1904 /// # Ok(())
1905 /// # }
1906 /// # run().unwrap();
1907 /// ```
1908 ///
1909 /// Cannot set port for cannot-be-a-base URLs:
1910 ///
1911 /// ```
1912 /// use url::Url;
1913 /// # use url::ParseError;
1914 ///
1915 /// # fn run() -> Result<(), ParseError> {
1916 /// let mut url = Url::parse("mailto:rms@example.net")?;
1917 ///
1918 /// let result = url.set_port(Some(80));
1919 /// assert!(result.is_err());
1920 ///
1921 /// let result = url.set_port(None);
1922 /// assert!(result.is_err());
1923 /// # Ok(())
1924 /// # }
1925 /// # run().unwrap();
1926 /// ```
1927 #[allow(clippy::result_unit_err)]
1928 pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1929 // has_host implies !cannot_be_a_base
1930 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1931 return Err(());
1932 }
1933 if port.is_some() && port == parser::default_port(self.scheme()) {
1934 port = None
1935 }
1936 self.set_port_internal(port);
1937 Ok(())
1938 }
1939
1940 fn set_port_internal(&mut self, port: Option<u16>) {
1941 match (self.port, port) {
1942 (None, None) => {}
1943 (Some(_), None) => {
1944 self.serialization
1945 .drain(self.host_end as usize..self.path_start as usize);
1946 let offset = self.path_start - self.host_end;
1947 self.path_start = self.host_end;
1948 if let Some(ref mut index) = self.query_start {
1949 *index -= offset
1950 }
1951 if let Some(ref mut index) = self.fragment_start {
1952 *index -= offset
1953 }
1954 }
1955 (Some(old), Some(new)) if old == new => {}
1956 (_, Some(new)) => {
1957 let path_and_after = self.slice(self.path_start..).to_owned();
1958 self.serialization.truncate(self.host_end as usize);
1959 write!(&mut self.serialization, ":{}", new).unwrap();
1960 let old_path_start = self.path_start;
1961 let new_path_start = to_u32(self.serialization.len()).unwrap();
1962 self.path_start = new_path_start;
1963 let adjust = |index: &mut u32| {
1964 *index -= old_path_start;
1965 *index += new_path_start;
1966 };
1967 if let Some(ref mut index) = self.query_start {
1968 adjust(index)
1969 }
1970 if let Some(ref mut index) = self.fragment_start {
1971 adjust(index)
1972 }
1973 self.serialization.push_str(&path_and_after);
1974 }
1975 }
1976 self.port = port;
1977 }
1978
1979 /// Change this URL’s host.
1980 ///
1981 /// Removing the host (calling this with `None`)
1982 /// will also remove any username, password, and port number.
1983 ///
1984 /// # Examples
1985 ///
1986 /// Change host:
1987 ///
1988 /// ```
1989 /// use url::Url;
1990 /// # use url::ParseError;
1991 ///
1992 /// # fn run() -> Result<(), ParseError> {
1993 /// let mut url = Url::parse("https://example.net")?;
1994 /// let result = url.set_host(Some("rust-lang.org"));
1995 /// assert!(result.is_ok());
1996 /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1997 /// # Ok(())
1998 /// # }
1999 /// # run().unwrap();
2000 /// ```
2001 ///
2002 /// Remove host:
2003 ///
2004 /// ```
2005 /// use url::Url;
2006 /// # use url::ParseError;
2007 ///
2008 /// # fn run() -> Result<(), ParseError> {
2009 /// let mut url = Url::parse("foo://example.net")?;
2010 /// let result = url.set_host(None);
2011 /// assert!(result.is_ok());
2012 /// assert_eq!(url.as_str(), "foo:/");
2013 /// # Ok(())
2014 /// # }
2015 /// # run().unwrap();
2016 /// ```
2017 ///
2018 /// Cannot remove host for 'special' schemes (e.g. `http`):
2019 ///
2020 /// ```
2021 /// use url::Url;
2022 /// # use url::ParseError;
2023 ///
2024 /// # fn run() -> Result<(), ParseError> {
2025 /// let mut url = Url::parse("https://example.net")?;
2026 /// let result = url.set_host(None);
2027 /// assert!(result.is_err());
2028 /// assert_eq!(url.as_str(), "https://example.net/");
2029 /// # Ok(())
2030 /// # }
2031 /// # run().unwrap();
2032 /// ```
2033 ///
2034 /// Cannot change or remove host for cannot-be-a-base URLs:
2035 ///
2036 /// ```
2037 /// use url::Url;
2038 /// # use url::ParseError;
2039 ///
2040 /// # fn run() -> Result<(), ParseError> {
2041 /// let mut url = Url::parse("mailto:rms@example.net")?;
2042 ///
2043 /// let result = url.set_host(Some("rust-lang.org"));
2044 /// assert!(result.is_err());
2045 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2046 ///
2047 /// let result = url.set_host(None);
2048 /// assert!(result.is_err());
2049 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2050 /// # Ok(())
2051 /// # }
2052 /// # run().unwrap();
2053 /// ```
2054 ///
2055 /// # Errors
2056 ///
2057 /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
2058 /// a [`ParseError`] variant will be returned.
2059 ///
2060 /// [`ParseError`]: enum.ParseError.html
2061 pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
2062 if self.cannot_be_a_base() {
2063 return Err(ParseError::SetHostOnCannotBeABaseUrl);
2064 }
2065
2066 let scheme_type = SchemeType::from(self.scheme());
2067
2068 if let Some(host) = host {
2069 if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
2070 return Err(ParseError::EmptyHost);
2071 }
2072 let mut host_substr = host;
2073 // Otherwise, if c is U+003A (:) and the [] flag is unset, then
2074 if !host.starts_with('[') || !host.ends_with(']') {
2075 match host.find(':') {
2076 Some(0) => {
2077 // If buffer is the empty string, validation error, return failure.
2078 return Err(ParseError::InvalidDomainCharacter);
2079 }
2080 // Let host be the result of host parsing buffer
2081 Some(colon_index) => {
2082 host_substr = &host[..colon_index];
2083 }
2084 None => {}
2085 }
2086 }
2087 if SchemeType::from(self.scheme()).is_special() {
2088 self.set_host_internal(Host::parse(host_substr)?, None);
2089 } else {
2090 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
2091 }
2092 } else if self.has_host() {
2093 if scheme_type.is_special() && !scheme_type.is_file() {
2094 return Err(ParseError::EmptyHost);
2095 } else if self.serialization.len() == self.path_start as usize {
2096 self.serialization.push('/');
2097 }
2098 debug_assert!(self.byte_at(self.scheme_end) == b':');
2099 debug_assert!(self.byte_at(self.path_start) == b'/');
2100
2101 let new_path_start = if scheme_type.is_file() {
2102 self.scheme_end + 3
2103 } else {
2104 self.scheme_end + 1
2105 };
2106
2107 self.serialization
2108 .drain(new_path_start as usize..self.path_start as usize);
2109 let offset = self.path_start - new_path_start;
2110 self.path_start = new_path_start;
2111 self.username_end = new_path_start;
2112 self.host_start = new_path_start;
2113 self.host_end = new_path_start;
2114 self.port = None;
2115 if let Some(ref mut index) = self.query_start {
2116 *index -= offset
2117 }
2118 if let Some(ref mut index) = self.fragment_start {
2119 *index -= offset
2120 }
2121 }
2122 Ok(())
2123 }
2124
2125 /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
2126 fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2127 let old_suffix_pos = if opt_new_port.is_some() {
2128 self.path_start
2129 } else {
2130 self.host_end
2131 };
2132 let suffix = self.slice(old_suffix_pos..).to_owned();
2133 self.serialization.truncate(self.host_start as usize);
2134 if !self.has_authority() {
2135 debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2136 debug_assert!(self.username_end == self.host_start);
2137 self.serialization.push('/');
2138 self.serialization.push('/');
2139 self.username_end += 2;
2140 self.host_start += 2;
2141 }
2142 write!(&mut self.serialization, "{}", host).unwrap();
2143 self.host_end = to_u32(self.serialization.len()).unwrap();
2144 self.host = host.into();
2145
2146 if let Some(new_port) = opt_new_port {
2147 self.port = new_port;
2148 if let Some(port) = new_port {
2149 write!(&mut self.serialization, ":{}", port).unwrap();
2150 }
2151 }
2152 let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2153 self.serialization.push_str(&suffix);
2154
2155 let adjust = |index: &mut u32| {
2156 *index -= old_suffix_pos;
2157 *index += new_suffix_pos;
2158 };
2159 adjust(&mut self.path_start);
2160 if let Some(ref mut index) = self.query_start {
2161 adjust(index)
2162 }
2163 if let Some(ref mut index) = self.fragment_start {
2164 adjust(index)
2165 }
2166 }
2167
2168 /// Change this URL’s host to the given IP address.
2169 ///
2170 /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2171 ///
2172 /// Compared to `Url::set_host`, this skips the host parser.
2173 ///
2174 /// # Examples
2175 ///
2176 /// ```rust
2177 /// use url::{Url, ParseError};
2178 ///
2179 /// # fn run() -> Result<(), ParseError> {
2180 /// let mut url = Url::parse("http://example.com")?;
2181 /// url.set_ip_host("127.0.0.1".parse().unwrap());
2182 /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2183 /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2184 /// # Ok(())
2185 /// # }
2186 /// # run().unwrap();
2187 /// ```
2188 ///
    /// Cannot set an IP host on a cannot-be-a-base URL (e.g. `mailto:`):
2190 ///
2191 /// ```rust
2192 /// use url::{Url, ParseError};
2193 ///
2194 /// # fn run() -> Result<(), ParseError> {
2195 /// let mut url = Url::parse("mailto:rms@example.com")?;
2196 /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2197 ///
2198 /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2199 /// assert!(result.is_err());
2200 /// # Ok(())
2201 /// # }
2202 /// # run().unwrap();
2203 /// ```
2204 ///
2205 #[allow(clippy::result_unit_err)]
2206 pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2207 if self.cannot_be_a_base() {
2208 return Err(());
2209 }
2210
2211 let address = match address {
2212 IpAddr::V4(address) => Host::Ipv4(address),
2213 IpAddr::V6(address) => Host::Ipv6(address),
2214 };
2215 self.set_host_internal(address, None);
2216 Ok(())
2217 }
2218
2219 /// Change this URL’s password.
2220 ///
2221 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2222 ///
2223 /// # Examples
2224 ///
2225 /// ```rust
2226 /// use url::{Url, ParseError};
2227 ///
2228 /// # fn run() -> Result<(), ParseError> {
2229 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2230 /// let result = url.set_password(Some("secret_password"));
2231 /// assert!(result.is_err());
2232 ///
2233 /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2234 /// let result = url.set_password(Some("secret_password"));
2235 /// assert_eq!(url.password(), Some("secret_password"));
2236 ///
2237 /// let mut url = Url::parse("ftp://user2:@example.com")?;
2238 /// let result = url.set_password(Some("secret2"));
2239 /// assert!(result.is_ok());
2240 /// assert_eq!(url.password(), Some("secret2"));
2241 /// # Ok(())
2242 /// # }
2243 /// # run().unwrap();
2244 /// ```
2245 #[allow(clippy::result_unit_err)]
2246 pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2247 // has_host implies !cannot_be_a_base
2248 if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2249 return Err(());
2250 }
2251 let password = password.unwrap_or_default();
2252 if !password.is_empty() {
2253 let host_and_after = self.slice(self.host_start..).to_owned();
2254 self.serialization.truncate(self.username_end as usize);
2255 self.serialization.push(':');
2256 self.serialization
2257 .extend(utf8_percent_encode(password, USERINFO));
2258 self.serialization.push('@');
2259
2260 let old_host_start = self.host_start;
2261 let new_host_start = to_u32(self.serialization.len()).unwrap();
2262 let adjust = |index: &mut u32| {
2263 *index -= old_host_start;
2264 *index += new_host_start;
2265 };
2266 self.host_start = new_host_start;
2267 adjust(&mut self.host_end);
2268 adjust(&mut self.path_start);
2269 if let Some(ref mut index) = self.query_start {
2270 adjust(index)
2271 }
2272 if let Some(ref mut index) = self.fragment_start {
2273 adjust(index)
2274 }
2275
2276 self.serialization.push_str(&host_and_after);
2277 } else if self.byte_at(self.username_end) == b':' {
2278 // If there is a password to remove
2279 let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2280 debug_assert!(has_username_or_password);
2281 let username_start = self.scheme_end + 3;
2282 let empty_username = username_start == self.username_end;
2283 let start = self.username_end; // Remove the ':'
2284 let end = if empty_username {
2285 self.host_start // Remove the '@' as well
2286 } else {
2287 self.host_start - 1 // Keep the '@' to separate the username from the host
2288 };
2289 self.serialization.drain(start as usize..end as usize);
2290 let offset = end - start;
2291 self.host_start -= offset;
2292 self.host_end -= offset;
2293 self.path_start -= offset;
2294 if let Some(ref mut index) = self.query_start {
2295 *index -= offset
2296 }
2297 if let Some(ref mut index) = self.fragment_start {
2298 *index -= offset
2299 }
2300 }
2301 Ok(())
2302 }
2303
2304 /// Change this URL’s username.
2305 ///
2306 /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2307 /// # Examples
2308 ///
    /// Cannot set a username on a cannot-be-a-base URL (e.g. `mailto:`):
2310 ///
2311 /// ```rust
2312 /// use url::{Url, ParseError};
2313 ///
2314 /// # fn run() -> Result<(), ParseError> {
2315 /// let mut url = Url::parse("mailto:rmz@example.com")?;
2316 /// let result = url.set_username("user1");
2317 /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2318 /// assert!(result.is_err());
2319 /// # Ok(())
2320 /// # }
2321 /// # run().unwrap();
2322 /// ```
2323 ///
    /// Set the username to `user1`:
2325 ///
2326 /// ```rust
2327 /// use url::{Url, ParseError};
2328 ///
2329 /// # fn run() -> Result<(), ParseError> {
2330 /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2331 /// let result = url.set_username("user1");
2332 /// assert!(result.is_ok());
2333 /// assert_eq!(url.username(), "user1");
2334 /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2335 /// # Ok(())
2336 /// # }
2337 /// # run().unwrap();
2338 /// ```
    #[allow(clippy::result_unit_err)]
    pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
        // has_host implies !cannot_be_a_base
        // Also refuse URLs whose host is the empty domain, and every `file` URL.
        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
            return Err(());
        }
        // The username starts right after the "://" that follows the scheme.
        let username_start = self.scheme_end + 3;
        debug_assert!(self.slice(self.scheme_end..username_start) == "://");
        // Fast path: the serialized username already equals the requested text.
        if self.slice(username_start..self.username_end) == username {
            return Ok(());
        }
        // Set aside everything after the old username, then write the new one
        // percent-encoded with the USERINFO set.
        let after_username = self.slice(self.username_end..).to_owned();
        self.serialization.truncate(username_start as usize);
        self.serialization
            .extend(utf8_percent_encode(username, USERINFO));

        // Later offsets are shifted by `added_bytes - removed_bytes`. Both start out as the
        // old and new end of the username and are bumped by one below when an '@' is
        // dropped or inserted.
        let mut removed_bytes = self.username_end;
        self.username_end = to_u32(self.serialization.len()).unwrap();
        let mut added_bytes = self.username_end;

        let new_username_is_empty = self.username_end == username_start;
        match (new_username_is_empty, after_username.chars().next()) {
            // The username is now empty and the tail began with '@' (so no ':' password
            // followed the old username): the '@' is dropped too.
            (true, Some('@')) => {
                removed_bytes += 1;
                self.serialization.push_str(&after_username[1..]);
            }
            // The tail is re-attached unchanged: it already starts with '@' or with a
            // ':' (password), or the new username is empty.
            (false, Some('@')) | (_, Some(':')) | (true, _) => {
                self.serialization.push_str(&after_username);
            }
            // Non-empty username and the tail starts with neither '@' nor ':':
            // insert the '@' separator before re-attaching the tail.
            (false, _) => {
                added_bytes += 1;
                self.serialization.push('@');
                self.serialization.push_str(&after_username);
            }
        }

        // Shift every offset located after the username.
        let adjust = |index: &mut u32| {
            *index -= removed_bytes;
            *index += added_bytes;
        };
        adjust(&mut self.host_start);
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start {
            adjust(index)
        }
        if let Some(ref mut index) = self.fragment_start {
            adjust(index)
        }
        Ok(())
    }
2390
2391 /// Change this URL’s scheme.
2392 ///
2393 /// Do nothing and return `Err` under the following circumstances:
2394 ///
2395 /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2396 /// * If this URL is cannot-be-a-base and the new scheme is one of
2397 /// `http`, `https`, `ws`, `wss` or `ftp`
2398 /// * If either the old or new scheme is `http`, `https`, `ws`,
2399 /// `wss` or `ftp` and the other is not one of these
2400 /// * If the new scheme is `file` and this URL includes credentials
2401 /// or has a non-null port
2402 /// * If this URL's scheme is `file` and its host is empty or null
2403 ///
2404 /// See also [the URL specification's section on legal scheme state
2405 /// overrides](https://url.spec.whatwg.org/#scheme-state).
2406 ///
2407 /// # Examples
2408 ///
2409 /// Change the URL’s scheme from `https` to `http`:
2410 ///
2411 /// ```
2412 /// use url::Url;
2413 /// # use url::ParseError;
2414 ///
2415 /// # fn run() -> Result<(), ParseError> {
2416 /// let mut url = Url::parse("https://example.net")?;
2417 /// let result = url.set_scheme("http");
2418 /// assert_eq!(url.as_str(), "http://example.net/");
2419 /// assert!(result.is_ok());
2420 /// # Ok(())
2421 /// # }
2422 /// # run().unwrap();
2423 /// ```
2424 /// Change the URL’s scheme from `foo` to `bar`:
2425 ///
2426 /// ```
2427 /// use url::Url;
2428 /// # use url::ParseError;
2429 ///
2430 /// # fn run() -> Result<(), ParseError> {
2431 /// let mut url = Url::parse("foo://example.net")?;
2432 /// let result = url.set_scheme("bar");
2433 /// assert_eq!(url.as_str(), "bar://example.net");
2434 /// assert!(result.is_ok());
2435 /// # Ok(())
2436 /// # }
2437 /// # run().unwrap();
2438 /// ```
2439 ///
2440 /// Cannot change URL’s scheme from `https` to `foõ`:
2441 ///
2442 /// ```
2443 /// use url::Url;
2444 /// # use url::ParseError;
2445 ///
2446 /// # fn run() -> Result<(), ParseError> {
2447 /// let mut url = Url::parse("https://example.net")?;
2448 /// let result = url.set_scheme("foõ");
2449 /// assert_eq!(url.as_str(), "https://example.net/");
2450 /// assert!(result.is_err());
2451 /// # Ok(())
2452 /// # }
2453 /// # run().unwrap();
2454 /// ```
2455 ///
2456 /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2457 ///
2458 /// ```
2459 /// use url::Url;
2460 /// # use url::ParseError;
2461 ///
2462 /// # fn run() -> Result<(), ParseError> {
2463 /// let mut url = Url::parse("mailto:rms@example.net")?;
2464 /// let result = url.set_scheme("https");
2465 /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2466 /// assert!(result.is_err());
2467 /// # Ok(())
2468 /// # }
2469 /// # run().unwrap();
2470 /// ```
2471 /// Cannot change the URL’s scheme from `foo` to `https`:
2472 ///
2473 /// ```
2474 /// use url::Url;
2475 /// # use url::ParseError;
2476 ///
2477 /// # fn run() -> Result<(), ParseError> {
2478 /// let mut url = Url::parse("foo://example.net")?;
2479 /// let result = url.set_scheme("https");
2480 /// assert_eq!(url.as_str(), "foo://example.net");
2481 /// assert!(result.is_err());
2482 /// # Ok(())
2483 /// # }
2484 /// # run().unwrap();
2485 /// ```
2486 /// Cannot change the URL’s scheme from `http` to `foo`:
2487 ///
2488 /// ```
2489 /// use url::Url;
2490 /// # use url::ParseError;
2491 ///
2492 /// # fn run() -> Result<(), ParseError> {
2493 /// let mut url = Url::parse("http://example.net")?;
2494 /// let result = url.set_scheme("foo");
2495 /// assert_eq!(url.as_str(), "http://example.net/");
2496 /// assert!(result.is_err());
2497 /// # Ok(())
2498 /// # }
2499 /// # run().unwrap();
2500 /// ```
2501 #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
2502 pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2503 let mut parser = Parser::for_setter(String::new());
2504 let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2505 let new_scheme_type = SchemeType::from(&parser.serialization);
2506 let old_scheme_type = SchemeType::from(self.scheme());
2507 // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2508 if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2509 // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2510 (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2511 // If url includes credentials or has a non-null port, and buffer is "file", then return.
2512 // If url’s scheme is "file" and its host is an empty host or null, then return.
2513 (new_scheme_type.is_file() && self.has_authority())
2514 {
2515 return Err(());
2516 }
2517
2518 if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2519 return Err(());
2520 }
2521 let old_scheme_end = self.scheme_end;
2522 let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2523 let adjust = |index: &mut u32| {
2524 *index -= old_scheme_end;
2525 *index += new_scheme_end;
2526 };
2527
2528 self.scheme_end = new_scheme_end;
2529 adjust(&mut self.username_end);
2530 adjust(&mut self.host_start);
2531 adjust(&mut self.host_end);
2532 adjust(&mut self.path_start);
2533 if let Some(ref mut index) = self.query_start {
2534 adjust(index)
2535 }
2536 if let Some(ref mut index) = self.fragment_start {
2537 adjust(index)
2538 }
2539
2540 parser.serialization.push_str(self.slice(old_scheme_end..));
2541 self.serialization = parser.serialization;
2542
2543 // Update the port so it can be removed
2544 // If it is the scheme's default
2545 // we don't mind it silently failing
2546 // if there was no port in the first place
2547 let previous_port = self.port();
2548 let _ = self.set_port(previous_port);
2549
2550 Ok(())
2551 }
2552
2553 /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2554 ///
2555 /// This returns `Err` if the given path is not absolute or,
2556 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2557 ///
2558 /// # Examples
2559 ///
2560 /// On Unix-like platforms:
2561 ///
2562 /// ```
2563 /// # if cfg!(unix) {
2564 /// use url::Url;
2565 ///
2566 /// # fn run() -> Result<(), ()> {
2567 /// let url = Url::from_file_path("/tmp/foo.txt")?;
2568 /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2569 ///
2570 /// let url = Url::from_file_path("../foo.txt");
2571 /// assert!(url.is_err());
2572 ///
2573 /// let url = Url::from_file_path("https://google.com/");
2574 /// assert!(url.is_err());
2575 /// # Ok(())
2576 /// # }
2577 /// # run().unwrap();
2578 /// # }
2579 /// ```
2580 ///
2581 /// This method is only available if the `std` Cargo feature is enabled.
2582 #[cfg(all(
2583 feature = "std",
2584 any(
2585 unix,
2586 windows,
2587 target_os = "redox",
2588 target_os = "wasi",
2589 target_os = "hermit"
2590 )
2591 ))]
2592 #[allow(clippy::result_unit_err)]
2593 pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
2594 let mut serialization = "file://".to_owned();
2595 let host_start = serialization.len() as u32;
2596 let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2597 Ok(Url {
2598 serialization,
2599 scheme_end: "file".len() as u32,
2600 username_end: host_start,
2601 host_start,
2602 host_end,
2603 host,
2604 port: None,
2605 path_start: host_end,
2606 query_start: None,
2607 fragment_start: None,
2608 })
2609 }
2610
2611 /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2612 ///
2613 /// This returns `Err` if the given path is not absolute or,
2614 /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2615 ///
2616 /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2617 /// so that the entire path is considered when using this URL as a base URL.
2618 ///
2619 /// For example:
2620 ///
2621 /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2622 /// as the base URL is `file:///var/www/index.html`
2623 /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2624 /// as the base URL is `file:///var/index.html`, which might not be what was intended.
2625 ///
2626 /// Note that `std::path` does not consider trailing slashes significant
2627 /// and usually does not include them (e.g. in `Path::parent()`).
2628 ///
2629 /// This method is only available if the `std` Cargo feature is enabled.
2630 #[cfg(all(
2631 feature = "std",
2632 any(
2633 unix,
2634 windows,
2635 target_os = "redox",
2636 target_os = "wasi",
2637 target_os = "hermit"
2638 )
2639 ))]
2640 #[allow(clippy::result_unit_err)]
2641 pub fn from_directory_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
2642 let mut url = Url::from_file_path(path)?;
2643 if !url.serialization.ends_with('/') {
2644 url.serialization.push('/')
2645 }
2646 Ok(url)
2647 }
2648
2649 /// Serialize with Serde using the internal representation of the `Url` struct.
2650 ///
2651 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2652 /// for speed, compared to the `Deserialize` trait impl.
2653 ///
2654 /// This method is only available if the `serde` Cargo feature is enabled.
2655 #[cfg(feature = "serde")]
2656 #[deny(unused)]
2657 pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2658 where
2659 S: serde::Serializer,
2660 {
2661 use serde::Serialize;
2662 // Destructuring first lets us ensure that adding or removing fields forces this method
2663 // to be updated
2664 let Url {
2665 ref serialization,
2666 ref scheme_end,
2667 ref username_end,
2668 ref host_start,
2669 ref host_end,
2670 ref host,
2671 ref port,
2672 ref path_start,
2673 ref query_start,
2674 ref fragment_start,
2675 } = *self;
2676 (
2677 serialization,
2678 scheme_end,
2679 username_end,
2680 host_start,
2681 host_end,
2682 host,
2683 port,
2684 path_start,
2685 query_start,
2686 fragment_start,
2687 )
2688 .serialize(serializer)
2689 }
2690
2691 /// Serialize with Serde using the internal representation of the `Url` struct.
2692 ///
2693 /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2694 /// for speed, compared to the `Deserialize` trait impl.
2695 ///
2696 /// This method is only available if the `serde` Cargo feature is enabled.
2697 #[cfg(feature = "serde")]
2698 #[deny(unused)]
2699 pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2700 where
2701 D: serde::Deserializer<'de>,
2702 {
2703 use serde::de::{Deserialize, Error};
2704 let (
2705 serialization,
2706 scheme_end,
2707 username_end,
2708 host_start,
2709 host_end,
2710 host,
2711 port,
2712 path_start,
2713 query_start,
2714 fragment_start,
2715 ) = Deserialize::deserialize(deserializer)?;
2716 let url = Url {
2717 serialization,
2718 scheme_end,
2719 username_end,
2720 host_start,
2721 host_end,
2722 host,
2723 port,
2724 path_start,
2725 query_start,
2726 fragment_start,
2727 };
2728 if cfg!(debug_assertions) {
2729 url.check_invariants()
2730 .map_err(|reason| Error::custom(reason))?
2731 }
2732 Ok(url)
2733 }
2734
2735 /// Assuming the URL is in the `file` scheme or similar,
2736 /// convert its path to an absolute `std::path::Path`.
2737 ///
2738 /// **Note:** This does not actually check the URL’s `scheme`,
2739 /// and may give nonsensical results for other schemes.
2740 /// It is the user’s responsibility to check the URL’s scheme before calling this.
2741 ///
2742 /// ```
2743 /// # use url::Url;
2744 /// # let url = Url::parse("file:///etc/passwd").unwrap();
2745 /// let path = url.to_file_path();
2746 /// ```
2747 ///
2748 /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2749 /// `file:` URLs may have a non-local host),
2750 /// or if `Path::new_opt()` returns `None`.
2751 /// (That is, if the percent-decoded path contains a NUL byte or,
2752 /// for a Windows path, is not UTF-8.)
2753 ///
2754 /// This method is only available if the `std` Cargo feature is enabled.
2755 #[inline]
2756 #[cfg(all(
2757 feature = "std",
2758 any(
2759 unix,
2760 windows,
2761 target_os = "redox",
2762 target_os = "wasi",
2763 target_os = "hermit"
2764 )
2765 ))]
2766 #[allow(clippy::result_unit_err)]
2767 pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2768 if let Some(segments) = self.path_segments() {
2769 let host = match self.host() {
2770 None | Some(Host::Domain("localhost")) => None,
2771 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2772 Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2773 }
2774 _ => return Err(()),
2775 };
2776
2777 return file_url_segments_to_pathbuf(host, segments);
2778 }
2779 Err(())
2780 }
2781
2782 // Private helper methods:
2783
2784 #[inline]
2785 fn slice<R>(&self, range: R) -> &str
2786 where
2787 R: RangeArg,
2788 {
2789 range.slice_of(&self.serialization)
2790 }
2791
2792 #[inline]
2793 fn byte_at(&self, i: u32) -> u8 {
2794 self.serialization.as_bytes()[i as usize]
2795 }
2796}
2797
2798/// Parse a string as an URL, without a base URL or encoding override.
2799impl str::FromStr for Url {
2800 type Err = ParseError;
2801
2802 #[inline]
2803 fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2804 Url::parse(input)
2805 }
2806}
2807
2808impl<'a> TryFrom<&'a str> for Url {
2809 type Error = ParseError;
2810
2811 fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2812 Url::parse(s)
2813 }
2814}
2815
2816/// Display the serialization of this URL.
2817impl fmt::Display for Url {
2818 #[inline]
2819 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2820 fmt::Display::fmt(&self.serialization, formatter)
2821 }
2822}
2823
2824/// String conversion.
2825impl From<Url> for String {
2826 fn from(value: Url) -> String {
2827 value.serialization
2828 }
2829}
2830
2831/// Debug the serialization of this URL.
2832impl fmt::Debug for Url {
2833 #[inline]
2834 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2835 formatter
2836 .debug_struct("Url")
2837 .field("scheme", &self.scheme())
2838 .field("cannot_be_a_base", &self.cannot_be_a_base())
2839 .field("username", &self.username())
2840 .field("password", &self.password())
2841 .field("host", &self.host())
2842 .field("port", &self.port())
2843 .field("path", &self.path())
2844 .field("query", &self.query())
2845 .field("fragment", &self.fragment())
2846 .finish()
2847 }
2848}
2849
/// URLs compare like their serialization.
///
/// `PartialEq` for `Url` compares the serialization strings, so equality is a
/// full equivalence relation and `Eq` can be implemented as a marker.
impl Eq for Url {}
2852
2853/// URLs compare like their serialization.
2854impl PartialEq for Url {
2855 #[inline]
2856 fn eq(&self, other: &Self) -> bool {
2857 self.serialization == other.serialization
2858 }
2859}
2860
2861/// URLs compare like their serialization.
2862impl Ord for Url {
2863 #[inline]
2864 fn cmp(&self, other: &Self) -> cmp::Ordering {
2865 self.serialization.cmp(&other.serialization)
2866 }
2867}
2868
2869/// URLs compare like their serialization.
2870impl PartialOrd for Url {
2871 #[inline]
2872 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2873 Some(self.cmp(other))
2874 }
2875}
2876
2877/// URLs hash like their serialization.
2878impl hash::Hash for Url {
2879 #[inline]
2880 fn hash<H>(&self, state: &mut H)
2881 where
2882 H: hash::Hasher,
2883 {
2884 hash::Hash::hash(&self.serialization, state)
2885 }
2886}
2887
2888/// Return the serialization of this URL.
2889impl AsRef<str> for Url {
2890 #[inline]
2891 fn as_ref(&self) -> &str {
2892 &self.serialization
2893 }
2894}
2895
/// A range that can be used to take a sub-slice of a string.
///
/// Implemented in this file for ranges whose bounds are `u32` byte offsets.
trait RangeArg {
    /// Return the part of `text` selected by this range.
    fn slice_of<'s>(&self, text: &'s str) -> &'s str;
}
2899
2900impl RangeArg for Range<u32> {
2901 #[inline]
2902 fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2903 &s[self.start as usize..self.end as usize]
2904 }
2905}
2906
2907impl RangeArg for RangeFrom<u32> {
2908 #[inline]
2909 fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2910 &s[self.start as usize..]
2911 }
2912}
2913
2914impl RangeArg for RangeTo<u32> {
2915 #[inline]
2916 fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2917 &s[..self.end as usize]
2918 }
2919}
2920
/// Serializes this URL into a `serde` stream as a single string: its serialization.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
2933
/// Deserializes this URL from a `serde` stream by parsing a string with `Url::parse`.
///
/// A parse failure is reported as a custom error holding the parse error
/// followed by the offending input.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Visitor};

        /// Visitor that accepts a string and parses it as an URL.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => Err(E::custom(format!("{}: {:?}", err, s))),
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2965
/// Append the components of an absolute `path` to `serialization` as percent-encoded,
/// `/`-separated URL path segments.
///
/// Returns `Err(())` when `path` is not absolute. On success, returns the length
/// `serialization` had before the path was appended, paired with `HostInternal::None`.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use parser::SPECIAL_PATH_SEGMENT;
    use percent_encoding::percent_encode;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    if !path.is_absolute() {
        return Err(());
    }
    let host_end = to_u32(serialization.len()).unwrap();
    let len_before_path = serialization.len();
    // The first component is the root; the leading '/' of each segment stands in for it.
    for component in path.components().skip(1) {
        serialization.push('/');
        #[cfg(not(target_os = "wasi"))]
        serialization.extend(percent_encode(
            component.as_os_str().as_bytes(),
            SPECIAL_PATH_SEGMENT,
        ));
        // On WASI the component goes through a lossy UTF-8 conversion before encoding.
        #[cfg(target_os = "wasi")]
        serialization.extend(percent_encode(
            component.as_os_str().to_string_lossy().as_bytes(),
            SPECIAL_PATH_SEGMENT,
        ));
    }
    if serialization.len() == len_before_path {
        // Nothing was appended (the path is only the root), but an URL’s path must
        // not be empty.
        serialization.push('/');
    }
    Ok((host_end, HostInternal::None))
}
3006
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards both arguments unchanged to `path_to_file_url_segments_windows`
/// and returns its result.
#[cfg(all(feature = "std", windows))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
3014
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Appends the host and path of a `file:` URL for a Windows-style `path`
/// to `serialization`.
///
/// The path must start with a prefix component:
///
/// * a disk prefix (`Disk` / `VerbatimDisk`) writes `/<letter>:` and no host;
/// * a UNC prefix (`UNC` / `VerbatimUNC`) writes the parsed server as host,
///   then `/` and the percent-encoded share name.
///
/// Every remaining component except the root directory is written as `/`
/// followed by the component percent-encoded with `PATH_SEGMENT`.
///
/// Returns the offset at which the host ends and the internal host value.
///
/// # Errors
///
/// Returns `Err(())` if `path` is not absolute, does not begin with one of the
/// prefixes above, the server name fails to parse as a host, or the share or
/// any component is not valid Unicode. `serialization` may already have been
/// partially extended when an error is returned.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_segments_windows(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use crate::parser::PATH_SEGMENT;
    use percent_encoding::percent_encode;
    use std::path::{Component, Prefix};

    if !path.is_absolute() {
        return Err(());
    }

    // One past the current end: for a disk prefix this is where the drive
    // letter starts, right after the `/` pushed below.
    let host_start = serialization.len() + 1;

    let mut components = path.components();
    let prefix = match components.next() {
        Some(Component::Prefix(prefix)) => prefix,
        _ => return Err(()),
    };

    let (host_end, host_internal) = match prefix.kind() {
        Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
            let end = to_u32(serialization.len()).unwrap();
            serialization.push('/');
            serialization.push(letter as char);
            serialization.push(':');
            (end, HostInternal::None)
        }
        Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
            let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
            write!(serialization, "{}", host).unwrap();
            let end = to_u32(serialization.len()).unwrap();
            serialization.push('/');
            let share = share.to_str().ok_or(())?;
            serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
            (end, host.into())
        }
        _ => return Err(()),
    };

    let mut path_only_has_prefix = true;
    for component in components.filter(|c| *c != Component::RootDir) {
        path_only_has_prefix = false;
        // FIXME: somehow work with non-unicode?
        let segment = component.as_os_str().to_str().ok_or(())?;

        serialization.push('/');
        serialization.extend(percent_encode(segment.as_bytes(), PATH_SEGMENT));
    }

    // A windows drive letter must end with a slash.
    let bare_drive_letter = serialization.len() > host_start
        && parser::is_windows_drive_letter(&serialization[host_start..])
        && path_only_has_prefix;
    if bare_drive_letter {
        serialization.push('/');
    }

    Ok((host_end, host_internal))
}
3081
/// Builds a filesystem path from the host and path segments of a `file:` URL
/// (unix-like targets).
///
/// Each segment is percent-decoded and appended after a `/`. On Redox the
/// result is prefixed with `file:`. If the decoded bytes end in an ASCII
/// letter followed by `:` or `|`, a trailing `/` is added.
///
/// # Errors
///
/// Returns `Err(())` if `host` is present. On WASI it is also returned when
/// the decoded bytes are not valid UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    if host.is_some() {
        return Err(());
    }

    let mut bytes: Vec<u8> = Vec::new();
    if cfg!(target_os = "redox") {
        bytes.extend_from_slice(b"file:");
    }

    for segment in segments {
        bytes.push(b'/');
        bytes.extend(percent_decode(segment.as_bytes()));
    }

    // A windows drive letter must end with a slash.
    // The pattern needs at least three bytes, looking only at the last two.
    let ends_with_drive_letter = match bytes.as_slice() {
        [.., _, letter, sep] => letter.is_ascii_alphabetic() && matches!(*sep, b':' | b'|'),
        _ => false,
    };
    if ends_with_drive_letter {
        bytes.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&bytes));
    #[cfg(target_os = "wasi")]
    let path = String::from_utf8(bytes)
        .map(PathBuf::from)
        .map_err(|_| ())?;

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
3137
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards both arguments unchanged to `file_url_segments_to_pathbuf_windows`
/// and returns its result.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    // The lifetime is spelled `'_` to match the sibling signatures; the type
    // is the same as with the lifetime left hidden.
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
3145
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Builds a Windows-style path from the host and path segments of a `file:` URL.
///
/// With a host, the path starts with `\\` followed by the host. Without one,
/// the first segment must be a drive letter written as `X:` or with the colon
/// percent-encoded (`X%3A` / `X%3a`); it becomes `X:`. Every further segment
/// is percent-decoded and appended after a `\`.
///
/// # Errors
///
/// Returns `Err(())` if there is no host and the first segment is missing or
/// is not a drive letter in one of the forms above, or if any later segment
/// does not decode to valid UTF-8.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use percent_encoding::percent_decode;

    let mut string = match host {
        Some(host) => r"\\".to_owned() + host,
        None => {
            let first = segments.next().ok_or(())?;

            // Accept a literal colon or its percent-encoded form after one byte.
            let colon_ok = match first.as_bytes() {
                [_, b':'] => true,
                [_, b'%', b'3', b'a'] | [_, b'%', b'3', b'A'] => true,
                _ => false,
            };
            if !colon_ok || !first.starts_with(parser::ascii_alpha) {
                return Err(());
            }

            // Both accepted forms normalise to the letter followed by `:`.
            let mut drive = first[0..1].to_owned();
            drive.push(':');
            drive
        }
    };

    for segment in segments {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded =
            String::from_utf8(percent_decode(segment.as_bytes()).collect()).map_err(|_| ())?;
        string.push_str(&decoded);
    }

    let path = PathBuf::from(string);
    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );
    Ok(path)
}
3200
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
///
/// Holds a mutable borrow of the URL being edited plus a fragment value that is
/// handed back to the URL (via `restore_already_parsed_fragment`) when the
/// value is finished or dropped.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. Wrapped in `Option` so that `finish` and `Drop`
    // can move the borrow out with `take()`, leaving `None` behind.
    url: Option<&'a mut Url>,
    // Value passed to `restore_already_parsed_fragment`.
    // NOTE(review): presumably the fragment set aside by `query_pairs_mut`
    // while the query is edited — confirm at the construction site.
    fragment: Option<String>,
}
3207
3208// `as_mut_string` string here exposes the internal serialization of an `Url`,
3209// which should not be exposed to users.
3210// We achieve that by not giving users direct access to `UrlQuery`:
3211// * Its fields are private
3212// (and so can not be constructed with struct literal syntax outside of this crate),
3213// * It has no constructor
3214// * It is only visible (on the type level) to users in the return type of
3215// `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
3216// * `Serializer` keeps its target in a private field
3217// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
3218impl<'a> form_urlencoded::Target for UrlQuery<'a> {
3219 fn as_mut_string(&mut self) -> &mut String {
3220 &mut self.url.as_mut().unwrap().serialization
3221 }
3222
3223 fn finish(mut self) -> &'a mut Url {
3224 let url = self.url.take().unwrap();
3225 url.restore_already_parsed_fragment(self.fragment.take());
3226 url
3227 }
3228
3229 type Finished = &'a mut Url;
3230}
3231
3232impl<'a> Drop for UrlQuery<'a> {
3233 fn drop(&mut self) {
3234 if let Some(url) = self.url.take() {
3235 url.restore_already_parsed_fragment(self.fragment.take())
3236 }
3237 }
3238}