html.rs 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. // Copyright 2019-2023 Tauri Programme within The Commons Conservancy
  2. // SPDX-License-Identifier: Apache-2.0
  3. // SPDX-License-Identifier: MIT
  4. //! The module to process HTML in Tauri.
  5. use std::path::{Path, PathBuf};
  6. use html5ever::{
  7. interface::QualName,
  8. namespace_url, ns,
  9. serialize::{HtmlSerializer, SerializeOpts, Serializer, TraversalScope},
  10. tendril::TendrilSink,
  11. LocalName,
  12. };
  13. pub use kuchiki::NodeRef;
  14. use kuchiki::{Attribute, ExpandedName, NodeData};
  15. use serde::Serialize;
  16. #[cfg(feature = "isolation")]
  17. use serialize_to_javascript::DefaultTemplate;
  18. use crate::config::{DisabledCspModificationKind, PatternKind};
  19. #[cfg(feature = "isolation")]
  20. use crate::pattern::isolation::IsolationJavascriptCodegen;
  21. /// The token used on the CSP tag content.
  22. pub const CSP_TOKEN: &str = "__TAURI_CSP__";
  23. /// The token used for script nonces.
  24. pub const SCRIPT_NONCE_TOKEN: &str = "__TAURI_SCRIPT_NONCE__";
  25. /// The token used for style nonces.
  26. pub const STYLE_NONCE_TOKEN: &str = "__TAURI_STYLE_NONCE__";
  27. // taken from https://github.com/kuchiki-rs/kuchiki/blob/57ee6920d835315a498e748ba4b07a851ae5e498/src/serializer.rs#L12
  28. fn serialize_node_ref_internal<S: Serializer>(
  29. node: &NodeRef,
  30. serializer: &mut S,
  31. traversal_scope: TraversalScope,
  32. ) -> crate::Result<()> {
  33. match (traversal_scope, node.data()) {
  34. (ref scope, NodeData::Element(element)) => {
  35. if *scope == TraversalScope::IncludeNode {
  36. let attrs = element.attributes.borrow();
  37. // Unfortunately we need to allocate something to hold these &'a QualName
  38. let attrs = attrs
  39. .map
  40. .iter()
  41. .map(|(name, attr)| {
  42. (
  43. QualName::new(attr.prefix.clone(), name.ns.clone(), name.local.clone()),
  44. &attr.value,
  45. )
  46. })
  47. .collect::<Vec<_>>();
  48. serializer.start_elem(
  49. element.name.clone(),
  50. attrs.iter().map(|&(ref name, value)| (name, &**value)),
  51. )?
  52. }
  53. let children = match element.template_contents.as_ref() {
  54. Some(template_root) => template_root.children(),
  55. None => node.children(),
  56. };
  57. for child in children {
  58. serialize_node_ref_internal(&child, serializer, TraversalScope::IncludeNode)?
  59. }
  60. if *scope == TraversalScope::IncludeNode {
  61. serializer.end_elem(element.name.clone())?
  62. }
  63. Ok(())
  64. }
  65. (_, &NodeData::DocumentFragment) | (_, &NodeData::Document(_)) => {
  66. for child in node.children() {
  67. serialize_node_ref_internal(&child, serializer, TraversalScope::IncludeNode)?
  68. }
  69. Ok(())
  70. }
  71. (TraversalScope::ChildrenOnly(_), _) => Ok(()),
  72. (TraversalScope::IncludeNode, NodeData::Doctype(doctype)) => {
  73. serializer.write_doctype(&doctype.name).map_err(Into::into)
  74. }
  75. (TraversalScope::IncludeNode, NodeData::Text(text)) => {
  76. serializer.write_text(&text.borrow()).map_err(Into::into)
  77. }
  78. (TraversalScope::IncludeNode, NodeData::Comment(text)) => {
  79. serializer.write_comment(&text.borrow()).map_err(Into::into)
  80. }
  81. (TraversalScope::IncludeNode, NodeData::ProcessingInstruction(contents)) => {
  82. let contents = contents.borrow();
  83. serializer
  84. .write_processing_instruction(&contents.0, &contents.1)
  85. .map_err(Into::into)
  86. }
  87. }
  88. }
  89. /// Serializes the node to HTML.
  90. pub fn serialize_node(node: &NodeRef) -> Vec<u8> {
  91. let mut u8_vec = Vec::new();
  92. let mut ser = HtmlSerializer::new(
  93. &mut u8_vec,
  94. SerializeOpts {
  95. traversal_scope: TraversalScope::IncludeNode,
  96. ..Default::default()
  97. },
  98. );
  99. serialize_node_ref_internal(node, &mut ser, TraversalScope::IncludeNode).unwrap();
  100. u8_vec
  101. }
  102. /// Parses the given HTML string.
  103. pub fn parse(html: String) -> NodeRef {
  104. kuchiki::parse_html().one(html)
  105. }
  106. fn with_head<F: FnOnce(&NodeRef)>(document: &mut NodeRef, f: F) {
  107. if let Ok(ref node) = document.select_first("head") {
  108. f(node.as_node())
  109. } else {
  110. let node = NodeRef::new_element(
  111. QualName::new(None, ns!(html), LocalName::from("head")),
  112. None,
  113. );
  114. f(&node);
  115. document.prepend(node)
  116. }
  117. }
  118. fn inject_nonce(document: &mut NodeRef, selector: &str, token: &str) {
  119. if let Ok(scripts) = document.select(selector) {
  120. for target in scripts {
  121. let node = target.as_node();
  122. let element = node.as_element().unwrap();
  123. let mut attrs = element.attributes.borrow_mut();
  124. // if the node already has the `nonce` attribute, skip it
  125. if attrs.get("nonce").is_some() {
  126. continue;
  127. }
  128. attrs.insert("nonce", token.into());
  129. }
  130. }
  131. }
  132. /// Inject nonce tokens to all scripts and styles.
  133. pub fn inject_nonce_token(
  134. document: &mut NodeRef,
  135. dangerous_disable_asset_csp_modification: &DisabledCspModificationKind,
  136. ) {
  137. if dangerous_disable_asset_csp_modification.can_modify("script-src") {
  138. inject_nonce(document, "script[src^='http']", SCRIPT_NONCE_TOKEN);
  139. }
  140. if dangerous_disable_asset_csp_modification.can_modify("style-src") {
  141. inject_nonce(document, "style", STYLE_NONCE_TOKEN);
  142. }
  143. }
  144. /// Injects a content security policy to the HTML.
  145. pub fn inject_csp(document: &mut NodeRef, csp: &str) {
  146. with_head(document, |head| {
  147. head.append(create_csp_meta_tag(csp));
  148. });
  149. }
  150. /// Injects a content security policy token to the HTML.
  151. pub fn inject_csp_token(document: &mut NodeRef) {
  152. inject_csp(document, CSP_TOKEN)
  153. }
  154. fn create_csp_meta_tag(csp: &str) -> NodeRef {
  155. NodeRef::new_element(
  156. QualName::new(None, ns!(html), LocalName::from("meta")),
  157. vec![
  158. (
  159. ExpandedName::new(ns!(), LocalName::from("http-equiv")),
  160. Attribute {
  161. prefix: None,
  162. value: "Content-Security-Policy".into(),
  163. },
  164. ),
  165. (
  166. ExpandedName::new(ns!(), LocalName::from("content")),
  167. Attribute {
  168. prefix: None,
  169. value: csp.into(),
  170. },
  171. ),
  172. ],
  173. )
  174. }
  175. /// The shape of the JavaScript Pattern config
  176. #[derive(Debug, Serialize)]
  177. #[serde(rename_all = "lowercase", tag = "pattern")]
  178. pub enum PatternObject {
  179. /// Brownfield pattern.
  180. Brownfield,
  181. /// Isolation pattern. Recommended for security purposes.
  182. Isolation {
  183. /// Which `IsolationSide` this `PatternObject` is getting injected into
  184. side: IsolationSide,
  185. },
  186. }
  187. impl From<&PatternKind> for PatternObject {
  188. fn from(pattern_kind: &PatternKind) -> Self {
  189. match pattern_kind {
  190. PatternKind::Brownfield => Self::Brownfield,
  191. PatternKind::Isolation { .. } => Self::Isolation {
  192. side: IsolationSide::default(),
  193. },
  194. }
  195. }
  196. }
  197. /// Where the JavaScript is injected to
  198. #[derive(Debug, Serialize)]
  199. #[serde(rename_all = "lowercase")]
  200. pub enum IsolationSide {
  201. /// Original frame, the Brownfield application
  202. Original,
  203. /// Secure frame, the isolation security application
  204. Secure,
  205. }
  206. impl Default for IsolationSide {
  207. fn default() -> Self {
  208. Self::Original
  209. }
  210. }
  211. /// Injects the Isolation JavaScript to a codegen time document.
  212. ///
  213. /// Note: This function is not considered part of the stable API.
  214. #[cfg(feature = "isolation")]
  215. pub fn inject_codegen_isolation_script(document: &mut NodeRef) {
  216. with_head(document, |head| {
  217. let script = NodeRef::new_element(QualName::new(None, ns!(html), "script".into()), None);
  218. script.append(NodeRef::new_text(
  219. IsolationJavascriptCodegen {}
  220. .render_default(&Default::default())
  221. .expect("unable to render codegen isolation script template")
  222. .into_string(),
  223. ));
  224. head.prepend(script);
  225. });
  226. }
  227. /// Temporary workaround for Windows not allowing requests
  228. ///
  229. /// Note: this does not prevent path traversal due to the isolation application expectation that it
  230. /// is secure.
  231. pub fn inline_isolation(document: &mut NodeRef, dir: &Path) {
  232. for script in document
  233. .select("script[src]")
  234. .expect("unable to parse document for scripts")
  235. {
  236. let src = {
  237. let attributes = script.attributes.borrow();
  238. attributes
  239. .get(LocalName::from("src"))
  240. .expect("script with src attribute has no src value")
  241. .to_string()
  242. };
  243. let mut path = PathBuf::from(src);
  244. if path.has_root() {
  245. path = path
  246. .strip_prefix("/")
  247. .expect("Tauri \"Isolation\" Pattern only supports relative or absolute (`/`) paths.")
  248. .into();
  249. }
  250. let file = std::fs::read_to_string(dir.join(path)).expect("unable to find isolation file");
  251. script.as_node().append(NodeRef::new_text(file));
  252. let mut attributes = script.attributes.borrow_mut();
  253. attributes.remove(LocalName::from("src"));
  254. }
  255. }
  256. #[cfg(test)]
  257. mod tests {
  258. use kuchiki::traits::*;
  259. #[test]
  260. fn csp() {
  261. let htmls = vec![
  262. "<html><head></head></html>".to_string(),
  263. "<html></html>".to_string(),
  264. ];
  265. for html in htmls {
  266. let mut document = kuchiki::parse_html().one(html);
  267. super::inject_csp_token(&mut document);
  268. assert_eq!(
  269. document.to_string(),
  270. format!(
  271. r#"<html><head><meta content="{}" http-equiv="Content-Security-Policy"></head><body></body></html>"#,
  272. super::CSP_TOKEN
  273. )
  274. );
  275. }
  276. }
  277. }