extract.rs 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. // Copyright 2019-2023 Tauri Programme within The Commons Conservancy
  2. // SPDX-License-Identifier: Apache-2.0
  3. // SPDX-License-Identifier: MIT
  4. use std::{
  5. borrow::Cow,
  6. fs,
  7. io::{self, Cursor, Read, Seek},
  8. path::{self, Component, Path, PathBuf},
  9. };
  10. /// The archive reader.
  11. #[derive(Debug)]
  12. pub enum ArchiveReader<R: Read + Seek> {
  13. /// A plain reader.
  14. Plain(R),
  15. /// A GZ- compressed reader (decoder).
  16. GzCompressed(Box<flate2::read::GzDecoder<R>>),
  17. }
  18. impl<R: Read + Seek> Read for ArchiveReader<R> {
  19. fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
  20. match self {
  21. Self::Plain(r) => r.read(buf),
  22. Self::GzCompressed(decoder) => decoder.read(buf),
  23. }
  24. }
  25. }
  26. impl<R: Read + Seek> ArchiveReader<R> {
  27. #[allow(dead_code)]
  28. fn get_mut(&mut self) -> &mut R {
  29. match self {
  30. Self::Plain(r) => r,
  31. Self::GzCompressed(decoder) => decoder.get_mut(),
  32. }
  33. }
  34. }
  35. /// The supported archive formats.
  36. #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  37. #[non_exhaustive]
  38. pub enum ArchiveFormat {
  39. /// Tar archive.
  40. Tar(Option<Compression>),
  41. /// Zip archive.
  42. Zip,
  43. }
  44. /// The supported compression types.
  45. #[derive(Debug, Clone, Copy, PartialEq, Eq)]
  46. #[non_exhaustive]
  47. pub enum Compression {
  48. /// Gz compression (e.g. `.tar.gz` archives)
  49. Gz,
  50. }
  51. /// The zip entry.
  52. pub struct ZipEntry {
  53. path: PathBuf,
  54. is_dir: bool,
  55. file_contents: Vec<u8>,
  56. }
  57. /// A read-only view into an entry of an archive.
  58. #[non_exhaustive]
  59. pub enum Entry<'a, R: Read> {
  60. /// An entry of a tar archive.
  61. #[non_exhaustive]
  62. Tar(Box<tar::Entry<'a, R>>),
  63. /// An entry of a zip archive.
  64. #[non_exhaustive]
  65. Zip(ZipEntry),
  66. }
  67. impl<'a, R: Read> Entry<'a, R> {
  68. /// The entry path.
  69. pub fn path(&self) -> crate::api::Result<Cow<'_, Path>> {
  70. match self {
  71. Self::Tar(e) => e.path().map_err(Into::into),
  72. Self::Zip(e) => Ok(Cow::Borrowed(&e.path)),
  73. }
  74. }
  75. /// Extract this entry into `into_path`.
  76. /// If it's a directory, the target will be created, if it's a file, it'll be extracted at this location.
  77. /// If it's a symlink, it will be created.
  78. /// Note: You need to include the complete path, with file name and extension.
  79. pub fn extract(self, into_path: &path::Path) -> crate::api::Result<()> {
  80. match self {
  81. Self::Tar(mut entry) => {
  82. // validate path
  83. let path = entry.path()?;
  84. if path.components().any(|c| matches!(c, Component::ParentDir)) {
  85. return Err(
  86. std::io::Error::new(
  87. std::io::ErrorKind::InvalidInput,
  88. "cannot extract path with parent dir component",
  89. )
  90. .into(),
  91. );
  92. }
  93. // determine if it's a file or a directory
  94. if entry.header().entry_type() == tar::EntryType::Directory {
  95. // this is a directory, lets create it
  96. match fs::create_dir_all(into_path) {
  97. Ok(_) => (),
  98. Err(e) => {
  99. if e.kind() != io::ErrorKind::AlreadyExists {
  100. return Err(e.into());
  101. }
  102. }
  103. }
  104. } else {
  105. // handle files, symlinks, hard links, etc. and set permissions
  106. entry.unpack(into_path)?;
  107. }
  108. }
  109. Self::Zip(entry) => {
  110. if entry.is_dir {
  111. // this is a directory, lets create it
  112. match fs::create_dir_all(into_path) {
  113. Ok(_) => (),
  114. Err(e) => {
  115. if e.kind() != io::ErrorKind::AlreadyExists {
  116. return Err(e.into());
  117. }
  118. }
  119. }
  120. } else {
  121. let mut out_file = fs::File::create(into_path)?;
  122. io::copy(&mut Cursor::new(entry.file_contents), &mut out_file)?;
  123. }
  124. }
  125. }
  126. Ok(())
  127. }
  128. }
  129. /// The extract manager to retrieve files from archives.
  130. pub struct Extract<'a, R: Read + Seek> {
  131. reader: ArchiveReader<R>,
  132. archive_format: ArchiveFormat,
  133. tar_archive: Option<tar::Archive<&'a mut ArchiveReader<R>>>,
  134. }
  135. impl<'a, R: std::fmt::Debug + Read + Seek> std::fmt::Debug for Extract<'a, R> {
  136. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  137. f.debug_struct("Extract")
  138. .field("reader", &self.reader)
  139. .field("archive_format", &self.archive_format)
  140. .finish()
  141. }
  142. }
  143. impl<'a, R: Read + Seek> Extract<'a, R> {
  144. /// Create archive from reader.
  145. pub fn from_cursor(mut reader: R, archive_format: ArchiveFormat) -> Extract<'a, R> {
  146. if reader.rewind().is_err() {
  147. #[cfg(debug_assertions)]
  148. eprintln!("Could not seek to start of the file");
  149. }
  150. let compression = if let ArchiveFormat::Tar(compression) = archive_format {
  151. compression
  152. } else {
  153. None
  154. };
  155. Extract {
  156. reader: match compression {
  157. Some(Compression::Gz) => {
  158. ArchiveReader::GzCompressed(Box::new(flate2::read::GzDecoder::new(reader)))
  159. }
  160. _ => ArchiveReader::Plain(reader),
  161. },
  162. archive_format,
  163. tar_archive: None,
  164. }
  165. }
  166. /// Reads the archive content.
  167. pub fn with_files<
  168. E: Into<crate::api::Error>,
  169. F: FnMut(Entry<'_, &mut ArchiveReader<R>>) -> std::result::Result<bool, E>,
  170. >(
  171. &'a mut self,
  172. mut f: F,
  173. ) -> crate::api::Result<()> {
  174. match self.archive_format {
  175. ArchiveFormat::Tar(_) => {
  176. let archive = tar::Archive::new(&mut self.reader);
  177. self.tar_archive.replace(archive);
  178. for entry in self.tar_archive.as_mut().unwrap().entries()? {
  179. let entry = entry?;
  180. if entry.path().is_ok() {
  181. let stop = f(Entry::Tar(Box::new(entry))).map_err(Into::into)?;
  182. if stop {
  183. break;
  184. }
  185. }
  186. }
  187. }
  188. ArchiveFormat::Zip => {
  189. #[cfg(feature = "fs-extract-api")]
  190. {
  191. let mut archive = zip::ZipArchive::new(self.reader.get_mut())?;
  192. let file_names = archive
  193. .file_names()
  194. .map(|f| f.to_string())
  195. .collect::<Vec<String>>();
  196. for path in file_names {
  197. let mut zip_file = archive.by_name(&path)?;
  198. let is_dir = zip_file.is_dir();
  199. let mut file_contents = Vec::new();
  200. zip_file.read_to_end(&mut file_contents)?;
  201. let stop = f(Entry::Zip(ZipEntry {
  202. path: path.into(),
  203. is_dir,
  204. file_contents,
  205. }))
  206. .map_err(Into::into)?;
  207. if stop {
  208. break;
  209. }
  210. }
  211. }
  212. }
  213. }
  214. Ok(())
  215. }
  216. /// Extract an entire source archive into a specified path. If the source is a single compressed
  217. /// file and not an archive, it will be extracted into a file with the same name inside of
  218. /// `into_dir`.
  219. pub fn extract_into(&mut self, into_dir: &path::Path) -> crate::api::Result<()> {
  220. match self.archive_format {
  221. ArchiveFormat::Tar(_) => {
  222. let mut archive = tar::Archive::new(&mut self.reader);
  223. archive.unpack(into_dir)?;
  224. }
  225. ArchiveFormat::Zip => {
  226. #[cfg(feature = "fs-extract-api")]
  227. {
  228. let mut archive = zip::ZipArchive::new(self.reader.get_mut())?;
  229. for i in 0..archive.len() {
  230. let mut file = archive.by_index(i)?;
  231. // Decode the file name from raw bytes instead of using file.name() directly.
  232. // file.name() uses String::from_utf8_lossy() which may return messy characters
  233. // such as: 爱交易.app/, that does not work as expected.
  234. // Here we require the file name must be a valid UTF-8.
  235. let file_name = String::from_utf8(file.name_raw().to_vec())?;
  236. let out_path = into_dir.join(file_name);
  237. if file.is_dir() {
  238. fs::create_dir_all(&out_path)?;
  239. } else {
  240. if let Some(out_path_parent) = out_path.parent() {
  241. fs::create_dir_all(out_path_parent)?;
  242. }
  243. let mut out_file = fs::File::create(&out_path)?;
  244. io::copy(&mut file, &mut out_file)?;
  245. }
  246. // Get and Set permissions
  247. #[cfg(unix)]
  248. {
  249. use std::os::unix::fs::PermissionsExt;
  250. if let Some(mode) = file.unix_mode() {
  251. fs::set_permissions(&out_path, fs::Permissions::from_mode(mode))?;
  252. }
  253. }
  254. }
  255. }
  256. }
  257. }
  258. Ok(())
  259. }
  260. }