Browse Source

Working refactor

testing-framework
Noam Ross Noam Ross 1 year ago
parent
commit
da781981b3
76 changed files with 2968 additions and 1059 deletions
  1. +3
    -0
      .Rbuildignore
  2. +2
    -6
      .github/CONTRIBUTING.md
  3. +4
    -0
      .gitignore
  4. +19
    -15
      DESCRIPTION
  5. +34
    -7
      NAMESPACE
  6. +4
    -0
      NEWS.md
  7. +88
    -0
      R/addins.R
  8. +109
    -0
      R/criticmarkup.R
  9. +305
    -0
      R/dedoc.R
  10. +82
    -0
      R/diff.R
  11. +0
    -86
      R/document_hooks.R
  12. +24
    -12
      R/docx-utils.R
  13. +0
    -147
      R/docx_reversible.R
  14. +19
    -0
      R/extract-outputs.R
  15. +0
    -274
      R/extract.R
  16. +31
    -0
      R/get_style_dist.R
  17. +41
    -0
      R/officedown.R
  18. +27
    -6
      R/officer-embed.R
  19. +38
    -9
      R/officer-modify_style.R
  20. +37
    -0
      R/pandocoml.R
  21. +38
    -32
      R/preprocessor.R
  22. +1
    -1
      R/redoc-package.R
  23. +141
    -0
      R/redoc.R
  24. +90
    -0
      R/stri-utils.R
  25. +93
    -18
      R/utils.R
  26. +206
    -0
      R/wrap.R
  27. +155
    -0
      R/wrappers.R
  28. +57
    -39
      README.Rmd
  29. +55
    -45
      README.md
  30. +0
    -18
      _pkgdown.yml
  31. +28
    -0
      inst/WORDLIST
  32. BIN
      inst/examples/example-edited.docx
  33. +47
    -0
      inst/examples/example.Rmd
  34. BIN
      inst/examples/example.docx
  35. +30
    -0
      inst/lua-filters/criticmarkup-commentsonly.lua
  36. +12
    -12
      inst/lua-filters/criticmarkup.lua
  37. +46
    -0
      inst/lua-filters/extract-outputs.lua
  38. +32
    -0
      inst/lua-filters/inline-headers-tomd.lua
  39. +37
    -0
      inst/lua-filters/inline-headers-toword.lua
  40. +44
    -0
      inst/lua-filters/protect-empty.lua
  41. +27
    -0
      inst/lua-filters/revchunks.lua
  42. +0
    -28
      inst/protect-empty.lua
  43. +0
    -28
      inst/revchunks.lua
  44. +0
    -71
      inst/rmarkdown/templates/rdocx_reversible/skeleton/skeleton.Rmd
  45. BIN
      inst/rmarkdown/templates/rdocx_reversible/skeleton/skeleton.docx
  46. +1
    -0
      inst/rmarkdown/templates/redoc/skeleton/.gitignore
  47. +57
    -0
      inst/rmarkdown/templates/redoc/skeleton/skeleton.Rmd
  48. BIN
      inst/rmarkdown/templates/redoc/skeleton/skeleton.docx
  49. +1
    -1
      inst/rmarkdown/templates/redoc/template.yaml
  50. +14
    -0
      inst/rstudio/addins.dcf
  51. +35
    -0
      man/addins.Rd
  52. +58
    -0
      man/dedoc.Rd
  53. BIN
      man/figures/readme-diff.png
  54. +2
    -2
      man/is_redoc.Rd
  55. +35
    -0
      man/make_preknitter.Rd
  56. +89
    -0
      man/make_wrapper.Rd
  57. +26
    -0
      man/pandoc_ast.Rd
  58. +0
    -20
      man/parse_rmd_to_df.Rd
  59. +0
    -35
      man/rdocx_reversible.Rd
  60. +22
    -0
      man/redoc-package.Rd
  61. +36
    -14
      man/redoc.Rd
  62. +42
    -0
      man/redoc_diff.Rd
  63. +0
    -14
      man/redoc_example_docx.Rd
  64. +0
    -14
      man/redoc_example_rmd.Rd
  65. +23
    -0
      man/redoc_examples.Rd
  66. +4
    -4
      man/redoc_extract_rmd.Rd
  67. +0
    -48
      man/undoc.Rd
  68. +46
    -0
      pkgdown/_pkgdown.yml
  69. +169
    -0
      pkgdown/extra.css
  70. +5
    -0
      tests/spelling.R
  71. +1
    -3
      tests/testthat/.gitignore
  72. +0
    -17
      tests/testthat/test-reverse.R
  73. +71
    -0
      tests/testthat/test-roundtrip.R
  74. BIN
      vignettes/figures/readme-diff.png
  75. +150
    -0
      vignettes/mixed-workflows-with-redoc.Rmd
  76. +75
    -33
      vignettes/redoc-package-design.Rmd

+ 3
- 0
.Rbuildignore View File

@@ -15,8 +15,11 @@
tests/testthat/token_file.enc
^appveyor\.yml$
^README\.Rmd$
^README\.md$
^README-.*\.png$
^\.V8history$
^TODO\.md$
^CONTRIBUTING\.md$
^inst/endnote$
^tests/testthat/artifacts$
^testing$

+ 2
- 6
.github/CONTRIBUTING.md View File

@@ -52,16 +52,12 @@ Here are some topics I'm mulling over. Feel free to open an issue to discuss any
more general Endnote package. (Hmm, [possibly useful](https://github.com/kaizhang/citeproc-endnote))
- Handling formats with figures/captions at the end of the file.
- Packaging up not just the Rmd but supporting files into the `docx` file.
- Supporting conversion of Critic Markup from Markdown to word (via a pandoc
lua filter, shouldn't be too hard.)
- Google Doc outputs. Google docs has a new API in invite-only beta that exposes
- Google Doc outputs. Google docs has a new API that exposes
a JSON document model, which may enable this. It would require some considerable
pandoc/lua work.
- Reversible Powerpoint? Whoah, nelly. Pandoc doesn't even have a Powerpoint
reader yet.
- Naming things and API/workflow. Package name, function names, etc., are
all malleable.
all malleable, as are the appropriate sensible defaults.
- Sustainability. If this works out, should **redoc** live in an org or be
absorbed into another package. Should there be an "officeverse"?

If any of these interest you please do open an issue to dicuss them.

+ 4
- 0
.gitignore View File

@@ -1,3 +1,4 @@
inst/doc
Meta
doc
.Rproj.user
@@ -6,3 +7,6 @@ doc
.DS_store
inst/random
docs
TODO.md
tests/testthat/artifacts
testing

+ 19
- 15
DESCRIPTION View File

@@ -1,5 +1,5 @@
Package: redoc
Version: 0.0.0.9000
Version: 0.1.0.9000
Title: Reversible Reproducible Documents
Description: Implements a reversible 'R Markdown' to 'Microsoft Word' pipeline.
Authors@R: person(given = "Noam",
@@ -13,27 +13,31 @@ LazyData: true
ByteCompile: true
URL: https://github.com/noamross/redoc
BugReports: https://github.com/noamross/redoc/issues
Suggests:
testthat,
roxygen2,
covr,
pkgdown
RoxygenNote: 6.1.1
Imports:
rmarkdown,
officer,
tools,
utils,
knitr,
xfun,
mime,
officer,
diffobj,
stringi,
mime,
xml2,
backports,
whoami,
yaml
yaml,
jsonlite,
sessioninfo
Suggests:
spelling,
testthat,
roxygen2,
covr,
pkgdown,
rstudioapi
Remotes:
r-lib/pkgdown,
r-lib/covr
davidgohel/officer,
brodieG/diffobj,
r-lib/pkgdown
SystemRequirements: pandoc (>= 2.1.2) - http://pandoc.org
VignetteBuilder: knitr
Roxygen: list(markdown = TRUE)
Language: en-US

+ 34
- 7
NAMESPACE View File

@@ -1,13 +1,27 @@
# Generated by roxygen2: do not edit by hand

export(citationwrap)
export(dedoc)
export(dedoc_to_active_file)
export(dedoc_to_new_file)
export(htmlcommentwrap)
export(is_redoc)
export(parse_rmd_to_df)
export(rdocx_reversible)
export(latexwrap)
export(make_preknitter)
export(make_wrapper)
export(pandoc_ast)
export(rawblockwrap)
export(rawspanwrap)
export(redoc)
export(redoc_diff)
export(redoc_example_docx)
export(redoc_example_edited_docx)
export(redoc_example_rmd)
export(redoc_extract_rmd)
export(undoc)
importFrom(knitr,all_patterns)
export(roundtrip_active_file)
import(xml2)
importFrom(diffobj,diffFile)
importFrom(jsonlite,fromJSON)
importFrom(knitr,knit_global)
importFrom(knitr,knit_print)
importFrom(knitr,opts_chunk)
@@ -18,28 +32,41 @@ importFrom(officer,styles_info)
importFrom(rmarkdown,output_format)
importFrom(rmarkdown,pandoc_convert)
importFrom(rmarkdown,word_document)
importFrom(stringi,"stri_sub<-")
importFrom(stringi,stri_c)
importFrom(stringi,stri_count_fixed)
importFrom(stringi,stri_detect_fixed)
importFrom(stringi,stri_detect_regex)
importFrom(stringi,stri_extract_all_regex)
importFrom(stringi,stri_extract_first_regex)
importFrom(stringi,stri_join)
importFrom(stringi,stri_length)
importFrom(stringi,stri_locate_all_fixed)
importFrom(stringi,stri_locate_all_regex)
importFrom(stringi,stri_match_all_regex)
importFrom(stringi,stri_locate_first_fixed)
importFrom(stringi,stri_replace_all_fixed)
importFrom(stringi,stri_replace_all_regex)
importFrom(stringi,stri_replace_first_fixed)
importFrom(stringi,stri_replace_first_regex)
importFrom(stringi,stri_replace_last_fixed)
importFrom(stringi,stri_split_lines1)
importFrom(stringi,stri_trim_both)
importFrom(stringi,stri_sub)
importFrom(stringi,stri_subset_fixed)
importFrom(stringi,stri_subset_regex)
importFrom(tools,file_path_sans_ext)
importFrom(utils,unzip)
importFrom(whoami,fullname)
importFrom(whoami,username)
importFrom(xfun,parse_only)
importFrom(xml2,read_xml)
importFrom(xml2,write_xml)
importFrom(xml2,xml_add_child)
importFrom(xml2,xml_child)
importFrom(xml2,xml_children)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_find_first)
importFrom(xml2,xml_remove)
importFrom(xml2,xml_set_attrs)
importFrom(yaml,as.yaml)
importFrom(yaml,read_yaml)
importFrom(yaml,write_yaml)
importFrom(yaml,yaml.load)

+ 4
- 0
NEWS.md View File

@@ -1,3 +1,7 @@
# redoc 0.1.0.9000

* Major refactor for presentation at NY R Conference

# redoc 0.0.0.9000

* Initial commit

+ 88
- 0
R/addins.R View File

@@ -0,0 +1,88 @@
#' RStudio Addin Functions
#'
#' @description
#' These functions act on the RStudio text editor and can be used via the
#' Addins menu or the R console. `rountrip_active_file()` ("Render and Update")
#' knits the current file using [redoc()] replaces the text with text that has
#' been round-tripped to Word and back. This helps eliminate small formatting
#' differences.
#'
#' The `dedoc_*` functions de-render Word documents (using [dedoc()]),
#' and place the results in the current or new file. By default, they will
#' also display a diff (generated via [redoc_diff()]) of edits in the word file
#' compared to its original version a compilation time. If called without
#' an input file, they will prompt for file selection via the RStudio GUI.
#'
#'
#' @param docx The input Word file, originally generated by [redoc()]
#' @param showdiff Display a diff of the current version of the document against
#' the rendered version?
#' @aliases addins redoc_addins
#' @rdname addins
#' @export
roundtrip_active_file <- function() {
if (!requireNamespace("rstudioapi")) {
stop("The 'rstudioapi' package is required for this function")
}
active_file <- rstudioapi::getSourceEditorContext()
cursor_position <- active_file$selection[[1]]$range$start
rstudioapi::documentSave(active_file$id)
docfile <- rmarkdown::render(
normalizePath(active_file$path),
output_format = redoc(roundtrip = TRUE),
quiet = TRUE,
clean = TRUE
)
rfile <- redoc_extract_rmd(docfile,
type = "roundtrip", dir = tempdir(),
overwrite = TRUE
)
rstudioapi::setDocumentContents(readfile(rfile), active_file$id)
rstudioapi::setCursorPosition(cursor_position, active_file$id)
}

#' @export
#' @rdname addins
dedoc_to_active_file <- function(docx = NULL, showdiff = TRUE) {
if (!requireNamespace("rstudioapi")) {
stop("The 'rstudioapi' package is required for this function")
}

active_file <- rstudioapi::getSourceEditorContext()
cursor_position <- active_file$selection[[1]]$range$start
if (is.null(docx)) {
docx <- rstudioapi::selectFile(
caption = "Select Word file to dedoc",
filter = "Word Files (*.docx)"
)
}
tmprmd <- dedoc(docx, dir = tempdir(), overwrite = TRUE)
rstudioapi::setDocumentContents(readfile(tmprmd), active_file$id)
rstudioapi::setCursorPosition(cursor_position, active_file$id)
if (showdiff) {
print(
redoc_diff(docx)
)
}
}

#' @export
#' @rdname addins
dedoc_to_new_file <- function(docx = NULL, showdiff = TRUE) {
if (!requireNamespace("rstudioapi")) {
stop("The 'rstudioapi' package is required for this function")
}
if (is.null(docx)) {
docx <- rstudioapi::selectFile(
caption = "Select Word file to dedoc",
filter = "Word Files (*.docx)"
)
}
tmprmd <- dedoc(docx, dir = tempdir(), overwrite = TRUE)
rstudioapi::documentNew(readfile(tmprmd), type = "rmarkdown")
if (showdiff) {
print(
redoc_diff(docx)
)
}
}

+ 109
- 0
R/criticmarkup.R View File

@@ -0,0 +1,109 @@
# Heavily borrowed from https://github.com/ropenscilabs/trackmd
# Might end up in worded/officedown? Best place to use seems to
# be the knitr document hook

#' Process Critic Markup syntax for processing by pandoc
#'
#' Converts critic markup syntax in markdown to syntax appropriate for
#' processing by pandoc. If converted to to `docx` format, will display
#' as MS Word tracked changes.
#'
#' A good place to put this function is in the `document` knitr hook of an
#' [R Markdown output format][rmarkdown::output_format()].
#'
#' @param md markdown input with critic markup, a length-1 character vector
#' @param author Name to attribute Critic Markup changes to. If NULL,
#' defaults to system user information via [whoami::fullname()].
#' @return A character vector of markdown, split by lines
#' @noRd
#' @importFrom whoami fullname username
#' @importFrom stringi stri_replace_all_regex stri_replace_first_fixed stri_join
#' stri_extract_all_regex
criticmarkup_to_pandoc <- function(md, author = NULL) {
if (is.null(author)) {
author <- fullname(fallback = username(fallback = "R User"))
}

timestamp <- as.character(Sys.time(), format = "%Y-%m-%dT%H:%M:%SZ")

captures <- c(
insertion = "(?s)\\{\\+\\+(.*?)\\+\\+\\}",
deletion = "(?s)\\{--(.*?)--\\}",
substitution = "\\{~~(.*?)~>(.*?)~~\\}",
highlight = "(?s)\\{==(.*?)==\\}\\{>>(.*?)<<\\}\\[\\[(\\d+)\\]\\]",
comment = "(?s)\\{>>(.*?)<<\\}\\[\\[(\\d+)\\]\\]"
)

insertions <- stri_extract_all_regex(md, captures["insertion"],
omit_no_match = TRUE
)[[1]]
deletions <- stri_extract_all_regex(md, captures["deletion"],
omit_no_match = TRUE
)[[1]]
comments <- stri_extract_all_regex(md, "(?s)\\{>>(.*?)<<\\}",
omit_no_match = TRUE
)[[1]]
# Mark paragraph breaks in insertions and deletions
for (i in insertions) {
md <- stri_replace_first_fixed(
md, i,
stri_replace_all_regex(
i, "\n{2,}",
"++}[]{.paragraph-insertion}\n\n{++"
)
)
}
for (i in deletions) {
md <- stri_replace_first_fixed(
md, i,
stri_replace_all_regex(
i, "\n{2,}",
"--}[]{.paragraph-deletion}\n\n{--"
)
)
}
# Number comments
for (i in seq_along(comments)) {
md <- stri_replace_first_fixed(
md, comments[i],
stri_join(comments[i], "[[", i, "]]")
)
}

replacements <- c(
insertion = stri_join(
"[$1]{.insertion author=\"", author, "\" date=\"",
timestamp, "\"}"
),
deletion = stri_join(
"[$1]{.deletion author=\"", author, "\" date=\"",
timestamp, "\"}"
),
substitution = stri_join(
"[$1]{.deletion author=\"", author, "\" date=\"",
timestamp, "\"}",
"[$2]{.insertion author=\"", author, "\" date=\"",
timestamp, "\"}"
),
highlight = stri_join(
"[$2]{.comment-start id=\"$3\" author=\"", author,
"\" date=\"", timestamp,
"\"}$1[]{.comment-end id=\"$3\"}"
),
comment = stri_join(
"[$1]{.comment-start id=\"$2\" author=\"", author,
"\" date=\"", timestamp,
"\"}[]{.comment-end id=\"$2\"}"
)
)

md <- stri_replace_all_regex(md, captures, replacements,
vectorize_all = FALSE
)
md
}

cmwrap <- function(rmd) {
rmd$text <- criticmarkup_to_pandoc(rmd$text)
return(rmd)
}

+ 305
- 0
R/dedoc.R View File

@@ -0,0 +1,305 @@
#' Convert an Reversible Document back to R Markdown
#'
#' Converts a document originally created with [redoc()] back to R
#' Markdown, including changes made to text in MS Word.
#'
#' @details R chunks may be lost in the editing process if using non-Microsoft
#' word processors (e.g. LibreOffice or in copy-and-pasting text into a new document.
#'
#' @param docx The `.docx`` file to convert
#' @param to the filename to write the resulting `.Rmd` file. The default is to
#' use the same basename as the docx document
#' @param dir The directory to write the `.Rmd`` to. Defaults to current working
#' directory
#' @param track_changes How to deal with tracked changes and comments in the
#' `.docx` file. `"accept"` accepts all changes, and `"reject"` rejects all of
#' them. The default, `"criticmarkup"`, converts the tracked changes to
#' [Critic Markup syntax](http://criticmarkup.com/spec.php#thebasicsyntax).
#' "comments_only" will only convert comments, as other changes can be
#' viewed with [redoc_diff()].
#' `"all"` marks up tracked changes and comments in `<span>` tags and is
#' useful for debugging. See the
#' [pandoc manual](http://pandoc.org/MANUAL.html#option--track-changes) for
#' details.
#' @param block_missing,inline_missing What to do about code blocks or inline code
#' whose output has been removed in the editing of the Word document. "restore"
#' attempts to restore the code as close to its original location in the document
#' as possible. "comment" will do so but wrap it in HTML comments. "omit" will
#' not restore the code at all.
#' @param wrap The width at which to wrap text. If `NA`, text is not wrapped
#' @param overwrite Whether to overwrite an existing file
#' @param orig_codefile,orig_docx The original `.codelist.yml` or Word document
#' created when the document was first knit. Useful for debugging, or in
#' cases where the word file has been corrupted or transformed, for instance,
#' by copy-and-pasting the content into a new file. If provided, `dedoc` will
#' use this codefile or word file to re-create the `.Rmd` file with the text
#' of the input.
#' @param verbose whether to print pandoc progress text
#' @importFrom rmarkdown pandoc_convert
#' @importFrom yaml read_yaml
#' @importFrom tools file_path_sans_ext
#' @export
dedoc <- function(docx, to = NULL, dir = ".",
track_changes = "comments_only",
block_missing = "comment",
inline_missing = "omit",
wrap = 80, overwrite = FALSE,
orig_docx = NULL, orig_codefile = NULL,
verbose = FALSE ) {
if (!is_redoc(docx) && is.null(orig_codefile) && is.null(orig_docx)) {
md_only <- TRUE
if (verbose) {
message("Document is not reversible - no internal data on R chunks found.
Returning markdown only. Alternate data may be provided via `orig_codefile or `orig_docx`")
}
} else {
md_only <- FALSE
}

stopifnot(track_changes %in% c("comments_only", "criticmarkup", "accept", "reject", "all"))
stopifnot(block_missing %in% c("comment", "omit", "restore"))
stopifnot(inline_missing %in% c("comment", "omit", "restore"))

if (is.null(to)) to <- paste0(file_path_sans_ext(basename(docx)), ".Rmd")
if (is.null(dir)) dir <- "."
to <- file.path(dir, to)
if (!overwrite && file.exists(to)) stop(to, " exists and overwrite = FALSE")

if (!is.null(orig_codefile)) {
codelist <- read_yaml(orig_codefile)
} else if (!is.null(orig_docx)) {
codelist <- redoc_extract_code(orig_docx)
} else {
codelist <- redoc_extract_code(docx)
}

md <- convert_docx_to_md(docx, track_changes, wrap, verbose, md_only)
if (!md_only) {
codelist <- sort_by(codelist, "lineno")
md <- merge_yaml_headers(md, codelist)
md <- restore_code(md, codelist = list_subset(codelist, type = "block"),
missing = block_missing)
md <- restore_code(md, codelist = list_subset(codelist, type = "inline"),
missing = inline_missing)
md <- remove_extra_newlines(md)
}

cat(md, file = to, sep = "")
return(to)
}


#' Extract the Rmd used to to produce a Reversible Word Doc
#'
#' Documents produced with [redoc()] store an copy of the original
#' `.Rmd` files used to produce them. This is useful for diffing against the
#' version created with [dedoc()], especially if tracked changes have not been
#' used.
#' @param docx A path to a word file or a an `rdocx` object created with
#' [officer::read_docx()].
#' @param type One of `"original"` or `"roundtrip"`. `"original"` extracts the
#' exact document originally knit. `"roundtrip"` (default) extracts a document
#' that has been converted to Word and back with no edits in between. The
#' latter should be more useful for comparing against edits, as line-wrapping
#' and placement of no-output chunks should match.
#' @param dir The directory to write the `.Rmd`` to. Defaults to current working
#' directory
#' @param to the filename to write the resulting `.Rmd` file. The default is to
#' use the the original name with either `.orignal.Rmd` or `roundtrip.Rmd`
#' extensions.
#' @param overwrite whether to overwrite existing files
#' @export
#' @importFrom stringi stri_subset_fixed stri_subset_regex
#' @return The path to the extracted `.Rmd`
#' @examples
#' redoc_extract_rmd(redoc_example_docx(), dir = tempdir())
redoc_extract_rmd <- function(docx, type = c("original", "roundtrip"),
dir = ".", to = NULL, overwrite = FALSE) {
docx <- to_docx(docx)
assert_redoc(docx)
type <- match.arg(type)
rmdfiles <- list.files(file.path(docx$package_dir, "redoc"),
pattern = "\\.(r|R)md$",
full.names = TRUE)
if (type == "original")
rmdfile <- stri_subset_regex(rmdfiles, "(?:\\.preprocessed\\.|\\.roundtrip\\.)",
negate = TRUE)[1]
else if (type == "roundtrip")
rmdfile <- stri_subset_fixed(rmdfiles, ".roundtrip.")


if (is.null(to)) to <- basename(rmdfile)
out <- file.path(dir, to)
if (file.exists(out) && !overwrite) stop(out, " exists and overwrite=FALSE")
file.copy(rmdfile, out, overwrite = overwrite)
return(file.path(dir, to))
}

redoc_extract_code <- function(docx) {
docx <- to_docx(docx)
assert_redoc(docx)
codefile <- list.files(file.path(docx$package_dir, "redoc"),
pattern = "\\.codelist\\.yml$",
full.names = TRUE)
codelist <- read_yaml(codefile)
codelist
}

#' @importFrom stringi stri_replace_first_fixed stri_detect_fixed stri_join
restore_code <- function(md, codelist, missing) {
codelist <- sort_by(codelist, "lineno")
offset <- attr(md, "yaml_offset") %||% 0

last_detected_end <- offset

for (item in codelist) {
marker <- stri_join("<<<", item$name, ">>>")
marker_line <- stri_lineno_first_fixed(md, marker)
if (!is.na(marker_line)) {
last_detected_end <- marker_line + stri_count_lines(item$code)
md <- stri_replace_first_fixed(md, marker, item$code,
vectorize_all = FALSE)
md <- stri_replace_all_fixed(md, marker, "")
} else if (missing != "omit") {
if (missing == "comment") {
if (item$type == "inline") {
restorecode <- stri_join("<!--", item$code, ", originally line ",
item$lineno, " -->\n")
} else {
restorecode <- stri_join("<!-- originally line ", item$lineno, "\n",
item$code, "\n -->\n")
}
} else {
restorecode <- stri_join(item$code, "\n")
}
restoreline <- get_prior_empty_line_loc(md,
max(last_detected_end, item$lineno + offset)
)
md <- insert_at_prior_empty_line(md, restorecode,
item$lineno + offset)
offset <- offset + 2
}
}
return(md)
}

#' @importFrom stringi stri_extract_first_regex stri_replace_first_regex
#' stri_replace_last_fixed
#' @importFrom yaml yaml.load as.yaml
merge_yaml_headers <- function(md, codelist) {
old_header <- list_subset(codelist, label = "yamlheader")[[1]]
new_yaml <- stri_extract_first_regex(md, "(?s)\\A\\s*---\\n.*?\\n---\\n")
if (is.null(old_header) ||
all(is.na(old_header)) || length(old_header) == 0) {
attr(md, "yaml_offset") <- stri_count_fixed(new_yaml, '\n') - 1
return(md)
}

old_metadata <- yaml.load(old_header$code)

if (!is.na(new_yaml)) {
new_metadata <- yaml.load(new_yaml)
for (name in names(new_metadata)) {
old_metadata[[name]] <- new_metadata[[name]]
}
}
merged_yaml <- oneline("---",
stri_replace_last_fixed(
as.yaml(old_metadata,
handlers = NULL),
"\n", ""),
"---")
yaml_offset <- stri_count_lines(merged_yaml) -
stri_count_lines(old_header$code)

md <- stri_replace_first_regex(md, "(?s)^\\n*?---\\n.*?\\n---\\n", "")
md <- oneline(merged_yaml, md)
attr(md, "yaml_offset") <- yaml_offset
return(md)
}

# replace_yaml_blocks <- function(md, codelist) {
# yaml_blocks <- codelist[stri_detect_regex(names(codlist)), "^redoc-yaml-\\d+"]
# if (!length(yaml_blocks)) return(md)
# md <- paste(md, collapse = "\n")
# patterns <- paste0("[[chunk-", codelist$label, "]]")
# replacements <- paste(codelist$code)
# detected <- logical(1)
# prepend <- ""
# for (i in seq_along(patterns)) {
# detected <- stri_detect_fixed(md, patterns[i])
# if (!detected) {
# prepend <- paste0(c(prepend, replacements[i]), collapse = "\n\n")
# } else {
# replacements[i] <-
# paste0(c(prepend, replacements[i]), collapse = "\n\n")
# prepend <- ""
# }
# }
# for (i in seq_along(patterns)) {
# md <- stri_replace_first_fixed(
# md, patterns[i],
# replacements[i]
# )
# md <- stri_replace_all_fixed(md, patterns[i], "")
# }
# if (prepend != "") {
# md <- paste0(c(md, prepend), collapse = "")
# }
# md <- stri_replace_all_regex(md, "\n{3,}", "\n\n")
# md <- stri_split_lines1(md)
# }


convert_docx_to_md <- function(docx,
track_changes,
wrap = 80, verbose, md_only) {
docx <- normalizePath(docx)
track_changes <- match.arg(track_changes, track_changes)
if (track_changes == "criticmarkup") {
track_opts <- c(
"--track-changes=all",
paste0(
"--lua-filter=",
system.file("lua-filters", "criticmarkup.lua", package = "redoc")
)
)
} else if (track_changes == "comments_only") {
track_opts <- c(
"--track-changes=all",
paste0(
"--lua-filter=",
system.file("lua-filters", "criticmarkup-commentsonly.lua", package = "redoc")
)
)
} else {
track_opts <- paste0("--track-changes=", track_changes)
}

if (is.null(wrap)) {
wrap_opts <- "--wrap=none"
} else {
wrap_opts <- c("--wrap=auto", paste0("--columns=", wrap))
}
if (!md_only) {
filter_opts <- c(paste0(
"--lua-filter=",
system.file("lua-filters", "revchunks.lua", package = "redoc")
))
from_format = "docx+styles+empty_paragraphs"
} else {
filter_opts <- character(0)
from_format = "docx"
}
other_opts <- c("--standalone") # note adding metadata args for additional title block elements here might work (possibly as title block variable), though can't control order?
opts <- c(filter_opts, track_opts, wrap_opts, other_opts)
md_tmp <- tempfile(fileext = ".md")
pandoc_convert(docx,
from = from_format,
to = "markdown",
output = md_tmp,
options = opts,
verbose = verbose
)
return(readfile(md_tmp))
}

+ 82
- 0
R/diff.R View File

@@ -0,0 +1,82 @@
#' Compare changes in an edited redoc Word Document against the original
#'
#' `redoc_diff()` produces a diff object comparing the current contents of a Word
#' document originally generated by [redoc()] to the original R markdown file
#' used to create it.
#'
#' When an `.docx` file is created with `redoc()`, it internally stores the
#' original R Markdown file as well as a version that is round-tripped to
#' `.docx` and back. `redoc_diff()` de-renders the current `.docx` to R
#' Markdown (with [dedoc()]) and compares against these versions.
#'
#' @param docx Path to an MS Word `.docx` file originally generated with
#' `redoc()` and since edited.
#' @param target,current Which versions of the document to compare. One of
#' "original", "roundtrip", or "current".
#' @param track_changes,block_missing,inline_missing Arguments passed to
#' [dedoc()] to determine how to handle edits in the Word document.
#' @param wrap Width to wrap text lines when converting from docx to markdown.
#' If `NULL`, no wrapping.
#' @param mode,context,tar.banner,cur.banner,... Arguments passed to
#' [diffobj::diffFile()] to customize printing of the diff.
#' @return A [`Diff`][diffobj::diffPrint()] object, which will be displayed in the RStudio
#' Viewer, a browser, or the console, depending on the context.
#'
#' @importFrom diffobj diffFile
#' @export
redoc_diff <- function(docx,
target = "original",
current = "current",
track_changes = "comments_only",
block_missing = "comment",
inline_missing = "omit",
wrap = 80,
mode = "sidebyside", context = "auto",
tar.banner = NULL, cur.banner = NULL,
...) {

stopifnot(target %in% c("original", "roundtrip", "current"))
stopifnot(current %in% c("original", "roundtrip", "current"))

if (!is_redoc(docx)) stop("Word file not generated by redoc")

tmpd <- tempdir()
comps <- lapply(c(target, current), function(x) {
switch(x,
original = redoc_extract_rmd(docx,
type = "original", dir = tmpd,
overwrite = TRUE
),
roundtrip = redoc_extract_rmd(docx,
type = "roundtrip", dir = tmpd,
overwrite = TRUE
),
current = dedoc(docx, to = "current.Rmd", dir = tmpd,
track_changes = track_changes,
inline_missing = inline_missing,
wrap = wrap, overwrite = TRUE)
)
})

labs <- lapply(c(target, current), function(x) {
switch(x,
original = "Original R Markdown",
roundtrip = "Original R Markdown (roundtripped)",
current = "Current Word Document"
)
})

if (is.null(tar.banner)) tar.banner <- labs[[1]]
if (is.null(tar.banner)) tar.banner <- labs[[2]]

diff <- diffFile(target = comps[[1]], comps[[2]],
mode = mode, context = context,
tar.banner = labs[[1]], cur.banner = labs[[2]],
pager = list(file.path = tempfile(fileext = ".html")),
...)

#Temporary workaround for diffobj issue #133
if (isTRUE(all.equal(readLines(comps[[1]]), readLines(comps[[2]]))))
diff <- diff[-c(1:2),]
return(diff)
}

+ 0
- 86
R/document_hooks.R View File

@@ -1,86 +0,0 @@
# Heavily borrowed from https://github.com/ropenscilabs/trackmd
# Might end up in worded/officedown? Best place to use seems to
# be the knitr document hook

#' Process Critic Markup syntax for processing by pandoc
#'
#' Converts critic markup syntax in markdown to syntax appropriate for
#' processing by pandoc. If converted to to `docx` format, will dispaly
#' as MS Word tracked changes.
#'
#' A good place to put this function is in the `document` knitr hook of an
#' [R Markdown output format][rmarkdown::output_format()].
#'
#' @param input_lines markdown input, a character vector
#' @param author Name to attribute Critic Markup changes to. If NULL,
#' defaults to system user information.
#' @return A character vector of markdown, split by lines
#' @noRd
#' @importFrom whoami fullname username
#' @importFrom stringi stri_replace_all_regex stri_replace_first_fixed stri_join
#' stri_extract_all_regex
preprocess_criticmarkup <- function(input_lines, author = NULL) {

if (is.null(author)) {
author <- fullname(fallback = username(fallback = "R User"))
}

md <- oneline(input_lines)

captures <- c(
insertion = "(?s)\\{\\+\\+(.*?)\\+\\+\\}",
deletion = "(?s)\\{--(.*?)--\\}",
substitution = "\\{~~(.*?)~>(.*?)~~\\}",
highlight = "(?s)\\{==(.*?)==\\}\\{>>(.*?)<<\\}\\[\\[(\\d+)\\]\\]",
comment = "(?s)\\{>>(.*?)<<\\}\\[\\[(\\d+)\\]\\]"
)

insertions <- stri_extract_all_regex(md, captures["insertion"])[[1]]
deletions <- stri_extract_all_regex(md, captures["deletion"])[[1]]
comments <- stri_extract_all_regex(md, "(?s)\\{>>(.*?)<<\\}")[[1]]
#Mark paragraph breaks in insertions and deletions
for (i in insertions) {
md <- stri_replace_first_fixed(md, i,
stri_replace_all_regex(i, "\n{2,}",
"++}[]{.paragraph-insertion}\n\n{++"))
}
for (i in deletions) {
md <- stri_replace_first_fixed(md, i,
stri_replace_all_regex(i, "\n{2,}",
"++}[]{.paragraph-insertion}\n\n{++")
)
}
#Number comments
for (i in seq_along(comments)) {
md <- stri_replace_first_fixed(md, comments[i],
stri_join(comments[i], "[[", i, "]]"))
}

replacements <- c(
insertion = stri_join("[$1]{.insertion author=\"", author, "\"}"),
deletion = stri_join("[$1]{.deletion author=\"", author, "\"}"),
substitution = stri_join("[$1]{.deletion author=\"", author, "\"}",
"[$2]{.insertion author=\"", author, "\"}"),
highlight = stri_join("[$2]{.comment-start id=\"$3\" author=\"", author,
"\"}$1[]{.comment-end id=\"$3\"}"),
comment = stri_join("[$1]{.comment-start id=\"$2\" author=\"", author,
"\"}[]{.comment-end id=\"$2\"}")
)

md <- stri_replace_all_regex(md, captures, replacements,
vectorize_all = FALSE)
}

#' @importFrom stringi stri_replace_first_fixed
wrap_yaml <- function(lines, chunk_df) {
md <- paste(lines, collapse = "\n")
chunk_df <- chunk_df[chunk_df$type == "yaml" & chunk_df$label != "yaml-header",]
for (i in seq_along(chunk_df$label)) {
md <- stri_replace_first_fixed(md, chunk_df$code[i],
paste0("::: {custom-style=\"chunk-", chunk_df$label[i], "\"}",
"\n\n",
chunk_df$code[i],
"\n\n:::"))
}
stri_split_lines1(md)
}

+ 24
- 12
R/docx-utils.R View File

@@ -1,7 +1,7 @@
#' Is this a reversible document?
#'
#' A function for testing is the file can be un-knit. If not, un-knitting
#' may be attempted with the `orig_chunkfile` or `orig_docx` files in [undoc()].
#' A function for testing is the file can be de-rendered. If not, un-knitting
#' may be attempted with the `orig_chunkfile` or `orig_docx` files in [dedoc()].
#'
#' @param docx A path to a `.docx` file or an `rdocx` object produced by
#' [officer::read_docx()]
@@ -11,8 +11,9 @@
#' is_redoc(redoc_example_docx())
is_redoc <- function(docx) {
docx <- to_docx(docx)
chunkfile <- list.files(docx$package_dir, pattern = "\\.chunks\\.csv$")
return(as.logical(length(chunkfile)))
codefile <- list.files(file.path(docx$package_dir, "redoc"),
pattern = "\\codelist\\.yml$")
return(as.logical(length(codefile)))
}

#' @importFrom officer read_docx
@@ -30,20 +31,31 @@ assert_redoc <- function(docx) {
}
}

#' Path to an example R Markdown file
#' Files for examples and testing
#'
#' @export
#' @rdname redoc_examples
#' @aliases redoc_examples
#' @examples
#' redoc_example_rmd()
#' redoc_example_docx()
#' redoc_example_edited_docx()
redoc_example_rmd <- function() {
system.file("rmarkdown", "templates", "rdocx_reversible", "skeleton",
"skeleton.Rmd", package = "redoc")
system.file("examples", "example.Rmd", package = "redoc")
}

#' Path to an example Revserible Microsoft Word file
#' @export
#' @examples
#' redoc_example_docx()
#' @rdname redoc_examples
#' @aliases redoc_examples
redoc_example_docx <- function() {
system.file("rmarkdown", "templates", "rdocx_reversible", "skeleton",
"skeleton.docx", package = "redoc")
system.file("examples", "example.docx", package = "redoc")

}

#' @export
#' @rdname redoc_examples
#' @aliases redoc_examples
redoc_example_edited_docx <- function() {
system.file("examples", "example-edited.docx", package = "redoc")

}

+ 0
- 147
R/docx_reversible.R View File

@@ -1,147 +0,0 @@
#' Convert to a Reversible Microsoft Word Document
#'
#' Format for converting from R Markdown to a Microsoft Word Document that can
#' be reversed using [undoc()] after editing in Word.
#'
#' @param highlight_outputs whether to highlight outputs from chunks and inline
#' code in the final document
#' @param wrap when round-tripping the document, at what width to wrap the
#' markdown output? See [undoc()].
#' @param margins page margin size. Can be a single value or a named vector
#' with values, `top`, `bottom`, `left`, `right`, `gutter`, `header`, and
#' `footer`. If NULL defaults to the reference document.
#' @param line_numbers either TRUE or list with any of the arguments `start`,
#' `by`, `restart`, and `distance`
#' @param comment_author The name to affilliate any Critic Markup tracked
#' changes with
#' @param keep_md whether to keep the markdown document
#' @param ... other parameters passed to [rmarkdown::word_document()]
#' @importFrom rmarkdown output_format word_document
#' @importFrom officer read_docx
#' @importFrom tools file_path_sans_ext
#' @importFrom rmarkdown word_document
#' @importFrom knitr knit_print knit_global opts_chunk opts_knit
#' @importFrom xfun parse_only
#' @export
rdocx_reversible <- function(highlight_outputs = FALSE, wrap = 80,
margins = NULL, line_numbers = NULL,
comment_author = NULL, keep_md = FALSE, ...) {
out <- word_document(...)


# Pre-parse, name inline chunks and save chunk contents to lookup table
out$pre_knit <- function(input, ...) {
chunkfile <- paste0(file_path_sans_ext(input), ".chunks.csv")
knitr::opts_knit$set(chunkfile = chunkfile)
utils::write.table(parse_rmd_to_df(input),
file = chunkfile,
sep = ",", row.names = FALSE, qmethod = "double"
)
inline_counter(reset = TRUE)
chunk_counter(reset = TRUE)
}

out$knitr <- rmarkdown::knitr_options(
# Wrap code outputs in spans and divs
knit_hooks = list(
evaluate.inline = function(code, envir = knit_global()) {
v <- withVisible(eval(parse_only(code), envir = envir))
if (is.null(v$value) || v$value == "") v$value <- "\uFEFF"
if (v$visible) {
knit_print(v$value, inline = TRUE, options = opts_chunk$get())
}
},
inline = function(x) {
id <- paste0("inline-", inline_counter())
paste0("[", x, "]{custom-style=\"", id, "\"}")
},
chunk = function(x, options) {
if (isFALSE(options$redoc_include)) x <- ""
paste0(
"::: {custom-style=\"chunk-", options$label, "\"}\n",
x, "\n:::"
)
},
document = function(x) {
chunkfile <- opts_knit$get("chunkfile")
x <- preprocess_criticmarkup(x, author = comment_author)
x <- wrap_yaml(x, readcsv(chunkfile))
x
}
),
opts_hooks = list(
include = function(options) {
if (isFALSE(options$include)) {
options$include <- TRUE
options$redoc_include <- FALSE
}
options
}
)
)

md_extensions <- c("+smart", "+fenced_divs", "+bracketed_spans")

out$pandoc <- rmarkdown::pandoc_options(
to = "docx",
from = rmarkdown::from_rmarkdown(extensions = md_extensions),
args = c(
"--lua-filter",
system.file("protect-empty.lua", package = "redoc")
)
)

out$post_processor <-
function(metadata, input_file, output_file, clean, verbose) {
docx <- read_docx(output_file)
rmd_input <- get(envir = parent.frame(n = 1), "original_input")
chunkfile <- opts_knit$get("chunkfile")
mdfile <- paste0(basename(file_path_sans_ext(rmd_input)), ".md")

tmpd <- tempdir()

orig_rmd <- file.path(
tmpd, paste0(file_path_sans_ext(basename(rmd_input)), ".original.Rmd")
)
orig_md <- file.path(
tmpd, paste0(file_path_sans_ext(file_path_sans_ext(basename(input_file))), ".md")
)
file.copy(rmd_input, orig_rmd)
file.copy(input_file, orig_md)

roundtrip_rmd <- undoc(
output_file,
to = paste0(basename(file_path_sans_ext(rmd_input)), ".roundtrip.Rmd"),
dir = tmpd, wrap = wrap, overwrite = TRUE,
orig_chunkfile = chunkfile
)

docx <- embed_file(docx, roundtrip_rmd)
docx <- embed_file(docx, chunkfile)
docx <- embed_file(docx, orig_rmd)
docx <- embed_file(docx, orig_md)

if (highlight_outputs) {
docx <- highlight_output_styles(docx)
}

# Stuff to go to worded/officedown
if (!is.null(margins)) {
set_body_margins(docx, margins)
}

if (isTRUE(line_numbers)) {
set_body_linenumbers(docx)
} else if (is.list(line_numbers)) {
do.call(set_body_linenumbers, c(list(x = docx), line_numbers))
}

print(docx, output_file)
if (clean) {
file.remove(chunkfile)
}
return(output_file)
}
out$keep_md <- keep_md
out
}

+ 19
- 0
R/extract-outputs.R View File

@@ -0,0 +1,19 @@
#' @importFrom rmarkdown pandoc_convert
#' @importFrom yaml read_yaml
extract_outputs <- function(docx,
track_changes,
wrap,
verbose = FALSE) {
md_tmp <- tempfile(fileext = ".md")
pandoc_convert(normalizePath(docx),
from = "docx+styles+empty_paragraphs",
to = "markdown+fenced_code_blocks",
options = c(paste0("--lua-filter=",
system.file("lua-filters", "extract-outputs.lua", package = "redoc")),
"--standalone"
),
output = md_tmp,
verbose = verbose)
yml <- yaml::read_yaml(md_tmp)
yml
}

+ 0
- 274
R/extract.R View File

@@ -1,274 +0,0 @@
#' Convert an Reversible Document back to R Markdown
#'
#' Converts a document originally created with [rdocx_reversible()] back to R
#' Markdown, including changes made to text in MS Word.
#'
#' @details Internal data storing R chunks may be lost in the editing process.
#' This is known to occur with documents edited with LibreOffice.
#'
#' @param docx The `.docx file to convert`
#' @param to the filename to write the resulting `.Rmd` file. The default is to
#' use the same basename as the docx document
#' @param dir The directory to write the `.Rmd`` to. Defaults to current working
#' directory
#' @param track_changes How to deal with tracked changes and comments in the
#' `.docx` file. `"accept"` accepts all changes, and `"reject"` rejects all of
#' them. The default, `"criticmarkup"`, converts the tracked changes to
#' [Critic Markup syntax](http://criticmarkup.com/spec.php#thebasicsyntax).
#' `"all"` marks up tracked changes and comments in `<span>` tags. See the
#' [pandoc manual](http://pandoc.org/MANUAL.html#option--track-changes) for
#' details.
#' @param wrap The width at which to wrap text. If `NA`, text is not wrapped
#' @param overwrite Whether to overwrite an existing file
#' @param orig_chunkfile,orig_docx The original chunkfile or Word document
#' created when the document was first knit. Useful for debugging, or in
#' cases where the word file has been corrupted or transformed, for instance
#' by copy-and-pasting the content into a new file. If provided, undoc will
#' use this chunkfile or word file to re-create the `.Rmd` file with the text
#' of the input.
#' @param verbose whether to print pandoc progress text
#' @importFrom rmarkdown pandoc_convert
#' @importFrom tools file_path_sans_ext
#' @export
undoc <- function(docx, to = NULL, dir = ".",
track_changes = c("criticmarkup", "accept", "reject", "all"),
wrap = 80, overwrite = FALSE,
orig_chunkfile = NULL, orig_docx = NULL, verbose = FALSE) {
if (!is_redoc(docx) && is.null(orig_chunkfile) && is.null(orig_docx)) {
stop("Document is not reversible - no internal data on R chunks found.
Alternate data may be provided via orig_chunkfile or orig_docx")
}

if (is.null(to)) to <- paste0(file_path_sans_ext(basename(docx)), ".Rmd")
to <- file.path(dir, to)
if (!overwrite && file.exists(to)) stop(to, " exists and overwrite = FALSE")

if (!is.null(orig_chunkfile)) {
chunk_df <- readcsv(orig_chunkfile)
} else if (!is.null(orig_docx)) {
chunk_df <- redoc_extract_chunks(orig_docx)
} else {
chunk_df <- redoc_extract_chunks(docx)
}

md_lines <- convert_docx_to_md(docx, track_changes, wrap, verbose)
md_lines <- merge_yaml_headers(md_lines, chunk_df)
md_lines <- replace_yaml_blocks(md_lines, chunk_df)
md_lines <- replace_inlines(md_lines, chunk_df)
md_lines <- replace_chunks(md_lines, chunk_df)

cat(md_lines, file = to, sep = "\n")

return(to)
}

#' Extract the Rmd used to to produce a Reversible Word Doc
#'
#' Documents produced with [rdocx_reversible()] store an copy of the original
#' `.Rmd` files used to produce them. This is useful for diffing against the
#' version created with [undoc()], especially if tracked changes have not been
#' used.
#' @param docx A path to a word file or a an `rdocx` object created with
#' [officer::read_docx()].
#' @param type One of `"original"` or `"roundtrip"`. `"original"` extracts the
#' exact document originally knit. `"roundtrip"` (default) extracts a document
#' that has been converted to Word and back with no edits in between. The
#' latter should be more useful for comparing against edits, as line-wrapping
#' and placement of no-output chunks should match.
#' @param dir The directory to write the `.Rmd`` to. Defaults to current working
#' directory
#' @param to the filename to write the resulting `.Rmd` file. The default is to
#' use the the original name with either `.orignal.Rmd` or `roundtrip.Rmd`
#' extensions.
#' @param overwrite whether to overwite existing files
#' @export
#' @return The path to the extracted `.Rmd`
#' @examples
#' redoc_extract_rmd(redoc_example_docx(), dir = tempdir())
redoc_extract_rmd <- function(docx, type = c("original", "roundtrip"),
dir = ".", to = NULL, overwrite = FALSE) {
docx <- to_docx(docx)
assert_redoc(docx)
type <- match.arg(type)
rmdfile <- list.files(docx$package_dir,
pattern = paste0("\\.", type, "\\.Rmd$"),
full.names = TRUE
)
if (is.null(to)) to <- basename(rmdfile)
out <- file.path(dir, to)
if (file.exists(out) && !overwrite) stop(out, " exists and overwrite=FALSE")
file.copy(rmdfile, out, overwrite = overwrite)
return(file.path(dir, to))
}

#' @importFrom officer read_docx
redoc_extract_chunks <- function(docx) {
docx <- to_docx(docx)
assert_redoc(docx)
chunkfile <- list.files(docx$package_dir,
pattern = "\\.chunks\\.csv$",
full.names = TRUE
)
chunk_df <- readcsv(chunkfile)
chunk_df
}

#' @importFrom stringi stri_replace_all_fixed
replace_inlines <- function(md_lines, chunk_df) {
chunk_df <- chunk_df[chunk_df$type == "inline", ]
if (nrow(chunk_df)) {
patterns <- paste0("[[", chunk_df$label, "]]")
replacements <- paste0("`r ", chunk_df$code, "`")
md_lines <- stri_replace_all_fixed(md_lines, patterns, replacements,
vectorize_all = FALSE
)
}
md_lines
}

#' @importFrom stringi stri_replace_all_fixed stri_replace_first_fixed
#' stri_replace_all_regex stri_split_lines1 stri_detect_fixed
replace_chunks <- function(md_lines, chunk_df) {
chunk_df <- chunk_df[chunk_df$type == "block", ]
md_lines <- oneline(md_lines)
if (nrow(chunk_df)) {
patterns <- paste0("[[", chunk_df$label, "]]")
replacements <- paste(chunk_df$header, chunk_df$code, "```", sep = "\n")
detected <- logical(1)
append <- ""
start_append <- ""
last_detected <- NA_integer_
for (i in seq_along(patterns)) {
detected <- stri_detect_fixed(md_lines, patterns[i])
if (!detected) {
append <- oneline(append, replacements[i], collapse = "\n\n")
} else {
if (is.na(last_detected)) {
start_append <- append
} else {
replacements[last_detected] <-
oneline(replacements[last_detected], append, collapse = "\n\n")
}
last_detected <- i
append <- ""
}
}
if (start_append != "") {
has_yaml <- stri_detect_regex(md_lines, "(?s)^---.*---\n")
if (has_yaml) {
md_lines <- stri_replace_first_regex(
md_lines,
"(?s)(?<!^)(\n+)---\n",
oneline("$1---\n", escape_captures(start_append), "\n")
)
} else {
md_lines <- oneline(start_append, md_lines)
}
}
for (i in seq_along(patterns)) {
md_lines <- stri_replace_first_fixed(
md_lines, patterns[i],
replacements[i]
)
md_lines <- stri_replace_all_fixed(md_lines, patterns[i], "")
}
md_lines <- stri_replace_all_regex(md_lines, "\n{3,}", "\n\n")
}
reline(md_lines)
}


#' @importFrom stringi stri_extract_first_regex stri_replace_first_regex
#' @importFrom yaml yaml.load as.yaml
merge_yaml_headers <- function(md_lines, chunk_df) {
chunk_df <- chunk_df[chunk_df$label == "yaml-header", ]
if (nrow(chunk_df)) {
md_lines <- paste0(md_lines, collapse = "\n")
new_yaml <- stri_extract_first_regex(md_lines, "(?s)^---\\n.*\\n---\\n")
new_metadata <- yaml.load(new_yaml)
old_metadata <- yaml.load(chunk_df$code)
for (name in names(new_metadata)) {
old_metadata[[name]] <- new_metadata[[name]]
}
md_lines <- stri_replace_first_regex(md_lines, "(?s)^---\\n.*\\n---\\n", "")
md_lines <- paste0(
paste0("---\n", as.yaml(old_metadata), "---\n"),
md_lines,
collapse = "\n"
)
md_lines <- stri_split_lines1(md_lines)
}
md_lines
}

replace_yaml_blocks <- function(md_lines, chunk_df) {
chunk_df <- chunk_df[chunk_df$type == "yaml" & chunk_df$label != "yaml-header", ]
if (nrow(chunk_df)) {
md_lines <- paste(md_lines, collapse = "\n")
patterns <- paste0("[[chunk-", chunk_df$label, "]]")
replacements <- paste(chunk_df$code)
detected <- logical(1)
prepend <- ""
for (i in seq_along(patterns)) {
detected <- stri_detect_fixed(md_lines, patterns[i])
if (!detected) {
prepend <- paste0(c(prepend, replacements[i]), collapse = "\n\n")
} else {
replacements[i] <-
paste0(c(prepend, replacements[i]), collapse = "\n\n")
prepend <- ""
}
}
for (i in seq_along(patterns)) {
md_lines <- stri_replace_first_fixed(
md_lines, patterns[i],
replacements[i]
)
md_lines <- stri_replace_all_fixed(md_lines, patterns[i], "")
}
if (prepend != "") {
md_lines <- paste0(c(md_lines, prepend), collapse = "")
}
md_lines <- stri_replace_all_regex(md_lines, "\n{3,}", "\n\n")
md_lines <- stri_split_lines1(md_lines)
}
md_lines
}


convert_docx_to_md <- function(docx, track_changes, wrap, verbose) {
docx <- normalizePath(docx)
track_changes <- match.arg(track_changes, track_changes)
if (track_changes == "criticmarkup") {
track_opts <- c(
"--track-changes=all",
paste0(
"--lua-filter=",
system.file("criticmarkup.lua", package = "redoc")
)
)
} else {
track_opts <- paste0("--track-changes=", track_changes)
}

if (is.na(wrap)) {
wrap_opts <- "--wrap=none"
} else {
wrap_opts <- c("--wrap=auto", paste0("--columns=", wrap))
}
filter_opts <- c(paste0(
"--lua-filter=",
system.file("revchunks.lua", package = "redoc")
))
other_opts <- c("--standalone") # note adding metadata args for additional title block elements here might work (possibly as title block variable), though can't control order?
opts <- c(track_opts, filter_opts, wrap_opts, other_opts)
md_tmp <- tempfile(fileext = ".md")
pandoc_convert(docx,
from = "docx+styles+empty_paragraphs",
to = "markdown",
output = md_tmp,
options = opts,
verbose = verbose
)
return(readLines(md_tmp))
}

+ 31
- 0
R/get_style_dist.R View File

@@ -0,0 +1,31 @@
#' @import xml2
get_style_distribution <- function(docx) {
docx <- "custom-reference.docx"
dxml <- read_xml(unz(docx, filename = "word/document.xml"))
sxml <- read_xml(unz(docx, filename = "word/styles.xml"))
txml <- read_xml(unz(docx, filename = "word/theme/theme1.xml"))
st <- xml_find_all(sxml, "/w:styles/w:style")
all_desc <- data.frame(stringsAsFactors = FALSE,
style_type = xml_attr(st, "type"),
style_id = xml_attr(st, "styleId"),
style_name = xml_attr(xml_child(st, "w:name"), "val"),
based_on = xml_attr(xml_child(st, "w:basedOn"), "val"),
next_style = xml_attr(xml_child(st, "w:next"), "val"),
p_space_before = xml_attr(xml_find_first(st, "w:pPr/w:spacing"), "before"),
p_space_after = xml_attr(xml_find_first(st, "w:pPr/w:spacing"), "after"),
p_space_line = xml_attr(xml_find_first(st, "w:pPr/w:spacing"), "line"),
p_space_linerule = xml_attr(xml_find_first(st, "w:pPr/w:spacing"), "lineRule"),
p_ind_left = xml_attr(xml_find_first(st, "w:pPr/w:ind"), "left"),
p_ind_right = xml_attr(xml_find_first(st, "w:pPr/w:ind"), "right"),
p_ind_hanging = xml_attr(xml_find_first(st, "w:pPr/w:ind"), "hanging"),
p_ind_firstLine = xml_attr(xml_find_first(st, "w:pPr/w:ind"), "firstLine")
)
}

#' @importFrom stringi stri_c
get_style_property <- function(docx, style_id, sub = c("", "pPr", "rPr"), el, attr = "val") {
docx <- "custom-reference.docx"
sxml <- read_xml(unz(docx, filename = "word/styles.xml"))
st <- xml_find_all(sxml, stri_c("/w:styles/w:style[@w:styleId=\"", style_id, "\"]"))
}


+ 41
- 0
R/officedown.R View File

@@ -0,0 +1,41 @@
rdocx_document2 <- function(mapstyles, ...) {

redoc_format <- redoc::redoc(...)

if( missing(mapstyles) )
mapstyles <- list()

pre_processor = function(metadata, input_file, runtime, knit_meta, files_dir, output_dir){
md <- readLines(input_file)
md <- officedown:::chunk_macro(md)
md <- officedown:::block_macro(md)
writeLines(md, input_file)
}

post_processor <- function(metadata, input_file, output_file, clean, verbose) {
x <- officer::read_docx(output_file)
x <- officedown:::process_images(x)
x <- officedown:::process_links(x)
x <- officedown:::process_embedded_docx(x)
x <- officedown:::process_chunk_style(x)
x <- officedown:::process_sections(x)
x <- officedown:::process_par_settings(x)
x <- officer::change_styles(x, mapstyles = mapstyles)

print(x, target = output_file)
output_file
}


output_format <- rmarkdown::output_format(
knitr = redoc_format$knitr,
pandoc = redoc_format$pandoc,
keep_md = redoc_format$keep_md,
clean_supporting = redoc_format$clean_supporting,
pre_processor = pre_processor,
post_processor = post_processor,
base_format = redoc_format
)

output_format
}

+ 27
- 6
R/officer-embed.R View File

@@ -1,13 +1,35 @@
# Functions in this file should probably be migrated to the `officer` package

embed_files <- function(docx, files, internal_dir = NULL) {
for (file in files) {
if (!file.exists(file)) next
if (file.info(file)$isdir) {
embed_files(docx, list.files(file, full.names = TRUE),
internal_dir = do.call(file.path,
as.list(c(internal_dir, basename(file)))))
} else {
embed_file(docx, file, internal_dir = internal_dir)
}
}
return(docx)
}

#' @importFrom mime guess_type
embed_file <- function(docx, file, content_type = guess_type(file)) {
docx <- to_docx(docx)
file.copy(file, to = file.path(docx$package_dir, basename(file)))
embed_file <- function(docx, file, content_type = guess_type(file),
internal_dir = NULL) {
if (!is.null(internal_dir) &&
!dir.exists(file.path(docx$package_dir, internal_dir)))
dir.create(file.path(docx$package_dir, internal_dir), recursive = TRUE)

file.copy(file,
to = do.call(file.path,
as.list(
c(docx$package_dir, internal_dir, basename(file)))
))

extension <- tools::file_ext(file)
docx$content_type$add_ext(extension, content_type)
docx$content_type$save()
#docx$content_type$save()

rel <- docx$doc_obj$relationship()
new_rid <- sprintf("rId%.0f", rel$get_next_id())
@@ -17,7 +39,6 @@ embed_file <- function(docx, file, content_type = guess_type(file)) {
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/",
extension
),
target = file.path("..", basename(file))
target = file.path("..", file.path(internal_dir, basename(file)))
)
return(docx)
}

+ 38
- 9
R/officer-modify_style.R View File

@@ -3,7 +3,8 @@

#' @importFrom xml2 read_xml xml_find_first xml_add_child xml_set_attrs
#' write_xml
add_to_style <- function(docx, style_id, name, attrs = NULL) {
add_to_style <- function(docx, style_id, name, attrs = NULL, where = c("both", "rPr", "pPr", "top")) {
where = match.arg(where)
docx <- to_docx(docx)
name <- prepend_ns(name)
if (!is.null(attrs)) names(attrs) <- prepend_ns(names(attrs))
@@ -13,12 +14,23 @@ add_to_style <- function(docx, style_id, name, attrs = NULL) {
styles_xml,
paste0("//w:style[@w:styleId='", style_id, "']")
)
rPr <- xml_add_child(style_xml, "w:rPr")
pPr <- xml_add_child(style_xml, "w:pPr")
style <- xml_add_child(rPr, name)
if (!is.null(attrs)) xml_set_attrs(style, attrs)
style <- xml_add_child(pPr, name)
if (!is.null(attrs)) xml_set_attrs(style, attrs)

if (where %in% c("rPr", "both")) {
rPr <- xml_add_child(style_xml, "w:rPr")
style <- xml_add_child(rPr, name)
if (!is.null(attrs)) xml_set_attrs(style, attrs)
}

if (where %in% c("pPr", "both")) {
pPr <- xml_add_child(style_xml, "w:pPr")
style <- xml_add_child(pPr, name)
if (!is.null(attrs)) xml_set_attrs(style, attrs)
}

if (where == "top") {
topstyle <- xml_add_child(style_xml, name)
if (!is.null(attrs)) xml_set_attrs(topstyle, attrs)
}
write_xml(styles_xml, styles_path)
return(docx)
}
@@ -34,10 +46,27 @@ highlight_output_styles <- function(docx, name = "shd",
docx <- to_docx(docx)
styles <- styles_info(docx)
styles <-
styles$style_id[stri_detect_regex(styles$style_id, "^(inline|chunk)-")]
styles$style_id[stri_detect_regex(styles$style_id, "^redoc-")]
lapply(styles, function(s) {
add_to_style(docx, s, name = name, attrs = attrs)
add_to_style(docx, s, name = "hidden")
})
return(docx)
}

#' @importFrom officer styles_info
#' @importFrom stringi stri_detect_regex
hide_output_styles <- function(docx, name = "shd",
attrs = c(
val = "clear",
color = "auto",
fill = "FFBEBF"
)) {
docx <- to_docx(docx)
styles <- styles_info(docx)
styles <-
styles$style_id[stri_detect_regex(styles$style_id, "^redoc-")]
lapply(styles, function(s) {
add_to_style(docx, s, name = "hidden", where = "top")
})
return(docx)
}


+ 37
- 0
R/pandocoml.R View File

@@ -0,0 +1,37 @@
#' Prints the body of Office Open XML generated when converting markdown
#' to docx via pandoc
#'
#' Intended as a developer function for creating docx-generating code.
#' Will usually include some components unneccessary for generating open xml
#' elements, such as bookmarks and `<w:sectPr>` elements
#' @importFrom xml2 read_xml xml_find_first xml_children xml_remove xml_find_all
#' @importFrom rmarkdown pandoc_convert
#' @noRd
md_to_openxml <- function(text, simplify = TRUE,
remove_bookmarks = simplify,
remove_secs = simplify) {
tmpf <- tempfile(fileext = ".md")
cat(text, file = tmpf)
tmpw <- tempfile(fileext = ".docx")
pandoc_convert(tmpf, to = "docx", from = "markdown", output = tmpw)
oml <- (
xml_find_first(
read_xml(unz(tmpw, filename = "word/document.xml")),
"//w:body")
)
if (remove_bookmarks) {
xml_remove(xml_find_all(oml, "//w:bookmarkStart | //w:bookmarkEnd"))
}
if (remove_secs) {
xml_remove(xml_find_all(oml, "//w:sectPr"))
}

out <- paste(as.character(xml_children(oml)), collapse = "\n")
unlink(c(tmpf, tmpw))
class(out) <- "xmltext"
out
}

print.xmltext <- function(x) {
cat(x)
}

+ 38
- 32
R/preprocessor.R View File

@@ -1,43 +1,49 @@
make_rmd_preprocessor <- function(text_processor,
meta_ext= "preprocessed") {

preprocessor <- function(input, encoding, intermediates_dir) {

#' Generate a function to run prior to knitting
#'
#' @description **redoc** modifies R Markdown documents prior to knitting to
#' capture and store document elements that will need to be restored later. It
#' does this via a function passed to the `pre_knit` argument of
#' [rmarkdown::output_format()]. `make_preknitter` generates this function from
#' a list of [wrapper functions][make_wrapper()].
#'
#' @param wrappers a list of [wrapper functions][make_wrapper()].
#'
#' @details **rmarkdown** does not provide a mechanism to modify the file prior
#' to knitting, so **redoc** accomplishes this by reaching up the call stack and
#' modifying the environment in the [rmarkdown::render()] function. The
#' function generated by `make_preknitter` will do this by generating the
#' pre-processed R Markdown file and associated list of code chunks, adding these
#' the list of intermediates for cleanup, and switching the document input to
#' this pre-processed document.
#'
#' @return A function to be used in `output_format(pre_knit = ...)``
#' @importFrom yaml write_yaml
#' @export
#' @examples
#'
#' make_preknitter(wrappers = list(htmlcommentwrap, latexwrap))
#'
make_preknitter <- function(wrappers = list()) {

pre_knit <- function(input, ...) {
render_env <- get_parent_env_with("knit_input")
knit_input <- get("knit_input", envir = render_env)
intermediates <- get("intermediates", envir = render_env)
pre_knit_input <- get("knit_input", envir = render_env)
intermediates_loc <- get("intermediates_loc", envir = render_env)

rmd_text <- readfile(input)
rmd <- wrap_code(rmd_text, wrappers = wrappers)

rmd_text_preprocessed <- text_processor(rmd_text)
codefile <- intermediates_loc(
file_with_meta_ext(basename(input), "codelist", "yml")
)

preprocessed_rmd_file <- intermediates_loc(
file_with_meta_ext(knit_input, meta_ext)
file_with_meta_ext(pre_knit_input, "preprocessed")
)
cat(rmd_text_preprocessed, file = preprocessed_rmd_file)

write_yaml(rmd$code, codefile)
cat(rmd$text, file = preprocessed_rmd_file)
assign("knit_input", preprocessed_rmd_file, envir = render_env)
assign("intermediates", c(intermediates, preprocessed_rmd_file),
envir = render_env)

add_intermediates(c(codefile, preprocessed_rmd_file))
}

return(preprocessor)
return(pre_knit)
}

# add_date <- function(text) {
# oneline(text, "`r Sys.Date()`")
# }
# of <- rmarkdown::output_format(
# knit = rmarkdown::knitr_options(),
# pandoc = rmarkdown::pandoc_options(to = "html"),
# pre_knit = make_rmd_preprocessor(add_date, "dateadded"),
# base_format = rmarkdown::html_document()
#
# )
#
# #undebug(preprocess_rmd)
# #debug(get_render_env)
# rmarkdown::render("x x.Rmd", output_format = of, clean = TRUE,
# intermediates_dir = "i_d")

+ 1
- 1
R/redoc-package.R View File

@@ -10,7 +10,7 @@
#' bug reports or other feedback at
#' <https://github.com/noamross/redoc/issues>
#'
#' @name redoc
#' @name redoc-package
#' @author Noam Ross \email{noam.ross@gmail.com}
#' @keywords package
NULL

+ 141
- 0
R/redoc.R View File

@@ -0,0 +1,141 @@
#' R Markdown format for Reversible Reproducible Word Documents
#'
#' Format for converting from R Markdown to a Microsoft Word Document that can
#' be reversed using [dedoc()] after editing in Word.
#'
#' @param highlight_outputs whether to highlight outputs from chunks and inline
#' code in the final document
#' @param wrap when round-tripping the document, at what width to wrap the
#' markdown output? See [dedoc()].
#' @param margins page margin size. Can be a single value or a named vector
#' with values, `top`, `bottom`, `left`, `right`, `gutter`, `header`, and
#' `footer`. If NULL defaults to the reference document.
#' @param line_numbers either TRUE or list with any of the arguments `start`,
#' `by`, `restart`, and `distance`
#' @param comment_author The name to attribute any CriticMarkup tracked
#' changes to. Defaults to [whoami::fullname()].
#' @param keep_md whether to keep the intermediate markdown document
#' @param wrappers a list of wrapper functions to capture text to protect when
#' rendering and de-rendering. See [make_wrapper()].
#' @param diagnostics Whether to embed diagnostic information in the output
#' file. If TRUE, this will save session information and the current
#' pandoc and (if used) RStudio versions inside the Word document for later
#' bug-checking.
#' @param ... other parameters passed to [rmarkdown::word_document()]
#' @importFrom rmarkdown output_format word_document
#' @importFrom officer read_docx
#' @importFrom tools file_path_sans_ext
#' @importFrom rmarkdown word_document
#' @importFrom knitr knit_print knit_global opts_chunk opts_knit
#' @export
redoc <- function(highlight_outputs = TRUE, wrap = 80,
margins = NULL, line_numbers = NULL,
comment_author = NULL, keep_md = FALSE,
wrappers = list(htmlcommentwrap, latexwrap,
rawblockwrap, rawspanwrap,
cmwrap, citationwrap),
diagnostics = TRUE,
...) {

# Make a function to pre-process the Rmd file

pre_knit <- make_preknitter(wrappers = wrappers)

md_extensions <- c("+smart", "+fenced_divs", "+bracketed_spans")

pandoc <- rmarkdown::pandoc_options(
to = "docx+empty_paragraphs",
from = rmarkdown::from_rmarkdown(extensions = md_extensions),
args = c("--lua-filter",
system.file("lua-filters", "protect-empty.lua",
package = "redoc"))
)

post_processor <-
function(metadata, input_file, output_file, clean, verbose) {
docx <- officer::read_docx(output_file)

render_env <- get_parent_env_with(c("intermediates", "intermediates_loc",
"knit_input", "original_input"))

original_rmd_input <- get("original_input", envir = render_env)
renv_intermediates <- get("intermediates", envir = render_env)
renv_intermediates_loc <- get("intermediates_loc", envir = render_env)
renv_intermediates_dir <- get("intermediates_dir", envir = render_env)

codefile <- renv_intermediates_loc(
file_with_meta_ext(basename(original_rmd_input), "codelist", "yml")
)
codelist <- read_yaml(codefile)

embed_files(docx, c(original_rmd_input, renv_intermediates),
internal_dir = "redoc")

roundtrip_rmd <- dedoc(
output_file,
to = file_with_meta_ext(
basename(original_rmd_input), "roundtrip", "Rmd"
),
dir = renv_intermediates_dir,
wrap = wrap,
overwrite = TRUE,
orig_codefile = codefile
)

add_intermediates(roundtrip_rmd)
embed_files(docx, roundtrip_rmd, internal_dir = "redoc")

if (diagnostics) {
diag_file <- renv_intermediates_loc(
file_with_meta_ext(basename(original_rmd_input), "diagnostics", "yml")
)
write_yaml(get_diagnostics(), diag_file, column.major = FALSE)
add_intermediates(diag_file)
embed_files(docx, diag_file, internal_dir = "redoc")
}

docx <- hide_output_styles(docx)
if (highlight_outputs) docx <- highlight_output_styles(docx)


# Stuff to go to worded/officedown
if (!is.null(margins)) docx <- set_body_margins(docx, margins)
if (isTRUE(line_numbers)) {
set_body_linenumbers(docx)
} else if (is.list(line_numbers)) {
do.call(set_body_linenumbers, c(list(x = docx), line_numbers))
}

print(docx, output_file)
return(output_file)
}
output_format <- rmarkdown::output_format(
pandoc = pandoc,
knitr = rmarkdown::knitr_options(),
keep_md = keep_md,
pre_knit = pre_knit,
post_processor = post_processor,
base_format = word_document()
)
output_format
}

get_diagnostics <- function() {
pandoc_version = as.character(rmarkdown::pandoc_version())
session_info = sessioninfo::session_info()
if (requireNamespace("rstudioapi") &&
rstudioapi::isAvailable()) {
rstudio_info <- rstudioapi::versionInfo()[c("version", "mode")]
} else {
rstudio_info <- NULL
}
list(
redoc_version = as.list(
session_info$packages[session_info$packages$package == "redoc",]
),
pandoc_version = pandoc_version,
rstudio_info = rstudio_info,
session_info = session_info
)
}


+ 90
- 0
R/stri-utils.R View File

@@ -0,0 +1,90 @@
#' @importFrom stringi stri_join
oneline <- function(..., collapse = "\n") {
stri_join(c(...), collapse = collapse)
}

#' @importFrom stringi stri_split_lines1
reline <- function(...) {
stri_split_lines1(oneline(...))
}

#' @importFrom stringi stri_join
brkt <- function(x) {
stri_join("[[", x, "]]")
}

#' @importFrom stringi stri_join
brktn <- function(x) {
stri_join("\n[[", x, "]]\n")
}

#' @importFrom stringi stri_join
divwrap <- function(text, id, class = "redoc") {
stri_join("<div class=\"", class, "\" id=\"", id, "\">\n", text, "\n</div>")
}

#' @importFrom stringi stri_join
spanwrap <- function(text, id, class = "redoc") {
# stri_join("[\n", text, "\n]{class=\"", class, "\" id=\"", id, "\"}")
stri_join("<span class=\"", class, "\" id=\"", id, "\">", text, "</span>")

}

#' Get the line number of the first fixed match
#' @noRd
#' @importFrom stringi stri_locate_first_fixed stri_sub stri_replace_all_fixed
#' stri_count_fixed
stri_lineno_first_fixed <- function(text, pattern) {
loc <- stri_locate_first_fixed(text, pattern)
pre <- stri_sub(text, from = 1L, to = loc[,1] - 1)
as.integer(stri_count_lines(pre))
}

stri_count_lines <- function(text) {
text <- normalize_newlines(text)
stri_count_fixed(text, '\n') + 1
}

#' @importFrom stringi stri_locate_all_fixed stri_replace_all_fixed
insert_at_line <- function(text, insertion, line, newline = TRUE) {
text2 <- normalize_newlines(text)
if (newline) insertion <- stri_join(insertion, "\n")
line_locs <- stri_locate_all_fixed(text2, "\n")[[1]][, 1]
stri_sub(text2, line_locs[line], to = line_locs[line] - 1) <- insertion
return(text2)
}

get_prior_empty_line_loc <- function(text, line) {
text <- normalize_newlines(text)
line_locs <- c(0, stri_locate_all_fixed(text, "\n")[[1]][, 1])
if (line > length(line_locs)) {
return(stri_length(text) + 1)
}
empty_locs <- rbind(c(0,0), stri_locate_all_regex(text, "(?s)\n\\h?\n")[[1]])
empty_loc <- max(empty_locs[empty_locs <= line_locs[line]])
empty_loc
}

#' @importFrom stringi stri_locate_all_fixed stri_replace_all_fixed
#' stri_locate_all_regex stri_length stri_sub<-
insert_at_prior_empty_line <- function(text, insertion, line) {
text2 <- normalize_newlines(text)
ll <- get_prior_empty_line_loc(text2, line)
if (ll == stri_length(text2) + 1) insertion <- stri_join("\n", insertion)
stri_sub(text2, ll, ll) <- stri_join(insertion, "\n")
return(text2)
}

normalize_newlines <- function(text) {
stri_replace_all_fixed(text, c('\r\n', '\n\r', '\r'), '\n',
vectorize_all = FALSE)
}

remove_extra_newlines <- function(text) {
stri_replace_all_regex(text, '[\n\r]{2,}', '\n\n')
}

last <- function(x) {
x[length(x)]
}


+ 93
- 18
R/utils.R View File

@@ -1,19 +1,17 @@
#' @importFrom stringi stri_join
oneline <- function(..., collapse = "\n") {
stri_join(c(...), collapse = collapse)
}
`%||%` <- function(x, y) if (is.null(x) || length(x) == 0) y else x


#' @importFrom stringi stri_split_lines1
reline <- function(...) {
stri_split_lines1(oneline(...))
subel <- function(x, name) {
lapply(x, function(x) x[[name]])
}

escape_captures <- function(str) {
stri_replace_all_fixed(str, "$", "\\$")
sort_by <- function(list, by, null_val = NA) {
vals <- unlist(lapply(list, function(x) x[[by]] %||% null_val))
list[order(vals)]
}

readcsv <- function(x) {
utils::read.csv(x, stringsAsFactors = FALSE)
na_rm <- function(x) {
x[!is.na(x)]
}

readfile <- function(x) {
@@ -25,14 +23,91 @@ file_with_meta_ext <- function(file, meta_ext, ext = tools::file_ext(file)) {
".", meta_ext, ".", ext, sep = "")
}

file_with_ext <- function(file, ext) {
paste(tools::file_path_sans_ext(file), ".", ext, sep = "")
get_parent_env_with <- function(var_names) {
for (frame in rev(sys.frames())[-1]) {
present <- all(vapply(
var_names, exists, logical(1), envir = frame, inherits = FALSE
))
if (present) return(frame)
}
stop("No parent environment found with ",
paste(var_names, collapse = ", "))
}

add_intermediates <- function(new_intermediates) {
render_env <- get_parent_env_with(c("intermediates", "intermediates_loc",
"knit_input"))
old_intermediates <- get("intermediates", envir = render_env)
assign("intermediates",
c(old_intermediates, new_intermediates),
envir = render_env)
}

get_parent_env_with <- function(var_name) {
for (frame in rev(sys.frames())[-1]) {
if (exists(var_name, envir = frame, inherits = FALSE))
return(frame)
list_subset <- function(list, ...) {
filters <- list(...)
for (i in seq_along(filters)) {
list <- Filter(list,
f = function(x) {
x[[names(filters)[i]]] == filters[[i]]
})
}
return(list)
}

#' Convert a document to Pandoc's abstract syntax tree format
#'
#' This is a convenience function for testing and development.
#' @param file the file to convert using pandoc.
#' @param from the format to convert from. If `NULL` (default) File type will be
#' auto-detected by extension. `.Rmd` files will be treated as `.md`.
#' @param tolist whether to return the AST as an R list. If `FALSE`, will
#' return length-1 character vector of raw JSON.
#' @export
#' @importFrom rmarkdown pandoc_convert
#' @importFrom jsonlite fromJSON
#' @return A list containing the structured document
#' @examples
#' ast <- pandoc_ast(redoc_example_docx())
pandoc_ast <- function(file, from = NULL, tolist = TRUE) {
tmp <- tempfile()
if (is.null(from) && tools::file_ext(file) == "Rmd") from = "markdown"
rmarkdown::pandoc_convert(input = normalizePath(file),
to = "json",
from = from,
output = tmp)