Optimize filename normalization (#1460)

* Normalize slug using ucs-normalize-NFC-string after removing Unicode nonspacing marks
* Add org-roam-slug--preserve-chars-from-normalization for Unicode Normalization
* Add org-roam-slug-trim-chars instead of org-roam-slug--preserve-chars-from-normalization
This commit is contained in:
USAMI Kenta
2021-04-05 22:35:58 +09:00
committed by GitHub
parent 8ad57b1218
commit 2d58651699

View File

@ -234,6 +234,35 @@ Function should return a filename string based on title."
:type 'function :type 'function
:group 'org-roam) :group 'org-roam)
(defcustom org-roam-slug-trim-chars
  ;; Code points are written in hexadecimal so that each entry reads the
  ;; same as its U+XXXX name in the Unicode chart:
  ;; Combining Diacritical Marks https://www.unicode.org/charts/PDF/U0300.pdf
  (list #x0300   ; 768 COMBINING GRAVE ACCENT
        #x0301   ; 769 COMBINING ACUTE ACCENT
        #x0302   ; 770 COMBINING CIRCUMFLEX ACCENT
        #x0303   ; 771 COMBINING TILDE
        #x0304   ; 772 COMBINING MACRON
        #x0306   ; 774 COMBINING BREVE
        #x0307   ; 775 COMBINING DOT ABOVE
        #x0308   ; 776 COMBINING DIAERESIS
        #x0309   ; 777 COMBINING HOOK ABOVE
        #x030A   ; 778 COMBINING RING ABOVE
        #x030C   ; 780 COMBINING CARON
        #x031B   ; 795 COMBINING HORN
        #x0323   ; 803 COMBINING DOT BELOW
        #x0324   ; 804 COMBINING DIAERESIS BELOW
        #x0325   ; 805 COMBINING RING BELOW
        #x0327   ; 807 COMBINING CEDILLA
        #x032D   ; 813 COMBINING CIRCUMFLEX ACCENT BELOW
        #x032E   ; 814 COMBINING BREVE BELOW
        #x0330   ; 816 COMBINING TILDE BELOW
        #x0331)  ; 817 COMBINING MACRON BELOW
  "List of characters stripped from a title while building its slug.
Each element is a character (an integer code point).  The default
value lists the combining diacritical marks used with the Latin
alphabet, so that accented Latin letters lose their accents."
  :type '(repeat character)
  :group 'org-roam)
(defcustom org-roam-title-sources '((title headline) alias) (defcustom org-roam-title-sources '((title headline) alias)
"The list of sources from which to retrieve a note title. "The list of sources from which to retrieve a note title.
Each element in the list is either: Each element in the list is either:
@ -794,10 +823,11 @@ Each ref is returned as a cons of its type and its key."
(defun org-roam--title-to-slug (title) (defun org-roam--title-to-slug (title)
"Convert TITLE to a filename-suitable slug." "Convert TITLE to a filename-suitable slug."
(cl-flet* ((nonspacing-mark-p (char) (cl-flet* ((nonspacing-mark-p (char)
(eq 'Mn (get-char-code-property char 'general-category))) (memq char org-roam-slug-trim-chars))
(strip-nonspacing-marks (s) (strip-nonspacing-marks (s)
(apply #'string (seq-remove #'nonspacing-mark-p (ucs-normalize-NFC-string
(ucs-normalize-NFD-string s)))) (apply #'string (seq-remove #'nonspacing-mark-p
(ucs-normalize-NFD-string s)))))
(cl-replace (title pair) (cl-replace (title pair)
(replace-regexp-in-string (car pair) (cdr pair) title))) (replace-regexp-in-string (car pair) (cdr pair) title)))
(let* ((pairs `(("[^[:alnum:][:digit:]]" . "_") ;; convert anything not alphanumeric (let* ((pairs `(("[^[:alnum:][:digit:]]" . "_") ;; convert anything not alphanumeric