refactor(tree-sitter): centralize grammar config & hacks

Easier to track and maintain.
This commit is contained in:
Henrik Lissner
2025-08-27 00:35:39 +02:00
parent 6009c2b838
commit 3b58741522
18 changed files with 85 additions and 142 deletions

View File

@@ -15,10 +15,11 @@ MODE and TS-MODE are major mode symbols. If RECIPES is provided, fall back to
MODE if RECIPES don't pass `treesit-ready-p' when activating TS-MODE. Use this
for ts modes that error out instead of failing gracefully.
RECIPES are an alist of plists with the format (LANG &key URL REV SOURCE-DIR CC
CPP COMMIT), which will be transformed into entries for
`treesit-language-source-alist' (which describe what each of these keys mean).
Note that COMMIT is only available in Emacs >=31."
RECIPES is a symbol (a grammar language name), list thereof, or alist of plists
with the format (LANG &key URL REV SOURCE-DIR CC CPP COMMIT). If an alist of
plists, it will be transformed into entries for `treesit-language-source-alist'
(which describes what each of these keys means). Note that COMMIT is ignored
pre-Emacs 31."
(declare (indent 2))
(cl-check-type mode symbol)
(cl-check-type ts-mode symbol)
@@ -41,6 +42,11 @@ Note that COMMIT is only available in Emacs >=31."
(fboundp ts-mode)
(or (eq treesit-enabled-modes t)
(memq ts-mode treesit-enabled-modes))
;; Lazily load autoload so
;; `treesit-language-source-alist' is initialized.
(let ((fn (symbol-function ts-mode)))
(or (not (autoloadp fn))
(autoload-do-load fn)))
;; Only prompt once, and log other times.
(cl-every (if ensured?
(doom-rpartial #'treesit-ready-p 'message)
@@ -54,16 +60,16 @@ Note that COMMIT is only available in Emacs >=31."
m))))))))
(with-eval-after-load 'treesit
(dolist (recipe recipes)
(cl-destructuring-bind (name &key url rev source-dir cc cpp commit)
(ensure-list recipe)
(setf (alist-get name treesit-language-source-alist)
(append (list url rev source-dir cc cpp)
;; COMPAT: 31.1 introduced a COMMIT recipe argument. On
;; <=30.x, extra arguments will trigger an arity error
;; when installing grammars.
(if (eq (cdr (func-arity 'treesit--install-language-grammar-1))
'many)
(list commit))))))))
(when (cdr (setq recipe (ensure-list recipe)))
(cl-destructuring-bind (name &key url rev source-dir cc cpp commit) recipe
(setf (alist-get name treesit-language-source-alist)
(append (list url rev source-dir cc cpp)
;; COMPAT: 31.1 introduced a COMMIT recipe argument. On
;; <=30.x, extra arguments will trigger an arity error
;; when installing grammars.
(if (eq (cdr (func-arity 'treesit--install-language-grammar-1))
'many)
(list commit)))))))))
;; ;; HACK: Remove and refactor when `use-package' eager macro expansion is solved or `use-package!' is removed
;; ;;;###autoload
@@ -81,6 +87,11 @@ Note that COMMIT is only available in Emacs >=31."
;; (evil-textobj-tree-sitter-goto-textobj group previous end query)))
;; sym))
;;;###autoload
(defun +tree-sitter-ts-mode-inhibit-side-effects-a (fn &rest args)
  "Call FN with ARGS while shielding the global mode-association alists.

Meant to be used as around-advice. `auto-mode-alist' and
`interpreter-mode-alist' are dynamically rebound to nil for the duration of
the call, so any entries FN adds to them are discarded when it returns.
Returns whatever FN returns."
  (let ((auto-mode-alist nil)
        (interpreter-mode-alist nil))
    (apply fn args)))
;;; TODO: Backwards compatibility

View File

@@ -10,38 +10,42 @@
:preface
(setq treesit-enabled-modes t)
;; HACK: These *-ts-mode-maybe functions all treat `treesit-enabled-modes'
;; strangely in the event the language's grammar is unavailable. Plus, they
;; add yet-another-layer of complexity for users to be cognizant of. Get rid
;; of them.
;; REVIEW: Handle this during the 'doom sync' process instead.
(setq auto-mode-alist
(save-match-data
(cl-loop for (src . fn) in auto-mode-alist
unless (and (functionp fn)
(string-match "-ts-mode-maybe$" (symbol-name fn)))
collect (cons src fn))))
;; HACK: The *-ts-mode major modes are inconsistent about how they treat
;; missing language grammars (some error out, some respect
;; `treesit-auto-install-grammar', some fall back to `fundamental-mode').
;; I'd like to address this poor UX using `major-mode-remap-alist' entries
;; created by `set-tree-sitter!' (which will fall back to the non-treesit
;; modes), but most *-ts-mode's clobber `auto-mode-alist' and/or
;; `interpreter-mode-alist' each time the major mode is activated, so those
;; must be undone too so they don't overwrite user config.
;; TODO: Handle this during the 'doom sync' process instead.
(save-match-data
(dolist (sym '(auto-mode-alist interpreter-mode-alist))
(set
sym (cl-loop for (src . fn) in (symbol-value sym)
unless (and (functionp fn)
(string-match "-ts-mode\\(?:-maybe\\)?$" (symbol-name fn)))
collect (cons src fn)))))
;; HACK: These *-ts-modes change `auto-mode-alist' and/or
;; `interpreter-mode-alist' every time they are activated, running the risk
;; of overwriting user (or Doom) config.
;; REVIEW: Should be addressed upstream.
(dolist (mode '(csharp-ts-mode
python-ts-mode))
(advice-add mode :around #'+tree-sitter-ts-mode-inhibit-side-effects-a))
:config
;; HACK: The implementation of `treesit-enabled-modes's setter and
;; `treesit-major-mode-remap-alist' are intrusively opinionated, so I
;; disable it altogether as to not unexpectedly modify
;; `major-mode-remap-alist' at runtime. What's more, there's no guarantee
;; this will be populated correctly unless the user is on a particular
;; commit of Emacs 31 or newer. Best we simply ignore it.
(dolist (m treesit-major-mode-remap-alist)
(setq major-mode-remap-alist (delete m major-mode-remap-alist)))
;; `treesit-major-mode-remap-alist' is intrusively opinionated, so disable
;; it to avoid untimely (and overriding) modifications of
;; `major-mode-remap-alist' at runtime. What's more, this was only
;; introduced in 31, so ignoring them is more consistent for pre-31 users.
(when major-mode-remap-alist
(dolist (m treesit-major-mode-remap-alist)
(setq major-mode-remap-alist (delete m major-mode-remap-alist))))
(setq treesit-major-mode-remap-alist nil)
;; HACK: treesit lacks any way to dictate where to install grammars.
(add-to-list 'treesit-extra-load-path (concat doom-profile-data-dir "tree-sitter"))
(defadvice! +tree-sitter--install-grammar-to-local-dir-a (fn &rest args)
"Write grammars to `doom-profile-data-dir'."
:around #'treesit-install-language-grammar
:around #'treesit--build-grammar
(let ((user-emacs-directory doom-profile-data-dir))
(apply fn args)))
;; HACK: Some *-ts-mode packages modify `major-mode-remap-defaults'
;; inconsistently. Playing whack-a-mole to undo those changes is more hassle
;; than simply ignoring them (by overriding `major-mode-remap-defaults' for
@@ -56,6 +60,15 @@
major-mode-remap-defaults)))
(funcall fn mode)))
;; HACK: Keep $EMACSDIR clean by installing grammars to the active profile.
(add-to-list 'treesit-extra-load-path (concat doom-profile-data-dir "tree-sitter"))
(defadvice! +tree-sitter--install-grammar-to-local-dir-a (fn &rest args)
"Write grammars to `doom-profile-data-dir'.

Around-advice: calls FN with ARGS while `user-emacs-directory' is dynamically
rebound to `doom-profile-data-dir', and returns FN's result."
:around #'treesit-install-language-grammar
:around #'treesit--build-grammar
;; The rebinding only lasts for the duration of the advised call.
;; NOTE(review): presumably treesit derives its default install directory from
;; `user-emacs-directory' -- confirm against treesit.el.
(let ((user-emacs-directory doom-profile-data-dir))
(apply fn args)))
;; TODO: Move most of these out to modules
(dolist (map '((awk "https://github.com/Beaglefoot/tree-sitter-awk" nil nil nil nil)
(bibtex "https://github.com/latex-lsp/tree-sitter-bibtex" nil nil nil nil)