From 1d9968bf69750101da3ec96d67083af412e1b130 Mon Sep 17 00:00:00 2001 From: targit Date: Thu, 23 Jul 2020 16:36:57 +0200 Subject: [PATCH] Make the db caching more efficient for gpg encrypted files (#963) Before this patch all hash-sums were computed over the file's or buffer's content. For encrypted files this means that we first have to decrypt the file before we can compute the hash-sum. So when the cache gets updated, all gpg files need to be decrypted, which is a very expensive operation and nearly defeats the purpose of having a cache in the first place (for gpg files). This changes the computation of hash-sums for gpg encrypted files. Instead of the content, the raw files on disk will be read. This shouldn't interfere with the current use of hashes. There is one ugly (but otherwise inconsequential) wart, though. For open buffers of files that are to be gpg encrypted we need to compute the hash sum over the contents as well. This is because there is no (easy) way to get the encrypted version. The consequence is that that buffer's file will be rehashed again (then using the bytes on disk). But all other unchanged gpg files will only be hashed once, as desired. Co-authored-by: Jethro Kuan --- org-roam-db.el | 97 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/org-roam-db.el b/org-roam-db.el index 4832233..a0e2299 100644 --- a/org-roam-db.el +++ b/org-roam-db.el @@ -49,6 +49,7 @@ (declare-function org-roam--extract-links "org-roam") (declare-function org-roam--list-all-files "org-roam") (declare-function org-roam--path-to-slug "org-roam") +(declare-function org-roam--file-name-extension "org-roam") (declare-function org-roam-buffer--update-maybe "org-roam-buffer") ;;;; Options @@ -367,6 +368,26 @@ connections, nil is returned." 
(files (mapcar 'car-safe (emacsql (org-roam-db) query file max-distance)))) files)) +(defun org-roam-db--file-hash (&optional file-path) + "Compute the hash of FILE-PATH, a file or current buffer." + (let* ((file-p (and file-path)) + (file-path (or file-path + (buffer-file-name (current-buffer)))) + (encrypted-p (and file-path + (string= (org-roam--file-name-extension file-path) + "gpg")))) + (cond ((and encrypted-p file-p) + (with-temp-buffer + (set-buffer-multibyte nil) + (insert-file-contents-literally file-path) + (secure-hash 'sha1 (current-buffer)))) + (file-p + (with-temp-buffer + (insert-file-contents file-path) + (secure-hash 'sha1 (current-buffer)))) + (t + (secure-hash 'sha1 (current-buffer)))))) + ;;;;; Updating (defun org-roam-db--update-meta () "Update the metadata of the current buffer into the cache." @@ -374,7 +395,7 @@ connections, nil is returned." (attr (file-attributes file)) (atime (file-attribute-access-time attr)) (mtime (file-attribute-modification-time attr)) - (hash (secure-hash 'sha1 (current-buffer)))) + (hash (org-roam-db--file-hash))) (org-roam-db-query [:delete :from files :where (= file $s1)] file) @@ -468,45 +489,45 @@ If FORCE, force a rebuild of the cache from scratch." 
(let* ((attr (file-attributes file)) (atime (file-attribute-access-time attr)) (mtime (file-attribute-modification-time attr))) - (org-roam--with-temp-buffer file - (let ((contents-hash (secure-hash 'sha1 (current-buffer)))) - (unless (string= (gethash file current-files) - contents-hash) - (org-roam-db--clear-file file) - (org-roam-db-query - [:insert :into files - :values $v1] - (vector file contents-hash (list :atime atime :mtime mtime))) - (setq file-count (1+ file-count)) - (when-let ((headlines (org-roam--extract-headlines file))) - (when (org-roam-db--insert-headlines headlines) - (setq headline-count (1+ headline-count))))))))) - ;; Second step: Rebuild the rest - (dolist (file org-roam-files) - (org-roam--with-temp-buffer file - (let ((contents-hash (secure-hash 'sha1 (current-buffer)))) + (let ((contents-hash (org-roam-db--file-hash file))) (unless (string= (gethash file current-files) contents-hash) - (when-let (links (org-roam--extract-links file)) - (org-roam-db-query - [:insert :into links - :values $v1] - links) - (setq link-count (1+ link-count))) - (when-let (tags (org-roam--extract-tags file)) - (org-roam-db-query - [:insert :into tags - :values $v1] - (vector file tags)) - (setq tag-count (1+ tag-count))) - (let ((titles (or (org-roam--extract-titles) - (list (org-roam--path-to-slug file))))) - (org-roam-db--insert-titles file titles) - (setq title-count (+ title-count (length titles)))) - (when-let* ((ref (org-roam--extract-ref))) - (when (org-roam-db--insert-ref file ref) - (setq ref-count (1+ ref-count))))) - (remhash file current-files)))) + (org-roam--with-temp-buffer file + (org-roam-db--clear-file file) + (org-roam-db-query + [:insert :into files + :values $v1] + (vector file contents-hash (list :atime atime :mtime mtime))) + (setq file-count (1+ file-count)) + (when-let ((headlines (org-roam--extract-headlines file))) + (when (org-roam-db--insert-headlines headlines) + (setq headline-count (1+ headline-count))))))))) + ;; Second step: 
Rebuild the rest + (dolist (file org-roam-files) + (let ((contents-hash (org-roam-db--file-hash file))) + (unless (string= (gethash file current-files) + contents-hash) + (org-roam--with-temp-buffer file + (when-let (links (org-roam--extract-links file)) + (org-roam-db-query + [:insert :into links + :values $v1] + links) + (setq link-count (1+ link-count))) + (when-let (tags (org-roam--extract-tags file)) + (org-roam-db-query + [:insert :into tags + :values $v1] + (vector file tags)) + (setq tag-count (1+ tag-count))) + (let ((titles (or (org-roam--extract-titles) + (list (org-roam--path-to-slug file))))) + (org-roam-db--insert-titles file titles) + (setq title-count (+ title-count (length titles)))) + (when-let* ((ref (org-roam--extract-ref))) + (when (org-roam-db--insert-ref file ref) + (setq ref-count (1+ ref-count)))))) + (remhash file current-files))) (dolist (file (hash-table-keys current-files)) ;; These files are no longer around, remove from cache... (org-roam-db--clear-file file)