fix: improve validation of bulk imports

Nathaniel Landau
2023-03-20 12:56:22 -04:00
parent d636fb2672
commit 36adfece51
8 changed files with 229 additions and 24 deletions

View File

@@ -141,14 +141,14 @@ To bypass the configuration file and specify a vault to use at runtime use the `
### Making bulk edits
Bulk edits are supported by importing a CSV file containing the following columns
Bulk edits are supported by importing a CSV file containing the following columns. Column headers must be lowercase.
1. `Path` - Path to note relative to the vault root folder
2. `Type` - Type of metadata. One of `frontmatter`, `inline_metadata`, or `tag`
3. `Key` - The key to add (leave blank for a tag)
4. `Value` - the value to add to the key
1. `path` - Path to note relative to the vault root folder
2. `type` - Type of metadata. One of `frontmatter`, `inline_metadata`, or `tag`
3. `key` - The key to add (leave blank for a tag)
4. `value` - The value to add to the key
Notes which match a `path` in the CSV file will be updated to contain ONLY the information in the CSV file. Notes which do not match a path will be left untouched. The example CSV below will remove any frontmatter, inline metadata, or tags within `vault/folder 01/note1.md` and then add the specified metadata.
An example of a valid CSV file:
```csv
path,type,key,value
@@ -160,6 +160,13 @@ folder 1/note1.md,tag,,tag1
folder 1/note1.md,tag,,tag2
```
How bulk imports work:

- Only notes which match a path in the CSV file are updated
- Affected notes will have ALL of their metadata changed to reflect the values in the CSV file
- Existing metadata in an affected note is rewritten. This may change its location and/or formatting within the note
- Inline tags ignore any value added to the `key` column

You can export all of your notes with their associated metadata in this format from the "Export Metadata" section of the script and use the export as a template for your bulk changes. A before/after sketch is shown below.
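As an illustrative sketch (the note content below is invented, not from these docs), a bulk import that assigns only the frontmatter key `area: work` and the tags `tag1` and `tag2` to a note replaces whatever metadata the note held before; where the new tags land depends on your configured insert location:

```markdown
<!-- before the import -->
---
status: draft
---
# Note 1

<!-- after the import: only the metadata from the CSV remains -->
---
area: work
---
# Note 1
#tag1 #tag2
```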
# Contributing

View File

@@ -6,10 +6,12 @@ from obsidian_metadata._utils.utilities import (
    clean_dictionary,
    clear_screen,
    dict_contains,
    dict_keys_to_lower,
    dict_values_to_lists_strings,
    docstring_parameter,
    merge_dictionaries,
    remove_markdown_sections,
    validate_csv_bulk_imports,
    version_callback,
)
@@ -18,11 +20,12 @@ __all__ = [
"clean_dictionary",
"clear_screen",
"dict_contains",
"dict_keys_to_lower",
"dict_values_to_lists_strings",
"docstring_parameter",
"LoggerManager",
"merge_dictionaries",
"remove_markdown_sections",
"vault_validation",
"validate_csv_bulk_imports",
"version_callback",
]

View File

@@ -1,11 +1,15 @@
"""Utility functions."""
import csv
import re
from os import name, system
from pathlib import Path
from typing import Any
import typer
from obsidian_metadata.__version__ import __version__
from obsidian_metadata._utils import alerts
from obsidian_metadata._utils.alerts import logger as log
from obsidian_metadata._utils.console import console
@@ -63,6 +67,18 @@ def dict_contains(
    return key in dictionary and value in dictionary[key]


def dict_keys_to_lower(dictionary: dict) -> dict:
    """Convert all keys in a dictionary to lowercase.

    Args:
        dictionary (dict): Dictionary to convert

    Returns:
        dict: Dictionary with all keys converted to lowercase
    """
    return {key.lower(): value for key, value in dictionary.items()}
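As a usage sketch (the row data below is invented for illustration, not part of the commit), this helper can normalize a user-supplied CSV header row whose case is unknown:

```python
# Hypothetical example: lowercase the keys of a mixed-case CSV row
# so the validation below can check for 'path', 'type', 'key', 'value'.
row = {"Path": "note1.md", "TYPE": "frontmatter", "Key": "area", "Value": "work"}
dict_keys_to_lower(row)
# -> {"path": "note1.md", "type": "frontmatter", "key": "area", "value": "work"}
```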
def dict_values_to_lists_strings(
    dictionary: dict,
    strip_null_values: bool = False,
@@ -182,6 +198,55 @@ def remove_markdown_sections(
    return text


def validate_csv_bulk_imports(csv_path: Path, note_paths: list) -> dict[str, list[dict[str, str]]]:
    """Validate the bulk import CSV file.

    Args:
        csv_path (Path): Path to the CSV file to validate
        note_paths (list): List of paths to all notes in vault

    Returns:
        dict: Validated rows from the CSV file grouped by note path
    """
    csv_dict: dict[str, Any] = {}
    with csv_path.expanduser().open("r") as csv_file:
        csv_reader = csv.DictReader(csv_file, delimiter=",")

        row_num = 0
        for row in csv_reader:
            # Validate the header before processing the first data row
            if row_num == 0:
                if "path" not in row:
                    raise typer.BadParameter("Missing 'path' column in CSV file")
                if "type" not in row:
                    raise typer.BadParameter("Missing 'type' column in CSV file")
                if "key" not in row:
                    raise typer.BadParameter("Missing 'key' column in CSV file")
                if "value" not in row:
                    raise typer.BadParameter("Missing 'value' column in CSV file")
            row_num += 1

            # Group rows by note path
            if row["path"] not in csv_dict:
                csv_dict[row["path"]] = []
            csv_dict[row["path"]].append(
                {"type": row["type"], "key": row["key"], "value": row["value"]}
            )

    if row_num == 0 or row_num == 1:
        raise typer.BadParameter("Empty CSV file")

    # Drop rows whose path does not match a note in the vault
    paths_to_remove = [x for x in csv_dict if x not in note_paths]
    for _path in paths_to_remove:
        alerts.warning(f"'{_path}' does not exist in vault. Skipping...")
        del csv_dict[_path]

    if len(csv_dict) == 0:
        log.error("No paths in the CSV file matched paths in the vault")
        raise typer.Exit(1)

    return csv_dict
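A minimal usage sketch (assuming the function is imported and the vault contains only `folder 1/note1.md`; the file name is invented). Note that at least two data rows are needed, since files with fewer are rejected as empty:

```python
from pathlib import Path

# Hypothetical input file for illustration.
csv_file = Path("bulk_imports.csv")
csv_file.write_text(
    "path,type,key,value\n"
    "folder 1/note1.md,frontmatter,area,work\n"
    "folder 1/note1.md,tag,,tag1\n"
)

imports = validate_csv_bulk_imports(csv_path=csv_file, note_paths=["folder 1/note1.md"])
# imports == {
#     "folder 1/note1.md": [
#         {"type": "frontmatter", "key": "area", "value": "work"},
#         {"type": "tag", "key": "", "value": "tag1"},
#     ]
# }
```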
def version_callback(value: bool) -> None:
    """Print version and exit."""
    if value:

View File

@@ -1,7 +1,6 @@
"""Questions for the cli."""
import csv
from pathlib import Path
from typing import Any
@@ -11,7 +10,7 @@ from rich import box
from rich.table import Table
from obsidian_metadata._config import VaultConfig
from obsidian_metadata._utils import alerts
from obsidian_metadata._utils import alerts, validate_csv_bulk_imports
from obsidian_metadata._utils.console import console
from obsidian_metadata.models import InsertLocation, Vault, VaultFilter
from obsidian_metadata.models.enums import MetadataType
@@ -301,18 +300,12 @@ class Application:
alerts.error("File must be a CSV file")
return
csv_dict: dict[str, Any] = {}
with csv_path.open("r") as csv_file:
csv_reader = csv.DictReader(csv_file, delimiter=",")
for row in csv_reader:
if row["path"] not in csv_dict:
csv_dict[row["path"]] = []
note_paths = [
str(n.note_path.relative_to(self.vault.vault_path)) for n in self.vault.all_notes
]
csv_dict[row["path"]].append(
{"type": row["type"], "key": row["key"], "value": row["value"]}
)
num_changed = self.vault.update_from_dict(csv_dict)
dict_from_csv = validate_csv_bulk_imports(csv_path, note_paths)
num_changed = self.vault.update_from_dict(dict_from_csv)
if num_changed == 0:
alerts.warning("No notes were changed")

View File

@@ -572,7 +572,7 @@ class Vault:
        for _note in self.all_notes:
            path = _note.note_path.relative_to(self.vault_path)
            if str(path) in dictionary:
                log.debug(f"Updating metadata for {path}")
                log.info(f"Updating metadata for '{path}'")
                num_changed += 1
                _note.delete_all_metadata()

                for row in dictionary[str(path)]:
@@ -590,7 +590,6 @@ class Vault:
                    )

                    if row["type"].lower() == "tag" or row["type"].lower() == "tags":
                        console.print(f"Adding tag {row['value']}")
                        _note.add_metadata(
                            area=MetadataType.TAGS,
                            value=row["value"],

View File

@@ -264,7 +264,7 @@ def test_commit_1(sample_note, tmp_path) -> None:
assert "Heading 1" not in note2.file_content
def test_commit_2(sample_note, tmp_path) -> None:
def test_commit_2(sample_note) -> None:
"""Test commit() method.
GIVEN a note object with commit() called

View File

@@ -1,13 +1,18 @@
# type: ignore
"""Test the utilities module."""
import pytest
import typer
from obsidian_metadata._utils import (
    clean_dictionary,
    dict_contains,
    dict_keys_to_lower,
    dict_values_to_lists_strings,
    remove_markdown_sections,
    validate_csv_bulk_imports,
)
from tests.helpers import Regex, remove_ansi
def test_dict_contains() -> None:
@@ -25,6 +30,17 @@ def test_dict_contains() -> None:
    assert dict_contains(d, r"key\d", "value5", is_regex=True) is True


def test_dict_keys_to_lower() -> None:
    """Test the dict_keys_to_lower() function.

    GIVEN a dictionary with mixed case keys
    WHEN the dict_keys_to_lower() function is called
    THEN the dictionary keys should be converted to lowercase
    """
    test_dict = {"Key1": "Value1", "KEY2": "Value2", "key3": "Value3"}
    assert dict_keys_to_lower(test_dict) == {"key1": "Value1", "key2": "Value2", "key3": "Value3"}

def test_dict_values_to_lists_strings():
"""Test converting dictionary values to lists of strings."""
dictionary = {
@@ -106,3 +122,125 @@ def test_clean_dictionary():
    new_dict = clean_dictionary(dictionary)

    assert new_dict == {"key": ["value", "value2", "value3"]}

def test_validate_csv_bulk_imports_1(tmp_path):
    """Test the validate_csv_bulk_imports function.

    GIVEN a csv file missing the `path` column
    WHEN the validate_csv_bulk_imports function is called
    THEN an exception should be raised
    """
    csv_path = tmp_path / "test.csv"
    csv_content = """\
PATH,type,key,value
note1.md,type,key,value"""
    csv_path.write_text(csv_content)

    with pytest.raises(typer.BadParameter):
        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])


def test_validate_csv_bulk_imports_2(tmp_path):
    """Test the validate_csv_bulk_imports function.

    GIVEN a csv file missing the `type` column
    WHEN the validate_csv_bulk_imports function is called
    THEN an exception should be raised
    """
    csv_path = tmp_path / "test.csv"
    csv_content = """\
path,Type,key,value
note1.md,type,key,value"""
    csv_path.write_text(csv_content)

    with pytest.raises(typer.BadParameter):
        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])


def test_validate_csv_bulk_imports_3(tmp_path):
    """Test the validate_csv_bulk_imports function.

    GIVEN a csv file missing the `key` column
    WHEN the validate_csv_bulk_imports function is called
    THEN an exception should be raised
    """
    csv_path = tmp_path / "test.csv"
    csv_content = """\
path,type,value
note1.md,type,key,value"""
    csv_path.write_text(csv_content)

    with pytest.raises(typer.BadParameter):
        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])


def test_validate_csv_bulk_imports_4(tmp_path):
    """Test the validate_csv_bulk_imports function.

    GIVEN a csv file missing the `value` column
    WHEN the validate_csv_bulk_imports function is called
    THEN an exception should be raised
    """
    csv_path = tmp_path / "test.csv"
    csv_content = """\
path,type,key,values
note1.md,type,key,value"""
    csv_path.write_text(csv_content)

    with pytest.raises(typer.BadParameter):
        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])


def test_validate_csv_bulk_imports_5(tmp_path):
    """Test the validate_csv_bulk_imports function.

    GIVEN a csv file with only headers
    WHEN the validate_csv_bulk_imports function is called
    THEN an exception should be raised
    """
    csv_path = tmp_path / "test.csv"
    csv_content = "path,type,key,value"
    csv_path.write_text(csv_content)

    with pytest.raises(typer.BadParameter):
        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])


def test_validate_csv_bulk_imports_6(tmp_path, capsys):
    """Test the validate_csv_bulk_imports function.

    GIVEN a valid csv file
    WHEN a path is given that does not exist in the vault
    THEN show the user a warning
    """
    csv_path = tmp_path / "test.csv"
    csv_content = """\
path,type,key,value
note1.md,type,key,value
note2.md,type,key,value
"""
    csv_path.write_text(csv_content)

    csv_dict = validate_csv_bulk_imports(csv_path=csv_path, note_paths=["note1.md"])
    captured = remove_ansi(capsys.readouterr().out)
    assert "WARNING | 'note2.md' does not exist in vault." in captured
    assert csv_dict == {"note1.md": [{"key": "key", "type": "type", "value": "value"}]}


def test_validate_csv_bulk_imports_7(tmp_path):
    """Test the validate_csv_bulk_imports function.

    GIVEN a valid csv file
    WHEN no paths match paths in the vault
    THEN exit the program
    """
    csv_path = tmp_path / "test.csv"
    csv_content = """\
path,type,key,value
note1.md,type,key,value
note2.md,type,key,value
"""
    csv_path.write_text(csv_content)

    with pytest.raises(typer.Exit):
        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])

View File

@@ -239,7 +239,7 @@ def test_commit_changes_2(test_vault, tmp_path):
assert "new_key: new_key_value" not in committed_content
def test_backup_1(test_vault, tmp_path, capsys):
def test_backup_1(test_vault, capsys):
"""Test the backup method.
GIVEN a vault object