fix: improve validation of bulk imports

Nathaniel Landau
2023-03-20 12:56:22 -04:00
parent d636fb2672
commit 36adfece51
8 changed files with 229 additions and 24 deletions

View File

@@ -141,14 +141,14 @@ To bypass the configuration file and specify a vault to use at runtime use the `
 ### Making bulk edits
-Bulk edits are supported by importing a CSV file containing the following columns
-1. `Path` - Path to note relative to the vault root folder
-2. `Type` - Type of metadata. One of `frontmatter`, `inline_metadata`, or `tag`
-3. `Key` - The key to add (leave blank for a tag)
-4. `Value` - the value to add to the key
-Notes which match a Path in the file will be updated to contain ONLY the information in the CSV file. Notes which do not match a path will be left untouched. The example CSV below will remove any frontmatter, inline metadata, or tags within `vault/folder 01/note1.md` and then add the specified metadata.
+Bulk edits are supported by importing a CSV file containing the following columns. Column headers must be lowercase.
+1. `path` - Path to note relative to the vault root folder
+2. `type` - Type of metadata. One of `frontmatter`, `inline_metadata`, or `tag`
+3. `key` - The key to add (leave blank for a tag)
+4. `value` - The value to add to the key
+An example valid CSV file is:
 ```csv
 path,type,key,value
@@ -160,6 +160,13 @@ folder 1/note1.md,tag,,tag1
 folder 1/note1.md,tag,,tag2
 ```
+
+How bulk imports work:
+
+- Only notes which match the path in the CSV file are updated
+- Affected notes will have ALL of their metadata changed to reflect the values in the CSV file
+- Existing metadata in an affected note will be rewritten. This may result in its location and/or formatting within the note being changed
+- Inline tags ignore any value added to the `key` column
+
 You can export all your notes with their associated metadata in this format from the "Export Metadata" section of the script to be used as a template for your bulk changes.

 # Contributing
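As an editorial aside, not part of the commit itself: the grouping the new validator builds from a CSV like the example above can be sketched in a few lines. Only the `path`/`type`/`key`/`value` column names come from the README; the helper name `rows_by_path` is illustrative.

```python
import csv
from pathlib import Path


def rows_by_path(csv_file: Path) -> dict[str, list[dict[str, str]]]:
    """Group bulk-import rows by the note path they target."""
    grouped: dict[str, list[dict[str, str]]] = {}
    with csv_file.open("r") as fp:
        for row in csv.DictReader(fp):
            # Every row for the same `path` lands in one list, which later
            # replaces that note's metadata wholesale.
            grouped.setdefault(row["path"], []).append(
                {"type": row["type"], "key": row["key"], "value": row["value"]}
            )
    return grouped
```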

View File

@@ -6,10 +6,12 @@ from obsidian_metadata._utils.utilities import (
     clean_dictionary,
     clear_screen,
     dict_contains,
+    dict_keys_to_lower,
     dict_values_to_lists_strings,
     docstring_parameter,
     merge_dictionaries,
     remove_markdown_sections,
+    validate_csv_bulk_imports,
     version_callback,
 )
@@ -18,11 +20,12 @@ __all__ = [
"clean_dictionary", "clean_dictionary",
"clear_screen", "clear_screen",
"dict_contains", "dict_contains",
"dict_keys_to_lower",
"dict_values_to_lists_strings", "dict_values_to_lists_strings",
"docstring_parameter", "docstring_parameter",
"LoggerManager", "LoggerManager",
"merge_dictionaries", "merge_dictionaries",
"remove_markdown_sections", "remove_markdown_sections",
"vault_validation", "validate_csv_bulk_imports",
"version_callback", "version_callback",
] ]

View File

@@ -1,11 +1,15 @@
"""Utility functions.""" """Utility functions."""
import csv
import re import re
from os import name, system from os import name, system
from pathlib import Path
from typing import Any from typing import Any
import typer import typer
from obsidian_metadata.__version__ import __version__ from obsidian_metadata.__version__ import __version__
from obsidian_metadata._utils import alerts
from obsidian_metadata._utils.alerts import logger as log
from obsidian_metadata._utils.console import console from obsidian_metadata._utils.console import console
@@ -63,6 +67,18 @@ def dict_contains(
     return key in dictionary and value in dictionary[key]


+def dict_keys_to_lower(dictionary: dict) -> dict:
+    """Convert all keys in a dictionary to lowercase.
+
+    Args:
+        dictionary (dict): Dictionary to convert
+
+    Returns:
+        dict: Dictionary with all keys converted to lowercase
+    """
+    return {key.lower(): value for key, value in dictionary.items()}
+
+
 def dict_values_to_lists_strings(
     dictionary: dict,
     strip_null_values: bool = False,
@@ -182,6 +198,55 @@ def remove_markdown_sections(
     return text


+def validate_csv_bulk_imports(csv_path: Path, note_paths: list) -> dict[str, list[dict[str, str]]]:
+    """Validate the bulk import CSV file.
+
+    Args:
+        csv_path (Path): Path to the CSV file containing the bulk imports
+        note_paths (list): List of paths to all notes in the vault
+
+    Returns:
+        dict: Validated dictionary mapping each note path to its list of metadata rows
+    """
+    csv_dict: dict[str, Any] = {}
+    with csv_path.expanduser().open("r") as csv_file:
+        csv_reader = csv.DictReader(csv_file, delimiter=",")
+        row_num = 0
+        for row in csv_reader:
+            if row_num == 0:
+                if "path" not in row:
+                    raise typer.BadParameter("Missing 'path' column in CSV file")
+                if "type" not in row:
+                    raise typer.BadParameter("Missing 'type' column in CSV file")
+                if "key" not in row:
+                    raise typer.BadParameter("Missing 'key' column in CSV file")
+                if "value" not in row:
+                    raise typer.BadParameter("Missing 'value' column in CSV file")
+            row_num += 1
+
+            if row["path"] not in csv_dict:
+                csv_dict[row["path"]] = []
+            csv_dict[row["path"]].append(
+                {"type": row["type"], "key": row["key"], "value": row["value"]}
+            )
+
+    if row_num == 0:
+        raise typer.BadParameter("Empty CSV file")
+
+    paths_to_remove = [x for x in csv_dict if x not in note_paths]
+    for _path in paths_to_remove:
+        alerts.warning(f"'{_path}' does not exist in vault. Skipping...")
+        del csv_dict[_path]
+
+    if len(csv_dict) == 0:
+        log.error("No paths in the CSV file matched paths in the vault")
+        raise typer.Exit(1)
+
+    return csv_dict


 def version_callback(value: bool) -> None:
     """Print version and exit."""
     if value:
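A hedged usage sketch of the new helper. The call signature and return shape are taken from the hunk above; the literal file name and note path are illustrative.

```python
from pathlib import Path

from obsidian_metadata._utils import validate_csv_bulk_imports

# note_paths would normally be derived from the vault (see the application
# hunk below); it is hard-coded here for illustration.
csv_dict = validate_csv_bulk_imports(
    csv_path=Path("bulk_edits.csv"),
    note_paths=["folder 1/note1.md"],
)
# Expected shape, per the implementation above:
# {"folder 1/note1.md": [{"type": "tag", "key": "", "value": "tag1"}, ...]}
```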

View File

@@ -1,7 +1,6 @@
"""Questions for the cli.""" """Questions for the cli."""
import csv
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@@ -11,7 +10,7 @@ from rich import box
 from rich.table import Table

 from obsidian_metadata._config import VaultConfig
-from obsidian_metadata._utils import alerts
+from obsidian_metadata._utils import alerts, validate_csv_bulk_imports
 from obsidian_metadata._utils.console import console
 from obsidian_metadata.models import InsertLocation, Vault, VaultFilter
 from obsidian_metadata.models.enums import MetadataType
@@ -301,18 +300,12 @@ class Application:
alerts.error("File must be a CSV file") alerts.error("File must be a CSV file")
return return
csv_dict: dict[str, Any] = {} note_paths = [
with csv_path.open("r") as csv_file: str(n.note_path.relative_to(self.vault.vault_path)) for n in self.vault.all_notes
csv_reader = csv.DictReader(csv_file, delimiter=",") ]
for row in csv_reader:
if row["path"] not in csv_dict:
csv_dict[row["path"]] = []
csv_dict[row["path"]].append( dict_from_csv = validate_csv_bulk_imports(csv_path, note_paths)
{"type": row["type"], "key": row["key"], "value": row["value"]} num_changed = self.vault.update_from_dict(dict_from_csv)
)
num_changed = self.vault.update_from_dict(csv_dict)
if num_changed == 0: if num_changed == 0:
alerts.warning("No notes were changed") alerts.warning("No notes were changed")

View File

@@ -572,7 +572,7 @@ class Vault:
         for _note in self.all_notes:
             path = _note.note_path.relative_to(self.vault_path)
             if str(path) in dictionary:
-                log.debug(f"Updating metadata for {path}")
+                log.info(f"Updating metadata for '{path}'")
                 num_changed += 1
                 _note.delete_all_metadata()
                 for row in dictionary[str(path)]:
@@ -590,7 +590,6 @@ class Vault:
                     )

                 if row["type"].lower() == "tag" or row["type"].lower() == "tags":
-                    console.print(f"Adding tag {row['value']}")
                     _note.add_metadata(
                         area=MetadataType.TAGS,
                         value=row["value"],

View File

@@ -264,7 +264,7 @@ def test_commit_1(sample_note, tmp_path) -> None:
assert "Heading 1" not in note2.file_content assert "Heading 1" not in note2.file_content
def test_commit_2(sample_note, tmp_path) -> None: def test_commit_2(sample_note) -> None:
"""Test commit() method. """Test commit() method.
GIVEN a note object with commit() called GIVEN a note object with commit() called

View File

@@ -1,13 +1,18 @@
# type: ignore
 """Test the utilities module."""

+import pytest
+import typer

 from obsidian_metadata._utils import (
     clean_dictionary,
     dict_contains,
+    dict_keys_to_lower,
     dict_values_to_lists_strings,
     remove_markdown_sections,
+    validate_csv_bulk_imports,
 )
+from tests.helpers import Regex, remove_ansi


 def test_dict_contains() -> None:
@@ -25,6 +30,17 @@ def test_dict_contains() -> None:
     assert dict_contains(d, r"key\d", "value5", is_regex=True) is True


+def test_dict_keys_to_lower() -> None:
+    """Test the dict_keys_to_lower() function.
+
+    GIVEN a dictionary with mixed case keys
+    WHEN the dict_keys_to_lower() function is called
+    THEN the dictionary keys should be converted to lowercase
+    """
+    test_dict = {"Key1": "Value1", "KEY2": "Value2", "key3": "Value3"}
+    assert dict_keys_to_lower(test_dict) == {"key1": "Value1", "key2": "Value2", "key3": "Value3"}
+
+
 def test_dict_values_to_lists_strings():
     """Test converting dictionary values to lists of strings."""
     dictionary = {
@@ -106,3 +122,125 @@ def test_clean_dictionary():
     new_dict = clean_dictionary(dictionary)

     assert new_dict == {"key": ["value", "value2", "value3"]}
+
+
+def test_validate_csv_bulk_imports_1(tmp_path):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a csv file missing the `path` column
+    WHEN the validate_csv_bulk_imports function is called
+    THEN an exception should be raised
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = """\
+PATH,type,key,value
+note1.md,type,key,value"""
+    csv_path.write_text(csv_content)
+    with pytest.raises(typer.BadParameter):
+        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])
+
+
+def test_validate_csv_bulk_imports_2(tmp_path):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a csv file missing the `type` column
+    WHEN the validate_csv_bulk_imports function is called
+    THEN an exception should be raised
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = """\
+path,Type,key,value
+note1.md,type,key,value"""
+    csv_path.write_text(csv_content)
+    with pytest.raises(typer.BadParameter):
+        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])
+
+
+def test_validate_csv_bulk_imports_3(tmp_path):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a csv file missing the `key` column
+    WHEN the validate_csv_bulk_imports function is called
+    THEN an exception should be raised
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = """\
+path,type,value
+note1.md,type,key,value"""
+    csv_path.write_text(csv_content)
+    with pytest.raises(typer.BadParameter):
+        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])
+
+
+def test_validate_csv_bulk_imports_4(tmp_path):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a csv file missing the `value` column
+    WHEN the validate_csv_bulk_imports function is called
+    THEN an exception should be raised
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = """\
+path,type,key,values
+note1.md,type,key,value"""
+    csv_path.write_text(csv_content)
+    with pytest.raises(typer.BadParameter):
+        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])
+
+
+def test_validate_csv_bulk_imports_5(tmp_path):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a csv file with only headers
+    WHEN the validate_csv_bulk_imports function is called
+    THEN an exception should be raised
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = "path,type,key,value"
+    csv_path.write_text(csv_content)
+    with pytest.raises(typer.BadParameter):
+        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])
+
+
+def test_validate_csv_bulk_imports_6(tmp_path, capsys):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a valid csv file
+    WHEN a path is given that does not exist in the vault
+    THEN show the user a warning
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = """\
+path,type,key,value
+note1.md,type,key,value
+note2.md,type,key,value
+"""
+    csv_path.write_text(csv_content)
+    csv_dict = validate_csv_bulk_imports(csv_path=csv_path, note_paths=["note1.md"])
+    captured = remove_ansi(capsys.readouterr().out)
+    assert "WARNING | 'note2.md' does not exist in vault." in captured
+    assert csv_dict == {"note1.md": [{"key": "key", "type": "type", "value": "value"}]}
+
+
+def test_validate_csv_bulk_imports_7(tmp_path):
+    """Test the validate_csv_bulk_imports function.
+
+    GIVEN a valid csv file
+    WHEN no paths match paths in the vault
+    THEN exit the program
+    """
+    csv_path = tmp_path / "test.csv"
+    csv_content = """\
+path,type,key,value
+note1.md,type,key,value
+note2.md,type,key,value
+"""
+    csv_path.write_text(csv_content)
+    with pytest.raises(typer.Exit):
+        validate_csv_bulk_imports(csv_path=csv_path, note_paths=[])

View File

@@ -239,7 +239,7 @@ def test_commit_changes_2(test_vault, tmp_path):
assert "new_key: new_key_value" not in committed_content assert "new_key: new_key_value" not in committed_content
def test_backup_1(test_vault, tmp_path, capsys): def test_backup_1(test_vault, capsys):
"""Test the backup method. """Test the backup method.
GIVEN a vault object GIVEN a vault object