diff --git a/src/obsidian_metadata/_utils/utilities.py b/src/obsidian_metadata/_utils/utilities.py index c483a27..2bb6f2c 100644 --- a/src/obsidian_metadata/_utils/utilities.py +++ b/src/obsidian_metadata/_utils/utilities.py @@ -21,24 +21,26 @@ def clean_dictionary(dictionary: dict[str, Any]) -> dict[str, Any]: Returns: dict: Cleaned dictionary """ - new_dict = {key.strip(): value for key, value in dictionary.items()} - new_dict = {key.strip("*[]#"): value for key, value in new_dict.items()} + new_dict = copy.deepcopy(dictionary) + new_dict = {key.strip("*[]# "): value for key, value in new_dict.items()} for key, value in new_dict.items(): - new_dict[key] = [s.strip("*[]#") for s in value if isinstance(value, list)] + if isinstance(value, list): + new_dict[key] = [s.strip("*[]# ") for s in value if isinstance(value, list)] + elif isinstance(value, str): + new_dict[key] = value.strip("*[]# ") return new_dict def clear_screen() -> None: # pragma: no cover """Clear the screen.""" - # for windows _ = system("cls") if name == "nt" else system("clear") def dict_contains( dictionary: dict[str, list[str]], key: str, value: str = None, is_regex: bool = False ) -> bool: - """Check if a dictionary contains a key or if a specified key contains a value. + """Check if a dictionary contains a key or if a key contains a value. Args: dictionary (dict): Dictionary to check @@ -47,7 +49,7 @@ def dict_contains( is_regex (bool, optional): Whether the key is a regex. Defaults to False. 
Returns: - bool: Whether the dictionary contains the key + bool: Whether the dictionary contains the key or value """ if value is None: if is_regex: @@ -55,13 +57,11 @@ def dict_contains( return key in dictionary if is_regex: - found_keys = [] for _key in dictionary: - if re.search(key, str(_key)): - found_keys.append( - any(re.search(value, _v) for _v in dictionary[_key]), - ) - return any(found_keys) + if re.search(key, str(_key)) and any(re.search(value, _v) for _v in dictionary[_key]): + return True + + return False return key in dictionary and value in dictionary[key] @@ -93,6 +93,7 @@ def dict_values_to_lists_strings( {key: sorted(new_dict[key]) for key in sorted(new_dict)} """ + dictionary = copy.deepcopy(dictionary) new_dict = {} if strip_null_values: @@ -100,7 +101,7 @@ def dict_values_to_lists_strings( if isinstance(value, list): new_dict[key] = sorted([str(item) for item in value if item is not None]) elif isinstance(value, dict): - new_dict[key] = dict_values_to_lists_strings(value) # type: ignore[assignment] + new_dict[key] = dict_values_to_lists_strings(value, strip_null_values=True) # type: ignore[assignment] elif value is None or value == "None" or not value: new_dict[key] = [] else: @@ -110,11 +111,11 @@ def dict_values_to_lists_strings( for key, value in dictionary.items(): if isinstance(value, list): - new_dict[key] = sorted([str(item) for item in value]) + new_dict[key] = sorted([str(item) if item is not None else "" for item in value]) elif isinstance(value, dict): new_dict[key] = dict_values_to_lists_strings(value) # type: ignore[assignment] else: - new_dict[key] = [str(value)] + new_dict[key] = [str(value) if value is not None else ""] return new_dict @@ -192,22 +193,24 @@ def merge_dictionaries(dict1: dict, dict2: dict) -> dict: Returns: dict: Merged dictionary. 
""" - for k, v in dict2.items(): - if k in dict1: - if isinstance(v, list): - dict1[k].extend(v) + d1 = copy.deepcopy(dict1) + d2 = copy.deepcopy(dict2) + + for _key in d1: + if not isinstance(d1[_key], list): + raise TypeError(f"Key {_key} is not a list.") + for _key in d2: + if not isinstance(d2[_key], list): + raise TypeError(f"Key {_key} is not a list.") + + for k, v in d2.items(): + if k in d1: + d1[k].extend(v) + d1[k] = sorted(set(d1[k])) else: - dict1[k] = v + d1[k] = sorted(set(v)) - for k, v in dict1.items(): - if isinstance(v, list): - dict1[k] = sorted(set(v)) - elif isinstance(v, dict): # pragma: no cover - for kk, vv in v.items(): - if isinstance(vv, list): - v[kk] = sorted(set(vv)) - - return dict(sorted(dict1.items())) + return dict(sorted(d1.items())) def rename_in_dict( @@ -241,7 +244,7 @@ def remove_markdown_sections( strip_inlinecode: bool = False, strip_frontmatter: bool = False, ) -> str: - """Strip markdown sections from text. + """Strip unwanted markdown sections from text. This is used to remove code blocks and frontmatter from the body of notes before tags and inline metadata are processed. Args: text (str): Text to remove code blocks from @@ -256,7 +259,7 @@ def remove_markdown_sections( text = re.sub(r"`{3}.*?`{3}", "", text, flags=re.DOTALL) if strip_inlinecode: - text = re.sub(r"`.*?`", "", text) + text = re.sub(r"(? None: - """Test dict_contains.""" - d = {"key1": ["value1", "value2"], "key2": ["value3", "value4"], "key3": ["value5", "value6"]} +def test_dict_contains_1(): + """Test dict_contains() function. 
- assert dict_contains(d, "key1") is True - assert dict_contains(d, "key5") is False - assert dict_contains(d, "key1", "value1") is True - assert dict_contains(d, "key1", "value5") is False - assert dict_contains(d, "key[1-2]", is_regex=True) is True - assert dict_contains(d, "^1", is_regex=True) is False - assert dict_contains(d, r"key\d", r"value\d", is_regex=True) is True - assert dict_contains(d, "key1$", "^alue", is_regex=True) is False - assert dict_contains(d, r"key\d", "value5", is_regex=True) is True + GIVEN calling dict_contains() with a dictionary + WHEN the dictionary is empty + THEN the function should return False + """ + assert dict_contains({}, "key1") is False + + +def test_dict_contains_2(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN the key is not in the dictionary + THEN the function should return False + """ + assert dict_contains({"key1": "value1"}, "key2") is False + + +def test_dict_contains_3(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN the key is in the dictionary + THEN the function should return True + """ + assert dict_contains({"key1": "value1"}, "key1") is True + + +def test_dict_contains_4(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN the key and value are in the dictionary + THEN the function should return True + """ + assert dict_contains({"key1": "value1"}, "key1", "value1") is True + + +def test_dict_contains_5(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN the key and value are not in the dictionary + THEN the function should return False + """ + assert dict_contains({"key1": "value1"}, "key1", "value2") is False + + +def test_dict_contains_6(): + """Test dict_contains() function. 
+ + GIVEN calling dict_contains() with a dictionary + WHEN a regex is used for the key and the key is in the dictionary + THEN the function should return True + """ + assert dict_contains({"key1": "value1"}, r"key\d", is_regex=True) is True + + +def test_dict_contains_7(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN a regex is used for the key and the key is not in the dictionary + THEN the function should return False + """ + assert dict_contains({"key1": "value1"}, r"key\d\d", is_regex=True) is False + + +def test_dict_contains_8(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN a regex is used for a value and the value is in the dictionary + THEN the function should return True + """ + assert dict_contains({"key1": "value1"}, "key1", r"\w+", is_regex=True) is True + + +def test_dict_contains_9(): + """Test dict_contains() function. + + GIVEN calling dict_contains() with a dictionary + WHEN a regex is used for a value and the value is not in the dictionary + THEN the function should return False + """ + assert dict_contains({"key1": "value1"}, "key1", r"\d{2}", is_regex=True) is False def test_dict_keys_to_lower() -> None: @@ -200,45 +342,202 @@ def test_dict_keys_to_lower() -> None: assert dict_keys_to_lower(test_dict) == {"key1": "Value1", "key2": "Value2", "key3": "Value3"} -def test_dict_values_to_lists_strings(): - """Test converting dictionary values to lists of strings.""" - dictionary = { - "key1": "value1", - "key2": ["value2", "value3", None], - "key3": {"key4": "value4"}, - "key5": {"key6": {"key7": "value7"}}, - "key6": None, - "key8": [1, 3, None, 4], - "key9": [None, "", "None"], - "key10": "None", - "key11": "", - } +def test_dict_values_to_lists_strings_1(): + """Test the dict_values_to_lists_strings() function. 
- result = dict_values_to_lists_strings(dictionary) - assert result == { - "key1": ["value1"], - "key10": ["None"], - "key11": [""], - "key2": ["None", "value2", "value3"], - "key3": {"key4": ["value4"]}, - "key5": {"key6": {"key7": ["value7"]}}, - "key6": ["None"], - "key8": ["1", "3", "4", "None"], - "key9": ["", "None", "None"], - } + GIVEN a dictionary passed to the dict_values_to_lists_strings() function + WHEN the dictionary is empty + THEN the function should return an empty dictionary + """ + assert dict_values_to_lists_strings({}) == {} + assert dict_values_to_lists_strings({}, strip_null_values=True) == {} - result = dict_values_to_lists_strings(dictionary, strip_null_values=True) - assert result == { + +def test_dict_values_to_lists_strings_2(): + """Test the dict_values_to_lists_strings() function. + + GIVEN a dictionary passed to the dict_values_to_lists_strings() function + WHEN the dictionary values are already lists of strings + THEN the function should return the dictionary + """ + test_dict = {"key1": ["value1"], "key2": ["value2", "value3"]} + assert dict_values_to_lists_strings(test_dict) == { "key1": ["value1"], - "key10": [], - "key11": [], "key2": ["value2", "value3"], - "key3": {"key4": ["value4"]}, - "key5": {"key6": {"key7": ["value7"]}}, - "key6": [], - "key8": ["1", "3", "4"], - "key9": ["", "None"], } + assert dict_values_to_lists_strings(test_dict, strip_null_values=True) == { + "key1": ["value1"], + "key2": ["value2", "value3"], + } + + +def test_dict_values_to_lists_strings_3(): + """Test the dict_values_to_lists_strings() function. 
+ + GIVEN a dictionary passed to the dict_values_to_lists_strings() function + WHEN a value is None and strip_null_values is False + THEN convert None to an empty string + """ + test_dict = {"key1": None, "key2": ["value", None]} + assert dict_values_to_lists_strings(test_dict) == {"key1": [""], "key2": ["", "value"]} + + +def test_dict_values_to_lists_strings_4(): + """Test the dict_values_to_lists_strings() function. + + GIVEN a dictionary passed to the dict_values_to_lists_strings() function + WHEN a value is None and strip_null_values is True + THEN remove null values + """ + test_dict = {"key1": None, "key2": ["value", None]} + assert dict_values_to_lists_strings(test_dict, strip_null_values=True) == { + "key1": [], + "key2": ["value"], + } + + +def test_dict_values_to_lists_strings_5(): + """Test the dict_values_to_lists_strings() function. + + GIVEN a dictionary passed to the dict_values_to_lists_strings() function + WHEN a value is a string "None" and strip_null_values is True or False + THEN keep "None" inside lists; a bare "None" value is emptied only when strip_null_values is True + """ + test_dict = {"key1": "None", "key2": [None, "None"]} + assert dict_values_to_lists_strings(test_dict) == {"key1": ["None"], "key2": ["", "None"]} + assert dict_values_to_lists_strings(test_dict, strip_null_values=True) == { + "key1": [], + "key2": ["None"], + } + + +def test_dict_values_to_lists_strings_6(): + """Test the dict_values_to_lists_strings() function. 
+ + GIVEN a dictionary passed to the dict_values_to_lists_strings() function + WHEN a value is another dictionary + THEN ensure the values in the inner dictionary are converted to lists of strings + """ + test_dict = {"key1": {"key2": "value2", "key3": ["value3", None]}} + assert dict_values_to_lists_strings(test_dict) == { + "key1": {"key2": ["value2"], "key3": ["", "value3"]} + } + assert dict_values_to_lists_strings(test_dict, strip_null_values=True) == { + "key1": {"key2": ["value2"], "key3": ["value3"]} + } + + +def test_merge_dictionaries_1(): + """Test merge_dictionaries() function. + + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN a value in dict1 is not a list + THEN raise a TypeError + """ + test_dict_1 = {"key1": "value1", "key2": "value2"} + test_dict_2 = {"key3": ["value3"], "key4": ["value4"]} + + with pytest.raises(TypeError, match=r"key.*is not a list"): + merge_dictionaries(test_dict_1, test_dict_2) + + +def test_merge_dictionaries_2(): + """Test merge_dictionaries() function. + + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN a value in dict2 is not a list + THEN raise a TypeError + """ + test_dict_1 = {"key3": ["value3"], "key4": ["value4"]} + test_dict_2 = {"key1": "value1", "key2": "value2"} + + with pytest.raises(TypeError, match=r"key.*is not a list"): + merge_dictionaries(test_dict_1, test_dict_2) + + +def test_merge_dictionaries_3(): + """Test merge_dictionaries() function. 
+ + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN keys and values in both dictionaries are unique + THEN return a dictionary with the keys and values from both dictionaries + """ + test_dict_1 = {"key1": ["value1"], "key2": ["value2"]} + test_dict_2 = {"key3": ["value3"], "key4": ["value4"]} + + assert merge_dictionaries(test_dict_1, test_dict_2) == { + "key1": ["value1"], + "key2": ["value2"], + "key3": ["value3"], + "key4": ["value4"], + } + + +def test_merge_dictionaries_4(): + """Test merge_dictionaries() function. + + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN keys in both dictionaries are not unique + THEN return a dictionary with the merged keys and values from both dictionaries + """ + test_dict_1 = {"key1": ["value1"], "key2": ["value2"]} + test_dict_2 = {"key1": ["value3"], "key2": ["value4"]} + + assert merge_dictionaries(test_dict_1, test_dict_2) == { + "key1": ["value1", "value3"], + "key2": ["value2", "value4"], + } + + +def test_merge_dictionaries_5(): + """Test merge_dictionaries() function. + + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN keys and values in both dictionaries are not unique + THEN return a dictionary with the merged keys and values from both dictionaries + """ + test_dict_1 = {"key1": ["a", "c"], "key2": ["a", "b"]} + test_dict_2 = {"key1": ["a", "b"], "key2": ["a", "c"]} + + assert merge_dictionaries(test_dict_1, test_dict_2) == { + "key1": ["a", "b", "c"], + "key2": ["a", "b", "c"], + } + + +def test_merge_dictionaries_6(): + """Test merge_dictionaries() function. 
+ + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN one of the dictionaries is empty + THEN return a dictionary equal to the other dictionary + """ + test_dict_1 = {"key1": ["a", "c"], "key2": ["a", "b"]} + test_dict_2 = {} + + assert merge_dictionaries(test_dict_1, test_dict_2) == {"key1": ["a", "c"], "key2": ["a", "b"]} + + test_dict_1 = {} + test_dict_2 = {"key1": ["a", "c"], "key2": ["a", "b"]} + assert merge_dictionaries(test_dict_1, test_dict_2) == {"key1": ["a", "c"], "key2": ["a", "b"]} + + +def test_merge_dictionaries_7(): + """Test merge_dictionaries() function. + + GIVEN two dictionaries supplied to the merge_dictionaries() function + WHEN keys and values in both dictionaries are not unique + THEN ensure the original dictionary objects are not modified + """ + test_dict_1 = {"key1": ["a", "c"], "key2": ["a", "b"]} + test_dict_2 = {"key1": ["a", "b"], "key2": ["a", "c"]} + + assert merge_dictionaries(test_dict_1, test_dict_2) == { + "key1": ["a", "b", "c"], + "key2": ["a", "b", "c"], + } + assert test_dict_1 == {"key1": ["a", "c"], "key2": ["a", "b"]} + assert test_dict_2 == {"key1": ["a", "b"], "key2": ["a", "c"]} def test_rename_in_dict_1(): @@ -313,46 +612,197 @@ def test_rename_in_dict_5(): } -def test_remove_markdown_sections(): - """Test removing markdown sections.""" +def test_remove_markdown_sections_1(): + """Test remove_markdown_sections() function. + + GIVEN a string with markdown sections + WHEN the remove_markdown_sections() function is called with the default arguments + THEN return the string without removing any markdown sections + """ text: str = """ --- key: value --- -Lorem ipsum `dolor sit` amet. +# heading ```bash - echo "Hello World" +echo "Hello world" ``` + +Lorem ipsum `inline_code` lorem ipsum. +``` +echo "foo bar" +``` + +--- +dd +--- + """ + + assert remove_markdown_sections(text) == text + + +def test_remove_markdown_sections_2(): + """Test remove_markdown_sections() function. 
+ + GIVEN a string with markdown sections + WHEN the remove_markdown_sections() function is called with strip_codeblocks set to True + THEN return the string without the codeblocks + """ + text: str = """ +--- +key: value +--- + +# heading + +```bash +echo "Hello world" +``` + +Lorem ipsum `inline_code` lorem ipsum. +``` +echo "foo bar" +``` + +--- +dd +--- + """ + result = remove_markdown_sections(text, strip_codeblocks=True) + assert "inline_code" in result + assert "```bash" not in result + assert "```" not in result + assert "foo" not in result + assert "world" not in result + assert "key: value" in result + assert "heading" in result + assert "Lorem ipsum" in result + assert "---\n" in result + assert "dd" in result + + +def test_remove_markdown_sections_3(): + """Test remove_markdown_sections() function. + + GIVEN a string with markdown sections + WHEN the remove_markdown_sections() function is called with strip_inlinecode set to True + THEN return the string without the inline code + """ + text: str = """ +--- +key: value +--- + +# heading + +```bash +echo "Hello world" +``` + +Lorem ipsum `inline_code` lorem ipsum. +``` +echo "foo bar" +``` + +--- +dd +--- + """ + result = remove_markdown_sections(text, strip_inlinecode=True) + assert "`inline_code`" not in result + assert "```bash" in result + assert "```" in result + assert "foo" in result + assert "world" in result + assert "key: value" in result + assert "heading" in result + assert "Lorem ipsum" in result + assert "---\n" in result + assert "dd" in result + + +def test_remove_markdown_sections_4(): + """Test remove_markdown_sections() function. + + GIVEN a string with markdown sections + WHEN the remove_markdown_sections() function is called with strip_frontmatter set to True + THEN return the string without the frontmatter + """ + text: str = """ +--- +key: value +--- + +# heading + +```bash +echo "Hello world" +``` + +Lorem ipsum `inline_code` lorem ipsum. 
+``` +echo "foo bar" +``` + +--- +dd +--- + """ + result = remove_markdown_sections(text, strip_frontmatter=True) + assert "`inline_code`" in result + assert "```bash" in result + assert "```" in result + assert "foo" in result + assert "world" in result + assert "key: value" not in result + assert "heading" in result + assert "Lorem ipsum" in result + assert "---\n" in result + assert "dd" in result + + +def test_remove_markdown_sections_5(): + """Test remove_markdown_sections() function. + + GIVEN a string with markdown sections + WHEN the remove_markdown_sections() function is called with all arguments set to True + THEN return the string without the frontmatter, inline code, and codeblocks + """ + text: str = """ +--- +key: value +--- + +# heading + +```bash +echo "Hello world" +``` + +Lorem ipsum `inline_code` lorem ipsum. +``` +echo "foo bar" +``` + --- dd --- """ result = remove_markdown_sections( - text, - strip_codeblocks=True, - strip_frontmatter=True, - strip_inlinecode=True, + text, strip_frontmatter=True, strip_inlinecode=True, strip_codeblocks=True ) - assert "```bash" not in result - assert "`dolor sit`" not in result - assert "---\nkey: value" not in result - assert "`" not in result - - result = remove_markdown_sections(text) - assert "```bash" in result - assert "`dolor sit`" in result - assert "---\nkey: value" in result - assert "`" in result - - -def test_clean_dictionary(): - """Test cleaning a dictionary.""" - dictionary = {" *key* ": ["**value**", "[[value2]]", "#value3"]} - - new_dict = clean_dictionary(dictionary) - assert new_dict == {"key": ["value", "value2", "value3"]} + assert "`inline_code`" not in result + assert "bash" not in result + assert "```" not in result + assert "foo" not in result + assert "world" not in result + assert "key: value" not in result + assert "heading" in result + assert "Lorem ipsum" in result + assert "---\n" in result + assert "dd" in result def test_validate_csv_bulk_imports_1(tmp_path):