我需要合并多个字典,这是我有例如:

dict1 = {1:{"a":{A}}, 2:{"b":{B}}}

dict2 = {2:{"c":{C}}, 3:{"d":{D}}}

A、B、C和D是树的叶子,比如{"info1":"value", "info2":"value2"}

字典的级别(深度)未知,可能是{2:{"c":{"z":{"y":{c}}}}}

在我的例子中,它表示一个目录/文件结构,节点是文档,叶子是文件。

我想将它们合并得到:

 dict3 = {1:{"a":{A}}, 2:{"b":{B},"c":{C}}, 3:{"d":{D}}}

我不确定如何用Python轻松做到这一点。


当前回答

这实际上是相当棘手的-特别是如果你想要一个有用的错误消息时,事情是不一致的,同时正确地接受重复但一致的条目(这是这里没有其他答案做的..)。

假设你没有大量的条目,递归函数是最简单的:

from functools import reduce

def merge(a, b, path=None):
    "merges b into a"
    if path is None: path = []
    for key in b:
        if key in a:
            if isinstance(a[key], dict) and isinstance(b[key], dict):
                merge(a[key], b[key], path + [str(key)])
            elif a[key] == b[key]:
                pass # same leaf value
            else:
                raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
        else:
            a[key] = b[key]
    return a

# works
print(merge({1:{"a":"A"},2:{"b":"B"}}, {2:{"c":"C"},3:{"d":"D"}}))
# has conflict
merge({1:{"a":"A"},2:{"b":"B"}}, {1:{"a":"A"},2:{"b":"C"}})

注意,这会使a发生变化——b的内容被添加到a(也会返回a)。如果你想保留a,你可以叫它merge(dict(a) b)

Agf指出(下面),你可能有两个以上的字典,在这种情况下,你可以使用:

reduce(merge, [dict1, dict2, dict3...])

所有内容都将被添加到dict1中。

注意:我编辑了我的初始答案以改变第一个参数;这使得“reduce”更容易解释

其他回答

这实际上是相当棘手的-特别是如果你想要一个有用的错误消息时,事情是不一致的,同时正确地接受重复但一致的条目(这是这里没有其他答案做的..)。

假设你没有大量的条目,递归函数是最简单的:

from functools import reduce

def merge(a, b, path=None):
    "merges b into a"
    if path is None: path = []
    for key in b:
        if key in a:
            if isinstance(a[key], dict) and isinstance(b[key], dict):
                merge(a[key], b[key], path + [str(key)])
            elif a[key] == b[key]:
                pass # same leaf value
            else:
                raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
        else:
            a[key] = b[key]
    return a

# works
print(merge({1:{"a":"A"},2:{"b":"B"}}, {2:{"c":"C"},3:{"d":"D"}}))
# has conflict
merge({1:{"a":"A"},2:{"b":"B"}}, {1:{"a":"A"},2:{"b":"C"}})

注意,这会使a发生变化——b的内容被添加到a(也会返回a)。如果你想保留a,你可以叫它merge(dict(a) b)

Agf指出(下面),你可能有两个以上的字典,在这种情况下,你可以使用:

reduce(merge, [dict1, dict2, dict3...])

所有内容都将被添加到dict1中。

注意:我编辑了我的初始答案以改变第一个参数;这使得“reduce”更容易解释

你可以使用toolz包中的merge函数,例如:

>>> import toolz
>>> dict1 = {1: {'a': 'A'}, 2: {'b': 'B'}}
>>> dict2 = {2: {'c': 'C'}, 3: {'d': 'D'}}
>>> toolz.merge_with(toolz.merge, dict1, dict2)
{1: {'a': 'A'}, 2: {'c': 'C'}, 3: {'d': 'D'}}

以下是来自@andrew cooke的回答。 它以更好的方式处理嵌套列表。

def deep_merge_lists(original, incoming):
    """
    Deep merge two lists. Modifies original.
    Recursively call deep merge on each correlated element of list. 
    If item type in both elements are
     a. dict: Call deep_merge_dicts on both values.
     b. list: Recursively call deep_merge_lists on both values.
     c. any other type: Value is overridden.
     d. conflicting types: Value is overridden.

    If length of incoming list is more that of original then extra values are appended.
    """
    common_length = min(len(original), len(incoming))
    for idx in range(common_length):
        if isinstance(original[idx], dict) and isinstance(incoming[idx], dict):
            deep_merge_dicts(original[idx], incoming[idx])

        elif isinstance(original[idx], list) and isinstance(incoming[idx], list):
            deep_merge_lists(original[idx], incoming[idx])

        else:
            original[idx] = incoming[idx]

    for idx in range(common_length, len(incoming)):
        original.append(incoming[idx])


def deep_merge_dicts(original, incoming):
    """
    Deep merge two dictionaries. Modifies original.
    For key conflicts if both values are:
     a. dict: Recursively call deep_merge_dicts on both values.
     b. list: Call deep_merge_lists on both values.
     c. any other type: Value is overridden.
     d. conflicting types: Value is overridden.

    """
    for key in incoming:
        if key in original:
            if isinstance(original[key], dict) and isinstance(incoming[key], dict):
                deep_merge_dicts(original[key], incoming[key])

            elif isinstance(original[key], list) and isinstance(incoming[key], list):
                deep_merge_lists(original[key], incoming[key])

            else:
                original[key] = incoming[key]
        else:
            original[key] = incoming[key]

还有一个轻微的变化:

下面是一个纯粹的基于python3集的深度更新函数。它通过一次循环遍历一层来更新嵌套字典,并调用自己来更新下一层的字典值:

def deep_update(dict_original, dict_update):
    if isinstance(dict_original, dict) and isinstance(dict_update, dict):
        output=dict(dict_original)
        keys_original=set(dict_original.keys())
        keys_update=set(dict_update.keys())
        similar_keys=keys_original.intersection(keys_update)
        similar_dict={key:deep_update(dict_original[key], dict_update[key]) for key in similar_keys}
        new_keys=keys_update.difference(keys_original)
        new_dict={key:dict_update[key] for key in new_keys}
        output.update(similar_dict)
        output.update(new_dict)
        return output
    else:
        return dict_update

举个简单的例子:

x={'a':{'b':{'c':1, 'd':1}}}
y={'a':{'b':{'d':2, 'e':2}}, 'f':2}

print(deep_update(x, y))
>>> {'a': {'b': {'c': 1, 'd': 2, 'e': 2}}, 'f': 2}
from collections import defaultdict
from itertools import chain

class DictHelper:

@staticmethod
def merge_dictionaries(*dictionaries, override=True):
    merged_dict = defaultdict(set)
    all_unique_keys = set(chain(*[list(dictionary.keys()) for dictionary in dictionaries]))  # Build a set using all dict keys
    for key in all_unique_keys:
        keys_value_type = list(set(filter(lambda obj_type: obj_type != type(None), [type(dictionary.get(key, None)) for dictionary in dictionaries])))
        # Establish the object type for each key, return None if key is not present in dict and remove None from final result
        if len(keys_value_type) != 1:
            raise Exception("Different objects type for same key: {keys_value_type}".format(keys_value_type=keys_value_type))

        if keys_value_type[0] == list:
            values = list(chain(*[dictionary.get(key, []) for dictionary in dictionaries]))  # Extract the value for each key
            merged_dict[key].update(values)

        elif keys_value_type[0] == dict:
            # Extract all dictionaries by key and enter in recursion
            dicts_to_merge = list(filter(lambda obj: obj != None, [dictionary.get(key, None) for dictionary in dictionaries]))
            merged_dict[key] = DictHelper.merge_dictionaries(*dicts_to_merge)

        else:
            # if override => get value from last dictionary else make a list of all values
            values = list(filter(lambda obj: obj != None, [dictionary.get(key, None) for dictionary in dictionaries]))
            merged_dict[key] = values[-1] if override else values

    return dict(merged_dict)



if __name__ == '__main__':
  d1 = {'aaaaaaaaa': ['to short', 'to long'], 'bbbbb': ['to short', 'to long'], "cccccc": ["the is a test"]}
  d2 = {'aaaaaaaaa': ['field is not a bool'], 'bbbbb': ['field is not a bool']}
  d3 = {'aaaaaaaaa': ['filed is not a string', "to short"], 'bbbbb': ['field is not an integer']}
  print(DictHelper.merge_dictionaries(d1, d2, d3))

  d4 = {"a": {"x": 1, "y": 2, "z": 3, "d": {"x1": 10}}}
  d5 = {"a": {"x": 10, "y": 20, "d": {"x2": 20}}}
  print(DictHelper.merge_dictionaries(d4, d5))

输出:

{'bbbbb': {'to long', 'field is not an integer', 'to short', 'field is not a bool'}, 
'aaaaaaaaa': {'to long', 'to short', 'filed is not a string', 'field is not a bool'}, 
'cccccc': {'the is a test'}}

{'a': {'y': 20, 'd': {'x1': 10, 'x2': 20}, 'z': 3, 'x': 10}}