How do I compute a SHA-256 digest for arbitrary Python data?

Tags:

Computing a SHA-256 digest of Python data is useful when you need a stable ID or a filename derived from that data.

import hashlib
import inspect


def hexdigest(value):
    """Return the SHA-256 hex digest of a canonical text form of ``value``.

    Supported top-level values:

    * ``str`` -- hashed as its UTF-8 encoding.
    * ``int`` / ``float`` -- hashed as ``str(value)``.
    * ``dict`` -- entries ordered by key, rendered as ``{k: v, ...}``.
    * ``set`` / ``frozenset`` -- elements sorted, rendered as ``{a, b, ...}``.
    * ``list`` / ``tuple`` -- order kept, rendered as ``[...]`` / ``(...)``.
    * callables with a ``__name__`` -- rendered as ``module.name``.

    Containers and named callables found *inside* a container are rendered
    recursively with the same rules, so the digest does not depend on dict
    insertion order, set iteration order or object addresses at any depth.
    Any other nested element is rendered with ``str()``.

    Limitations: the text form carries no type tags, so values that render
    to the same text share a digest (``1`` and ``"1"``, ``[1, 2]`` and
    ``"[1, 2]"``). Callables are identified by module and ``__name__`` only,
    so e.g. two lambdas from the same module share a digest. Containers must
    not contain themselves.

    Args:
        value: The data to hash.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.

    Raises:
        ValueError: If ``value`` is of an unsupported type, or is a callable
            whose name or module cannot be determined.
    """
    if isinstance(value, str):
        text = value
    elif isinstance(value, (int, float, dict, set, frozenset, list, tuple)):
        text = _canonical_text(value)
    else:
        text = _qualified_name(value) if callable(value) else None
        if text is None:
            raise ValueError(f"Unsupported type {type(value)}")
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


def _canonical_text(value):
    """Render ``value`` as the deterministic text that ``hexdigest`` hashes."""
    if isinstance(value, (str, int, float)):
        return str(value)
    if isinstance(value, dict):
        entries = _in_stable_order(value.items(), lambda entry: entry[0])
        rendered = (
            f"{_canonical_text(key)}: {_canonical_text(item)}"
            for key, item in entries
        )
        return "{" + ", ".join(rendered) + "}"
    if isinstance(value, (set, frozenset)):
        rendered = (_canonical_text(item) for item in _in_stable_order(value))
        return "{" + ", ".join(rendered) + "}"
    if isinstance(value, list):
        return "[" + ", ".join(_canonical_text(item) for item in value) + "]"
    if isinstance(value, tuple):
        return "(" + ", ".join(_canonical_text(item) for item in value) + ")"
    if callable(value):
        name = _qualified_name(value)
        if name is not None:
            return name
    # Anything else nested in a container keeps its plain str() rendering.
    return str(value)


def _in_stable_order(items, sort_key=lambda item: item):
    """Return ``items`` as a list in an order independent of iteration order."""
    items = list(items)
    # frozensets compare by subset relation, which is only a partial order;
    # sorting them naturally would leak the (arbitrary) iteration order.
    if not any(isinstance(sort_key(item), frozenset) for item in items):
        try:
            return sorted(items, key=sort_key)
        except TypeError:
            # Mutually incomparable keys (e.g. int and str): fall through
            # and order by rendered text instead.
            pass
    return sorted(
        items,
        key=lambda item: (
            _canonical_text(sort_key(item)),
            type(sort_key(item)).__qualname__,
        ),
    )


def _qualified_name(func):
    """Return ``"module.name"`` for a callable, or None if it cannot be named."""
    name = getattr(func, "__name__", None)
    if name is None:
        return None
    # inspect.getmodule() returns None when the defining module is not
    # loaded; fall back to the name recorded on the callable itself.
    module = inspect.getmodule(func)
    if module is not None:
        module_name = module.__name__
    else:
        module_name = getattr(func, "__module__", None)
    if module_name is None:
        return None
    return f"{module_name}.{name}"

The following unit test shows the expected behavior.

import unittest

from util.hash_util import hexdigest


class HashUtilTestCase(unittest.TestCase):
    """Checks how ``hexdigest`` reacts to element order and to callables."""

    def test_ordered(self):
        """Ordered containers: same sequence matches, reversed one does not."""
        # list
        forward_list = [1, 2, 3]
        reversed_list = [3, 2, 1]
        self.assertEqual(hexdigest(forward_list), hexdigest([1, 2, 3]))
        self.assertNotEqual(hexdigest(forward_list), hexdigest(reversed_list))
        # tuple
        forward_pair = (1, 2)
        reversed_pair = (2, 1)
        self.assertEqual(hexdigest(forward_pair), hexdigest((1, 2)))
        self.assertNotEqual(hexdigest(forward_pair), hexdigest(reversed_pair))

    def test_unordered(self):
        """Unordered containers: the digest ignores how elements are listed."""
        # dict
        a_first = {"a": 1, "b": 2}
        b_first = {"b": 2, "a": 1}
        self.assertEqual(hexdigest(a_first), hexdigest(b_first))
        # set
        self.assertEqual(hexdigest({1, 2}), hexdigest({2, 1}))

    def test_callable(self):
        """Callables: a function matches itself and differs from another one."""

        def a():
            pass

        def b():
            pass

        digest_of_a = hexdigest(a)
        self.assertEqual(digest_of_a, hexdigest(a))
        self.assertNotEqual(digest_of_a, hexdigest(b))


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()