Computing a SHA-256 digest of Python data is useful when you need a stable ID or a filename.
import hashlib
import inspect
def hexdigest(value):
    """Return the SHA-256 hex digest of a canonical text form of ``value``.

    Supported top-level values are ``str``, ``int``/``float`` (``bool``
    included), ``dict``, ``set``, ``list``, ``tuple`` and callables that
    expose a name and a module. A string is hashed as its UTF-8 bytes; any
    other value is first rendered to text by ``_canonical_text``.

    ``dict`` keys and ``set`` members are sorted before rendering, so
    insertion order never influences the digest, while ``list`` and
    ``tuple`` keep their order. The same rules apply to containers nested
    inside other containers, and callables are rendered as ``module.name``
    at every level instead of their address-bearing ``str()``.

    NOTE: the text form carries no type tag, so values that render to the
    same text share a digest (for example ``1`` and ``"1"``).

    Args:
        value: The object to hash.

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.

    Raises:
        ValueError: If ``value`` is of an unsupported type, or is a callable
            whose name or module cannot be determined.
    """
    if isinstance(value, str):
        text = value
    elif isinstance(value, (int, float, dict, set, list, tuple)):
        text = _canonical_text(value, 0)
    else:
        text = _callable_label(value) if callable(value) else None
        if text is None:
            raise ValueError(f"Unsupported type {type(value)}")
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


def _canonical_text(item, depth):
    """Render ``item`` as deterministic text; ``depth`` 0 is the hashed value.

    Direct children of the hashed value (depth 1) are rendered ``str()``
    style and anything deeper ``repr()`` style. That mirrors what
    ``str(child)`` yields for builtin containers, but with dict/set contents
    sorted and callables named, so the text no longer depends on insertion
    order, hash seed or memory addresses.
    """
    if isinstance(item, dict):
        pairs = ", ".join(
            f"{_canonical_text(key, depth + 1)}: "
            f"{_canonical_text(item[key], depth + 1)}"
            for key in _stable_sorted(item)
        )
        return "{" + pairs + "}"
    if isinstance(item, (set, frozenset)):
        members = ", ".join(
            _canonical_text(member, depth + 1) for member in _stable_sorted(item)
        )
        braces = "{" + members + "}"
        if depth == 0:
            return braces
        # Nested sets follow the builtin repr conventions.
        if isinstance(item, frozenset):
            return f"frozenset({braces})" if item else "frozenset()"
        return braces if item else "set()"
    if isinstance(item, list):
        return "[" + ", ".join(_canonical_text(x, depth + 1) for x in item) + "]"
    if isinstance(item, tuple):
        body = ", ".join(_canonical_text(x, depth + 1) for x in item)
        if depth > 0 and len(item) == 1:
            body += ","  # repr of a one-element tuple is "(x,)"
        return "(" + body + ")"
    if callable(item):
        label = _callable_label(item)
        if label is not None:
            return label
    return str(item) if depth <= 1 else repr(item)


def _stable_sorted(items):
    """Sort ``items`` naturally, falling back to a type-aware text order."""
    members = list(items)
    try:
        return sorted(members)
    except TypeError:
        # Mixed, mutually unorderable types (e.g. int and str): order by
        # type name, then by canonical text, which is always comparable.
        return sorted(
            members,
            key=lambda member: (type(member).__name__, _canonical_text(member, 2)),
        )


def _callable_label(func):
    """Return ``"module.name"`` for ``func``, or ``None`` if unavailable."""
    name = getattr(func, "__name__", None)
    module = inspect.getmodule(func)
    if module is not None:
        module_name = module.__name__
    else:
        # getmodule() returns None when the defining module is not loaded.
        module_name = getattr(func, "__module__", None)
    if name is None or module_name is None:
        return None
    return f"{module_name}.{name}"
The following unit test shows the expected behavior.
import unittest
from util.hash_util import hexdigest
class HashUtilTestCase(unittest.TestCase):
    """Checks how hexdigest reacts to element order and to callables."""

    def test_ordered(self):
        """Sequences are order-sensitive: reordering changes the digest."""
        # list
        list_digest = hexdigest([1, 2, 3])
        self.assertEqual(list_digest, hexdigest([1, 2, 3]))
        self.assertNotEqual(list_digest, hexdigest([3, 2, 1]))

        # tuple
        tuple_digest = hexdigest((1, 2))
        self.assertEqual(tuple_digest, hexdigest((1, 2)))
        self.assertNotEqual(tuple_digest, hexdigest((2, 1)))

    def test_unordered(self):
        """Dicts and sets are order-insensitive: same content, same digest."""
        # dict
        forward = {"a": 1, "b": 2}
        backward = {"b": 2, "a": 1}
        self.assertEqual(hexdigest(forward), hexdigest(backward))

        # set
        self.assertEqual(hexdigest({1, 2}), hexdigest({2, 1}))

    def test_callable(self):
        """A function matches itself and differs from another function."""

        # The two functions must keep distinct names for the second assertion.
        def first():
            pass

        def second():
            pass

        first_digest = hexdigest(first)
        self.assertEqual(first_digest, hexdigest(first))
        self.assertNotEqual(first_digest, hexdigest(second))
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()