Commit 2ea03335 authored by Médéric Boquien

Implement partial cache clearing in order not to clog the memory with models...

Implement partial cache clearing in order not to clog the memory with models that are no longer necessary. This requires properly computing which models will not be used anymore. This is why we use ordered dictionaries: they ensure that parameters are always considered in the same order.
parent f3f9b0df
# -*- coding: utf-8 -*-
# Copyright (C) 2014 Médéric Boquien
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Médéric Boquien
"""
Various utility functions for pcigale analysis modules
"""
def find_changed_parameters(list_parameters):
    """
    Given a list of parameters dictionaries corresponding each to a given SED,
    find which parameter has changed between two adjacent items in the list.

    When used to find which SEDs to discard for partial cache clearing, this
    relies on the assumption that the list is properly ordered. In this case,
    when a parameter changes, the corresponding SEDs can be discarded. This
    should work when the list of dictionaries is generated using
    itertools.product().

    Parameters
    ----------
    list_parameters: list of list of dictionaries
        Each item is a list of dictionaries containing the parameters for each
        module.

    Returns
    -------
    A list of tuples with the same size as the input list. Each tuple contains
    the name of the parameter that has changed and the value it had before
    changing. When several parameters have changed, only the first one found
    is reported, scanning the modules and then their parameters in order;
    for a list ordered as itertools.product() does, this is presumably the one
    that would discard the most models. An item is None when nothing changes
    between it and the next item; the last item is always None.

    Raises
    ------
    KeyError: when a parameter of an item is missing from the corresponding
        module dictionary of the next item.
    """
    changed = [None] * len(list_parameters)

    # Walk over adjacent (current, next) pairs. The last item has no successor
    # and therefore keeps its None placeholder.
    # TODO: handle the special case of the last element
    adjacent = zip(list_parameters, list_parameters[1:])
    for i, (current, following) in enumerate(adjacent):
        # The generator is lazy: the scan stops at the first difference found.
        changed[i] = next(
            ((key, par[key])
             for par, par_next in zip(current, following)
             for key in par
             if par[key] != par_next[key]),
            None)

    return changed
......@@ -242,7 +242,7 @@ class Configuration(object):
# Parsing the SED modules parameters
configuration['creation_modules_params'] = []
for module in self.config['creation_modules']:
module_params = {}
module_params = collections.OrderedDict()
for key, value in \
self.config['sed_creation_modules'][module].items():
module_params[key] = evaluate_description(value)
......
......@@ -24,7 +24,7 @@ def param_dict_combine(dictionary):
"""
# We make a copy of the dictionary as we are modifying it.
dictionary = dict(dictionary)
dictionary = collections.OrderedDict(dictionary)
# First, we must ensure that all values are lists; when a value is a
# single element, we put it in a list.
......@@ -44,7 +44,8 @@ def param_dict_combine(dictionary):
# value lists.
key_list = dictionary.keys()
value_array_list = [dictionary[key] for key in key_list]
combination_list = [dict(zip(key_list, combination)) for combination in
combination_list = [collections.OrderedDict(zip(key_list, combination))
for combination in
itertools.product(*value_array_list)]
return combination_list
......@@ -3,7 +3,7 @@
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly
from json import JSONEncoder
from json import JSONEncoder, JSONDecoder
from ..sed import SED
from .. import creation_modules
......@@ -67,6 +67,33 @@ class SedWarehouse(object):
return module
def partial_clear_cache(self, flagged_param):
    """Clear the cache of SEDs that are not relevant anymore

    To do partial clearing of the cache, we go through the entire cache and
    delete the SEDs that correspond to a given parameter key/value.

    Parameters
    ----------
    flagged_param: tuple or None
        Tuple of 2 elements containing the parameter name and its value.
        When None, the cache is left untouched.

    """
    if flagged_param is None:
        return

    name, value = flagged_param[0], flagged_param[1]
    decoder = JSONDecoder()

    # NOTE(review): this reaches into the storage's `dictionary` attribute;
    # confirm that every storage backend provides it.
    # We iterate over a snapshot of the keys because entries are deleted
    # from the storage while looping.
    for key in list(self.storage.dictionary):
        # Each key is decoded from JSON; its second element is the list of
        # the parameter dictionaries of the SED, one per module.
        list_params = decoder.decode(key)[1]

        # We start with the last module because it is more likely that the
        # parameter we look for belongs to one of the last modules.
        if any(name in params and params[name] == value
               for params in reversed(list_params)):
            self.storage.delete(key)
def get_sed(self, module_list, parameter_list):
"""Get the SED corresponding to the module and parameter lists
......
......@@ -39,6 +39,16 @@ class SedStore(object):
# We store a copy not to modify the stored object.
self.dictionary[key] = deepcopy(value)
def delete(self, key):
    """Remove an entry from the in-memory cache

    Parameters
    ----------
    key: key of the entry to be removed. A KeyError is raised by the
        underlying container when the key is not stored.

    """
    cache = self.dictionary
    del cache[key]
def close(self):
    """Nothing to close for this store; the call has no effect"""
    return None
......@@ -40,6 +40,16 @@ class SedStore(object):
# We store a copy not to modify the stored object.
self.shelf_storage[key] = deepcopy(value)
def delete(self, key):
    """Delete a key, value pair from the cache

    Parameters
    ----------
    key: key of the element to be deleted. A KeyError is raised by the
        underlying shelf when the key is not stored.

    """
    # This store keeps its content in `shelf_storage`; it has no
    # `dictionary` attribute, so that is the container to delete from.
    del self.shelf_storage[key]
def close(self):
    """Close the shelf object backing this store"""
    shelf = self.shelf_storage
    shelf.close()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment