001/** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.kahadb.util; 018 019import java.util.Collection; 020import java.util.HashMap; 021import java.util.Iterator; 022import java.util.LinkedHashSet; 023import java.util.Map; 024import java.util.Set; 025 026/** 027 * LFU cache implementation based on http://dhruvbird.com/lfu.pdf, with some notable differences: 028 * <ul> 029 * <li> 030 * Frequency list is stored as an array with no next/prev pointers between nodes: looping over the array should be faster and more CPU-cache friendly than 031 * using an ad-hoc linked-pointers structure. 032 * </li> 033 * <li> 034 * The max frequency is capped at the cache size to avoid creating more and more frequency list entries, and all elements residing in the max frequency entry 035 * are re-positioned in the frequency entry linked set in order to put most recently accessed elements ahead of less recently ones, 036 * which will be collected sooner. 037 * </li> 038 * <li> 039 * The eviction factor determines how many elements (more specifically, the percentage of) will be evicted. 040 * </li> 041 * </ul> 042 * As a consequence, this cache runs in *amortized* O(1) time (considering the worst case of having the lowest frequency at 0 and having to evict all 043 * elements). 044 * 045 * @author Sergio Bossa 046 */ 047public class LFUCache<Key, Value> implements Map<Key, Value> { 048 049 private final Map<Key, CacheNode<Key, Value>> cache; 050 private final LinkedHashSet[] frequencyList; 051 private int lowestFrequency; 052 private int maxFrequency; 053 // 054 private final int maxCacheSize; 055 private final float evictionFactor; 056 057 public LFUCache(int maxCacheSize, float evictionFactor) { 058 if (evictionFactor <= 0 || evictionFactor >= 1) { 059 throw new IllegalArgumentException("Eviction factor must be greater than 0 and lesser than or equal to 1"); 060 } 061 this.cache = new HashMap<Key, CacheNode<Key, Value>>(maxCacheSize); 062 this.frequencyList = new LinkedHashSet[maxCacheSize]; 063 this.lowestFrequency = 0; 064 this.maxFrequency = maxCacheSize - 1; 065 this.maxCacheSize = maxCacheSize; 066 this.evictionFactor = evictionFactor; 067 initFrequencyList(); 068 } 069 070 public Value put(Key k, Value v) { 071 Value oldValue = null; 072 CacheNode<Key, Value> currentNode = cache.get(k); 073 if (currentNode == null) { 074 if (cache.size() == maxCacheSize) { 075 doEviction(); 076 } 077 LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[0]; 078 currentNode = new CacheNode(k, v, 0); 079 nodes.add(currentNode); 080 cache.put(k, currentNode); 081 lowestFrequency = 0; 082 } else { 083 oldValue = currentNode.v; 084 currentNode.v = v; 085 } 086 return oldValue; 087 } 088 089 090 public void putAll(Map<? extends Key, ? extends Value> map) { 091 for (Map.Entry<? extends Key, ? extends Value> me : map.entrySet()) { 092 put(me.getKey(), me.getValue()); 093 } 094 } 095 096 public Value get(Object k) { 097 CacheNode<Key, Value> currentNode = cache.get(k); 098 if (currentNode != null) { 099 int currentFrequency = currentNode.frequency; 100 if (currentFrequency < maxFrequency) { 101 int nextFrequency = currentFrequency + 1; 102 LinkedHashSet<CacheNode<Key, Value>> currentNodes = frequencyList[currentFrequency]; 103 LinkedHashSet<CacheNode<Key, Value>> newNodes = frequencyList[nextFrequency]; 104 moveToNextFrequency(currentNode, nextFrequency, currentNodes, newNodes); 105 cache.put((Key) k, currentNode); 106 if (lowestFrequency == currentFrequency && currentNodes.isEmpty()) { 107 lowestFrequency = nextFrequency; 108 } 109 } else { 110 // Hybrid with LRU: put most recently accessed ahead of others: 111 LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[currentFrequency]; 112 nodes.remove(currentNode); 113 nodes.add(currentNode); 114 } 115 return currentNode.v; 116 } else { 117 return null; 118 } 119 } 120 121 public Value remove(Object k) { 122 CacheNode<Key, Value> currentNode = cache.remove(k); 123 if (currentNode != null) { 124 LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[currentNode.frequency]; 125 nodes.remove(currentNode); 126 if (lowestFrequency == currentNode.frequency) { 127 findNextLowestFrequency(); 128 } 129 return currentNode.v; 130 } else { 131 return null; 132 } 133 } 134 135 public int frequencyOf(Key k) { 136 CacheNode<Key, Value> node = cache.get(k); 137 if (node != null) { 138 return node.frequency + 1; 139 } else { 140 return 0; 141 } 142 } 143 144 public void clear() { 145 for (int i = 0; i <= maxFrequency; i++) { 146 frequencyList[i].clear(); 147 } 148 cache.clear(); 149 lowestFrequency = 0; 150 } 151 152 public Set<Key> keySet() { 153 return this.cache.keySet(); 154 } 155 156 public Collection<Value> values() { 157 return null; //To change body of implemented methods use File | Settings | File Templates. 158 } 159 160 public Set<Entry<Key, Value>> entrySet() { 161 return null; //To change body of implemented methods use File | Settings | File Templates. 162 } 163 164 public int size() { 165 return cache.size(); 166 } 167 168 public boolean isEmpty() { 169 return this.cache.isEmpty(); 170 } 171 172 public boolean containsKey(Object o) { 173 return this.cache.containsKey(o); 174 } 175 176 public boolean containsValue(Object o) { 177 return false; //To change body of implemented methods use File | Settings | File Templates. 178 } 179 180 181 private void initFrequencyList() { 182 for (int i = 0; i <= maxFrequency; i++) { 183 frequencyList[i] = new LinkedHashSet<CacheNode<Key, Value>>(); 184 } 185 } 186 187 private void doEviction() { 188 int currentlyDeleted = 0; 189 float target = maxCacheSize * evictionFactor; 190 while (currentlyDeleted < target) { 191 LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[lowestFrequency]; 192 if (nodes.isEmpty()) { 193 throw new IllegalStateException("Lowest frequency constraint violated!"); 194 } else { 195 Iterator<CacheNode<Key, Value>> it = nodes.iterator(); 196 while (it.hasNext() && currentlyDeleted++ < target) { 197 CacheNode<Key, Value> node = it.next(); 198 it.remove(); 199 cache.remove(node.k); 200 } 201 if (!it.hasNext()) { 202 findNextLowestFrequency(); 203 } 204 } 205 } 206 } 207 208 private void moveToNextFrequency(CacheNode<Key, Value> currentNode, int nextFrequency, LinkedHashSet<CacheNode<Key, Value>> currentNodes, LinkedHashSet<CacheNode<Key, Value>> newNodes) { 209 currentNodes.remove(currentNode); 210 newNodes.add(currentNode); 211 currentNode.frequency = nextFrequency; 212 } 213 214 private void findNextLowestFrequency() { 215 while (lowestFrequency <= maxFrequency && frequencyList[lowestFrequency].isEmpty()) { 216 lowestFrequency++; 217 } 218 if (lowestFrequency > maxFrequency) { 219 lowestFrequency = 0; 220 } 221 } 222 223 private static class CacheNode<Key, Value> { 224 225 public final Key k; 226 public Value v; 227 public int frequency; 228 229 public CacheNode(Key k, Value v, int frequency) { 230 this.k = k; 231 this.v = v; 232 this.frequency = frequency; 233 } 234 235 } 236}