package com.equitysoft.hashstore;
import java.io.*;
import java.text.*;
import java.util.*;

/**
 * This class implements a hashtable on disk. It mimics the behaviour of <code>java.util.Hashtable</code> with
 * identical methods to put, get and remove instances from the table.
 *<p>
 * The underlying file system is used to allocate and deallocate space for storage so no initial capacity
 * or load factor need be specified. A capacity of one billion keys is hardcoded into the class. Disk space is only
 * requested from the file system piece by piece and when required so the programmer need not be concerned if a few hundred
 * or a few million entries are being stored. Performance should always be the same no matter how many keys are stored.
 *<p>
 * None of the methods are synchronized. The class <code>SyncDiskHashtable</code> provides a synchronized version.
 *<p>
 * This source code is subject to the terms and conditions of the General Public License.
 * @author  Colin Mummery - colin_mummery@my-deja.com - http://www.kagi.com/equitysoft
 */

public class DiskHashtable {

    /** Number of directory levels walked by the enumerator (three directory levels plus the file level). */
    final static int DEPTH=4;
    /** Marker in the enumerator's per-level counters meaning "this level has not been listed yet". */
    final static int NOTSET=-1;
    final private static int DELETE_FILES=0; //goThroughFiles: delete files, keep directories
    final private static int DELETE_ALL=1; //goThroughFiles: delete files and directories
    final private static int COUNT=2; //goThroughFiles: count files
    /** Enumeration type: iterate over the key files. */
    final static int KEYS=0;
    /** Enumeration type: iterate over the data files. */
    final static int ELEMENTS=1;
    final private static String KEY="Key"; //name of the key directory under the root
    final private static String DATA="Data"; //name of the data directory under the root
    //Modulus applied to the hash code. It is one more than a billion, so the raw result can be
    //exactly 1000000000, which does not fit the nine digit layout; getLocation folds that value.
    final private static int MOD=1000000001;
    final private static int CAPACITY=1000000000; //number of distinct nine digit slots
    final private static int DIGITS=9; //width of the zero padded slot number
    protected File root; //if the root exists then so must the keyroot and dataroot
    protected File keyroot;
    protected File dataroot;
    protected String current; //location of the entry last stored or found, null after a failed search
    protected Object currkey; //Only valid if current is non-null
    private boolean ready; //Set to false after a delete all, if true then the root, keyroot and dataroot exist

    /**
     * Creates a table stored under the given directory. The directory and its key and data
     * subdirectories are created if they do not exist.
     * @param root the directory that holds the table
     */
    public DiskHashtable(File root) {
        this.root=root;
        ready=false;
        checkDirs();
    }
//-------------------------------------------------------------------------
    /**
     * Makes sure the root, key and data directories exist before any operation. Does nothing
     * when the table is already marked as ready.
     */
    protected void checkDirs(){
        if(ready)return;
        checkExists(root);
        keyroot=new File(root,KEY);
        checkExists(keyroot);
        dataroot=new File(root,DATA);
        checkExists(dataroot);
        ready=true;
    }
//-------------------------------------------------------------------------
    /**
     * Creates the directory, including any missing parents, when it does not exist.
     * @return <code>true</code> if the directory already existed, <code>false</code> if creation was attempted
     */
    private static boolean checkExists(File dir){
        if(dir.exists())return true;
        dir.mkdirs(); //mkdirs rather than mkdir so a root with a missing parent can still be created
        return false;
    }
//-------------------------------------------------------------------------
/**
 * Puts the specified <code>key</code> and <code>value</code> pair into the table and returns the original
 * value for the key if the key already exists.
 * @return The original value for the key if it already exists, otherwise <code>null</code>
 * @throws NullPointerException if the key or the value is <code>null</code>
 * @throws IOException if a key or data file cannot be read or written
 */
    public Object put(Object key,Object value) throws IOException{
        return put(key,value,true);
    }
//-------------------------------------------------------------------------
    /**
     * Stores a key and value pair. With <code>noduplicates</code> set an existing equal key has its
     * value replaced; without it the pair is always appended at the end of the chain of entries
     * sharing the key's location.
     * @param noduplicates <code>true</code> to replace the value of an existing equal key
     * @return the previous value when an existing key was replaced, otherwise <code>null</code>
     * @throws NullPointerException if the key or the value is <code>null</code>
     * @throws IOException if a key or data file cannot be read or written
     */
    protected Object put(Object key,Object value,boolean noduplicates) throws IOException{
        checkDirs();
        if(value==null){
            throw new NullPointerException();
        }

        String loc=getLocation(key);
        noDirectories(loc,true);

        //walk the chain of files at this location until a free slot or an equal key is found
        File keyfile=new File(keyroot,loc);
        while(keyfile.exists()){
            if(noduplicates && key.equals(loadObject(keyfile))){ //replace and return the previous value
                File datafile=new File(dataroot,loc);
                Object olddata=loadObject(datafile);
                saveObject(datafile,value);
                current=loc;
                currkey=key;
                return olddata;
            }
            loc=incName(loc,1);
            keyfile=new File(keyroot,loc);
        }
        saveObject(keyfile,key);
        saveObject(new File(dataroot,loc),value);
        current=loc;
        currkey=key;
        return null;
    }
//--------------------------------------------------------------------------
/**
 * Gets the object associated with the key. If the object causes a <code>ClassNotFoundException</code>
 * when deserializing then the exception is transformed into an <code>IOException</code> for convenience.
 * @return <code>null</code> if the key isn't found otherwise the associated object
 * @throws IOException if a key or data file cannot be read
 */
    public Object get(Object key) throws IOException{
        return get(key,false);
    }
//--------------------------------------------------------------------------
    /**
     * Looks up a value. With <code>getnext</code> set the search resumes after the entry found by the
     * previous successful search, using the key of that search and ignoring the <code>key</code> argument.
     * @param getnext <code>true</code> to continue from the previously found entry
     * @return the value found, or <code>null</code> if there is no (further) matching entry
     * @throws IOException if a key or data file cannot be read
     */
    protected Object get(Object key,boolean getnext) throws IOException{
        checkDirs();
        String loc=null;
        if(!getnext){
            loc=getLocation(key);
            if(noDirectories(loc,false))return null;
        }
        else {
            if(current==null)return null; //no previous successful search
            loc=incName(current,1);
            key=currkey;
        }
        //now walk the chain looking for an equal key
        while(true){
            File keyfile=new File(keyroot,loc);
            if(!keyfile.exists()){ //end of the chain, we didn't find it
                current=null;
                currkey=null;
                return null;
            }
            if(key.equals(loadObject(keyfile))){ //we found it
                current=loc;
                currkey=key;
                return loadObject(new File(dataroot,loc));
            }
            loc=incName(loc,1); //get the next in the list
        }
    }
//--------------------------------------------------------------------------
    /**
     * Removes entries for the key. With <code>noduplicates</code> set the first matching entry is
     * removed and its value returned. Without it every matching entry in the chain is removed and
     * the method returns <code>null</code> once the end of the chain is reached.
     * After each removal the following entries of the chain are renamed down by one so the chain
     * has no gap.
     * @return the removed value when <code>noduplicates</code> is set and the key was found, otherwise <code>null</code>
     * @throws IOException if a file cannot be read, deleted or renamed
     */
    protected Object remove(Object key,boolean noduplicates) throws IOException{
        checkDirs();
        String loc=getLocation(key);
        if(noDirectories(loc,false))return null;
        while(true){
            File keyfile=new File(keyroot,loc);
            if(!keyfile.exists())return null; //we didn't find it (or no more duplicates)
            if(key.equals(loadObject(keyfile))){ //we found it so remove it
                File datafile=new File(dataroot,loc);
                Object oldobj=loadObject(datafile);
                deleteFile(keyfile);
                deleteFile(datafile);
                closeGap(loc);
                if(noduplicates)return oldobj;
                //the slot at loc now holds the entry that followed, so examine loc again
            }
            else {
                loc=incName(loc,1); //get the next in the list
            }
        }
    }
//--------------------------------------------------------------------------
    //Renames every entry following the (now empty) slot at loc down by one position.
    private void closeGap(String loc) throws IOException{
        String nxt=incName(loc,1);
        while(new File(keyroot,nxt).exists()){
            String prev=incName(nxt,-1);
            renameFile(new File(keyroot,nxt),new File(keyroot,prev));
            renameFile(new File(dataroot,nxt),new File(dataroot,prev));
            nxt=incName(nxt,1);
        }
    }
//--------------------------------------------------------------------------
    //Deletes a file and reports failure, a silently undeleted file would corrupt the chain.
    private static void deleteFile(File f) throws IOException{
        if(!f.delete())throw new IOException("Unable to delete "+f);
    }
//--------------------------------------------------------------------------
    //Renames a file and reports failure, a silently unmoved file would leave a gap in the chain.
    private static void renameFile(File from,File to) throws IOException{
        if(!from.renameTo(to))throw new IOException("Unable to rename "+from+" to "+to);
    }
//--------------------------------------------------------------------------
/**
 * Removes the specified key from the hashtable
 * @return The Object associated with the key removed, or <code>null</code> if the key isn't found
 * @throws IOException if a file cannot be read, deleted or renamed
 */
    public Object remove(Object key) throws IOException{
        return remove(key,true);
    }
//--------------------------------------------------------------------------
    /**
     * Checks the three directory levels of a location; code common to get, remove and put.
     * The prefixes of length 2, 5 and 8 are the directory parts of the location, which assumes a
     * one character file separator.
     * @param createdata when <code>true</code> (put only) a missing key directory is created together
     *        with the matching data directory
     * @return <code>true</code> if a directory is missing and <code>createdata</code> is <code>false</code>
     */
    private boolean noDirectories(String loc,boolean createdata){
        for(int i=2;i<9;i+=3){
            String name=loc.substring(0,i);
            File dir=new File(keyroot,name);
            if(dir.exists())continue;
            if(!createdata)return true;
            dir.mkdir();
            new File(dataroot,name).mkdir();
        }
        return false;
    }
//--------------------------------------------------------------------------
/**
 * Returns <code>true</code> if the hashtable contains the given key otherwise <code>false</code>. This
 * method is implemented by seeing if invoking <code>get</code> returns <code>null</code>.
 * @return <code>true</code> if the key exists
 * @throws IOException if a key or data file cannot be read
 */
    public boolean containsKey(Object key) throws IOException{
        return get(key)!=null;
    }
//--------------------------------------------------------------------------
/**
 * Returns true if the table contains no entries. This method functions by getting an enumeration and checking
 * if it has any elements.
 * @return <code>true</code> if there are no entries in the hashtable.
 */
    public boolean isEmpty(){
        if(!ready)return true;
        return !keys().hasMoreElements();
    }
//--------------------------------------------------------------------------
/**
 * Returns the number of entries in the hashtable. Care should be exercised with this method as it may take some time
 * to return as it functions by counting all the key entries.
 * @return The number of keys in the hashtable.
 */
    public int size(){
        if(!ready)return 0;
        return goThroughFiles(keyroot,COUNT);
    }
//--------------------------------------------------------------------------
/**
 * Returns an <code>Enumeration</code> of all the keys in the hashtable. Repeated <code>nextElement</code> calls
 * will then return the hashtable keys. It is more efficient to make repeated calls of <code>nextElement</code>
 * until a <code>null</code> value is returned instead of using <code>hasMoreElements</code>.
 * @return An Enumeration of the hashtable keys
 * @see #elements()
 */
    public Enumeration keys(){
        return new Enumerator(KEYS);
    }
//--------------------------------------------------------------------------
/**
 * Returns an <code>Enumeration</code> of all the values in the hashtable. Repeated <code>nextElement</code> calls
 * will then return the hashtable values. It is more efficient to make repeated calls of <code>nextElement</code>
 * until a <code>null</code> value is returned instead of using <code>hasMoreElements</code>. The enumeration
 * walks the data directory in the same way <code>keys()</code> walks the key directory.
 * @return An Enumeration of the hashtable values
 * @see #keys()
 */
    public Enumeration elements(){
        return new Enumerator(ELEMENTS);
    }
//--------------------------------------------------------------------------
/**
 * Removes all the keys and values in the table. All the hashtable files are deleted but the directories are left intact.
 * This should be used if the table is to be refilled with entries.
 * The method won't return until the operation is complete. This method is synchronized as only one thread should do this.
 * @see #deleteTable()
 */
    public synchronized void clear(){ //erase all the files but not the directories
        if(!ready)return;
        goThroughFiles(keyroot,DELETE_FILES);
        goThroughFiles(dataroot,DELETE_FILES);
    }
//--------------------------------------------------------------------------
/**
 * Deletes all the files and directories associated with the table, including the root directory. It is not
 * necessary to call <code>clear()</code> before invoking this method.
 * The method won't return until the operation is complete and may take some time. This method is synchronized as only
 * one thread should do this. The directories are created again by the next operation that needs them.
 * @see #clear()
 */
    public synchronized void deleteTable(){
        if(!ready)return;
        goThroughFiles(keyroot,DELETE_ALL);
        goThroughFiles(dataroot,DELETE_ALL);
        root.delete();
        ready=false;
    }
//--------------------------------------------------------------------------
    /**
     * Recursively deletes or counts the files below a directory.
     * @param action one of COUNT, DELETE_FILES or DELETE_ALL
     * @return the number of files counted, zero for the delete actions
     */
    private int goThroughFiles(File dir,int action){
        int count=0;
        File[] files=dir.listFiles();
        if(files!=null){ //listFiles returns null if dir is not a directory or cannot be read
            for(int i=0;i<files.length;i++){
                File f=files[i];
                if(f.isDirectory()){
                    count+=goThroughFiles(f,action);
                }
                else if(action==COUNT){
                    count++;
                }
                else {
                    f.delete();
                }
            }
        }
        if(action==DELETE_ALL)dir.delete();
        return count; //will be zero unless we count some files
    }
//--------------------------------------------------------------------------
    /**
     * Returns the location whose chain position differs from <code>name</code> by <code>inc</code>.
     * The position is the number in front of the last three characters of the file name; it is
     * incremented to get the next file and decremented to shift a file down on deletion.
     */
    protected String incName(String name,int inc){
        int idx=name.lastIndexOf(File.separatorChar)+1;
        int len=name.length();
        int nextnum=Integer.parseInt(name.substring(idx,len-3))+inc;
        return name.substring(0,idx) + nextnum + name.substring(len-3);
    }
//--------------------------------------------------------------------------
    /**
     * Returns the location of the key as a path relative to the key or data root. The non-negative
     * hash code is reduced to a nine digit number <code>ABCDEFGHI</code> and laid out as
     * <code>HI/FG/DE/0ABC</code>, where the zero is the position of the first file in the chain.
     * @throws NullPointerException if the key is <code>null</code>
     */
    protected String getLocation(Object key){
        int idx=(key.hashCode() & 0x7FFFFFFF) % MOD;
        //MOD allows the single value 1000000000 which needs ten digits and used to produce an
        //unusable path; fold it onto slot 0. Every other value keeps its existing location.
        idx%=CAPACITY;
        String whole=zeroPad(idx);
        StringBuffer sb=new StringBuffer(13);
        sb.append(whole.substring(7)).append(File.separator);
        sb.append(whole.substring(5,7)).append(File.separator);
        sb.append(whole.substring(3,5)).append(File.separator);
        sb.append(0); //append a zero for the first file with this name
        sb.append(whole.substring(0,3));
        return sb.toString();
    }
//--------------------------------------------------------------------------
    //Left pads the number with ASCII zeros to DIGITS characters. Used instead of a shared
    //DecimalFormat, which is neither thread safe nor independent of the default locale.
    private static String zeroPad(int value){
        String digits=Integer.toString(value);
        StringBuffer sb=new StringBuffer(DIGITS);
        for(int i=digits.length();i<DIGITS;i++)sb.append('0');
        return sb.append(digits).toString();
    }
//--------------------------------------------------------------------------
    /**
     * Serializes the object into the given file, replacing any existing content.
     * @throws IOException if the file cannot be written or the object cannot be serialized
     */
    protected void saveObject(File dest,Object obj) throws IOException {
        FileOutputStream fos=new FileOutputStream(dest);
        try{
            ObjectOutputStream oos=new ObjectOutputStream(fos);
            oos.writeObject(obj);
            oos.flush();
        }finally{
            fos.close(); //always release the file handle, even when serialization fails
        }
    }
//--------------------------------------------------------------------------
    /**
     * Deserializes the object stored in the given file. A <code>ClassNotFoundException</code> is
     * rethrown as an <code>IOException</code> carrying the original exception as its cause.
     * NOTE(review): this uses plain Java deserialization, so the table directory must only contain trusted files.
     * @throws IOException if the file cannot be read or the object's class is not found
     */
    protected Object loadObject(File src) throws IOException {
        FileInputStream fis=new FileInputStream(src);
        try{
            ObjectInputStream ois=new ObjectInputStream(fis);
            return ois.readObject();
        }catch(ClassNotFoundException cnf){
            IOException ioe=new IOException("Class not found");
            ioe.initCause(cnf);
            throw ioe;
        }finally{
            fis.close(); //always release the file handle, even when deserialization fails
        }
    }
//--------------------------------------------------------------------------
    /**
     * Enumerates the objects stored below the key or data root by walking the directory tree
     * one file per call, keeping the listing and position of every level between calls.
     */
    private class Enumerator implements Enumeration {

        int type;
        File[][] lists=new File[DEPTH][]; //an array of arrays of files
        int[] levelcnt=new int[DEPTH]; //the counter for each level, where are we in the list of files for each level
        private boolean exhausted; //set once the walk has run off the end so it is never restarted

        Enumerator(int type){
            this.type=type;
            for(int i=0;i<DEPTH;i++)levelcnt[i]=NOTSET; //NOTSET tells us we have to go to the next dir
            checkDirs();
        }
//...........................................................................
        //The directory this enumeration walks
        private File startDir(){
            return (type==KEYS)? keyroot : dataroot;
        }
//...........................................................................
        //Each time this method is called it knows which level it is and recreates its state.
        //Returns the next file, or null when this level has no more files. With hasmore set the
        //position of the file found is not advanced so the same file is returned by the next call.
        private File sequenceThroughFiles(File dir,int level,boolean hasmore){
            if(levelcnt[level]==NOTSET){
                File[] listed=dir.listFiles();
                lists[level]=(listed==null)? new File[0] : listed; //null means unreadable or not a directory
                levelcnt[level]=0;
            }
            if(levelcnt[level]==lists[level].length){ //No more in this level so one above must increment
                levelcnt[level]=NOTSET;
                return null;
            }
            File f=lists[level][levelcnt[level]];
            if(!f.isDirectory()){ //We have found a file
                if(!hasmore)levelcnt[level]+=1; //Don't increment the count if it's a hasMoreElements() call
                return f;
            }
            File returned=null;
            while((returned=sequenceThroughFiles(f,level+1,hasmore))==null){
                levelcnt[level]+=1;
                if(levelcnt[level]==lists[level].length){ //No more in this level so one above must increment
                    levelcnt[level]=NOTSET;
                    return null;
                }
                f=lists[level][levelcnt[level]]; //get the next directory to look in
            }
            return returned; //A file is returned from the bottom level which we pass up the chain
        }
//..........................................................................
/**
 * Indicates if there are more elements in the enumeration
 * This method can be used but it is more efficient to repeat <code>nextElement()</code> until null is returned
 * @return <code>true</code> if the enumeration has more elements otherwise <code>false</code>.
 * @see #nextElement()
 */
        public boolean hasMoreElements(){
            if(exhausted)return false;
            if(sequenceThroughFiles(startDir(),0,true)==null){
                exhausted=true;
                return false;
            }
            return true;
        }
//...........................................................................
/**
 * Returns the next element in the enumeration. Returns <code>null</code> when all elements are exhausted.
 * <code>null</code> is also returned when the next file cannot be loaded; the file is skipped in that case.
 * It is recommended that this method is used <i>without</i> <code>hasMoreElements()</code> since both methods
 * use the same code.
 * It is more efficient to repeat <code>nextElement()</code> until null is returned
 * @return The next element in the enumeration or null if there are none.
 */
        public Object nextElement() {
            if(exhausted)return null;
            File f=sequenceThroughFiles(startDir(),0,false);
            if(f==null){
                exhausted=true;
                return null;
            }
            try{
                return loadObject(f);
            }catch(IOException ioe){return null;}
        }
    }
//--------------------------------------------------------------------------
}

