/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 2; tab-width: 2 -*-  */
/*
 * relalgebra.h
 * Copyright (C) 2016 Shahab Tasharrofi <shahab@tasharrofi.net>
 *
 * grounder-generator is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * grounder-generator is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Class Description:
 *
 * Here, nodes represent decision trees. Each node can have the form {false},
 * negated(N) or {X,V,T,E} where X and V are integers, and T, E and N are decision
 * tree nodes themselves. Here N stands for node, X for variable name, V for value,
 * T for "Then", and E for "Else". Intuitively, node {X,V,T,E} is equivalent to
 * conditional "If the value assigned to variable X is less than or equal to V then
 * follow branch T else follow branch E".
 *
 * An assignment is a function from integers to integers.
 *
 * The semantics of a decision tree node is defined by the set of assignments that
 * the node accepts. Given a node N and an assignment A : Z --> Z, the
 * acceptance of A by node N is defined as follows:
 * N == {false} --> N does not accept A
 * N == negated(M) --> N accepts A if and only if M does not accept A
 * N == {X,V,T,E} and A(X) <= V --> N accepts A iff T accepts A
 * N == {X,V,T,E} and A(X) > V --> N accepts A iff E accepts A
 *
 * Note that for a node N={X,V,T,E}, we have that negated(N) is semantically
 * equivalent to node N'={X,V,negated(T),negated(E)}. Replacing node N by node N'
 * is called pushing negation. Also, semantically, we have that negated(negated(N))
 * is equivalent to N. This is called double negation elimination. Applying these
 * rules up to fixpoint is called delayed negation elimination which, regardless of
 * the order of application, always converges to the same decision tree.
 *
 * Our decision tree nodes guarantee the following:
 * 1) Two decision tree nodes N and M are semantically equivalent if and only if,
 *    after applying delayed negation elimination to N and M, we get decision trees
 *    that have exactly the same form (i.e., they are syntactically equivalent).
 * 2) The nodes will always be acyclic.
 *
 * In order to guarantee the canonicity requirements above, we guarantee the
 * following for all internal decision nodes N = {X, V, T, E}:
 * 1) T is always different from E. This is because, if T = E then N would represent
 *    the same decision tree as does T which violates our canonicity constraint.
 * 2) If T = {X',V',T',E'} then X' > X. That is, the T branch cannot decide on a
 *    variable that is either the same or less than the current variable.
 * 3) If E = {X',V',T',E'} then X' >= X.
 * 4) Also, if E = {X',V',T',E'} and X' = X then V' > V.
 *
 * Conditions (2) -- (4) above, together, define a well-ordering on the nodes of a
 * decision tree which guarantees acyclicity. That is, by following the T or E
 * branches, we can never return to the same decision. By following the T branch, we
 * always get to a decision about a greater variable and, by following the E branch,
 * we will either get to a decision about a greater variable or to a decision about
 * the same variable but a greater value.
 *
 * Since we allow delayed negation elimination, we might end up with several
 * representations of the same decision tree. In order to reduce the number of
 * different representations of a decision tree, we do the following
 * simplifications:
 * 1) negated(negated(X)) is always replaced by X.
 * 2) A node N = {X, V, T, negated(E)} will always be replaced by negated(N') where
 *    N' = {X, V, negated(T), E}. In other words, the Else branch in an internal node
 *    is always non-negated.
 *
 * Condition (1) above guarantees the finiteness of different representations for the
 * same decision tree. Using only condition (1), the number of different representations
 * for a decision tree N without delayed negation is 2^((|N| - 1)/2). This is because
 * every internal node can be negated or non-negated and whether the leaves of a tree
 * are {false} or {negated(false)} can be uniquely determined based on the parity of
 * the number of negations on the path from the root node to this leaf node.
 *
 * When condition (2) above is added, the number of possible different representations
 * for a decision tree is exponentially reduced to 2^((|N|-3)/4). This is because, the
 * "else" branch of an internal node cannot be negated. Hence, the only nodes that can
 * be freely negated are those internal nodes that are the "then" child of another
 * internal node. The number of such nodes is (|N|-3)/4.
 */

#ifndef _RELALGEBRA_H_
#define _RELALGEBRA_H_

#include <boost/functional/hash.hpp>
#include <limits>
#include <stack>
#include <tuple>
#include <unordered_map>

#include "assignment-manager.h"
#include "compact-string.h"
#include "global-stack.h"

using namespace std;

// Raw handle of a decision-tree node; this is also the payload stored in a Table.
using NodeIndexType = size_t;
// Identifier of a decision variable.
using VariableType = int64_t;
// Threshold a variable is compared against.
using ValueType = int64_t;
// Stored form of an internal node: (variable, value, then-branch, else-branch).
using NodeData = tuple<VariableType, ValueType, NodeIndexType, NodeIndexType>;
// Pair of node indices; key type of rel_algebra::joinCache.
using JoinIndexType = pair<NodeIndexType, NodeIndexType>;

class Table
{
private:
	friend class rel_algebra;

	NodeIndexType index;
	Table(NodeIndexType tableIndex) : index(tableIndex) { }
public:
	inline bool operator==(const Table &other) const { return index == other.index; }
	inline bool operator!=(const Table &other) const { return index != other.index; }

	Table() : index(numeric_limits<NodeIndexType>::max() - 1) { } // initializes a Table to falseNode();
};

// Hash functor that allows NodeData tuples to be used as keys of unordered
// containers (it is the hasher of rel_algebra::nodeToIndex).
struct NodeDataHash
{
	// The argument type is deduced instead of being forced with an explicit
	// template argument, so that normal overload resolution selects Boost's
	// tuple overload (same calling style as JoinIndexTypeHash).
	inline size_t operator()(const NodeData &nodeData) const { return boost::hash_value(nodeData); }
};

// Hash functor for JoinIndexType pairs (it is the hasher of rel_algebra::joinCache).
struct JoinIndexTypeHash
{
	// Delegates to Boost's hash of the pair.
	size_t operator()(const JoinIndexType &tableIndices) const
	{
		const size_t digest = boost::hash_value(tableIndices);
		return digest;
	}
};

// Forward declarations: rel_algebra declares factory functions returning these
// iterator types before the classes themselves are defined further down.
class SingleVariableUnitIterator;
class UnitIterator;

// rel_algebra groups, as static members, the node store and the relational
// algebra operations on Tables. A Table's index encodes a node as follows:
//   - bit 0 is the negation flag (see isNegated and complement);
//   - for internal nodes, index >> 1 is the position of the node's data in
//     nodeRepository (see getVariable and friends);
//   - the two largest index values are reserved for the leaves {false} and
//     negated({false}), i.e. "true".
class rel_algebra
{
private:
	// constants "true" and "false"
	static const NodeIndexType falseNodeIndex = numeric_limits<NodeIndexType>::max() - 1;
	static const NodeIndexType trueNodeIndex = numeric_limits<NodeIndexType>::max(); // This is equivalent to rel_algebra::complement(falseNodeIndex)

	// static internal variables
	// nodeRepository holds the data of internal nodes, addressed by index >> 1.
	// NOTE(review): nodeToIndex, joinCache and complementationCache are presumably
	// the tabling/memoization maps of getNode, join and complementation; their use
	// is not visible in this header.
	static unordered_map<NodeData, NodeIndexType, NodeDataHash> nodeToIndex;
	static vector<NodeData> nodeRepository;
	static unordered_map<JoinIndexType, NodeIndexType, JoinIndexTypeHash> joinCache;
	static unordered_map<NodeIndexType, NodeIndexType> complementationCache;

	// static internal methods
	static JoinIndexType makeJoinIndexPair(const Table t1, const Table t2);
	static Table unwindNegation(const Table t);
	// A table is negated when the lowest bit of its index is set.
	static inline bool isNegated(const Table t) { return ((t.index & 1) != 0); }
	// Function getNode constructs an internal decision node. It is tabled so that the
	// same decision tree is always defined by the same pointer.
	static Table getNode(VariableType variableIndex, ValueType value, Table thenNode, Table elseNode);
public:
	// Functions to obtain information about a table

	// A leaf is one of the two reserved indices; they differ from falseNodeIndex in
	// at most the negation bit, hence the xor/shift test.
	static inline bool isLeafNode(const Table t) { return (((t.index ^ falseNodeIndex) >> 1) == 0); }
	static inline bool isInternalNode(const Table t) { return !isLeafNode(t); }
	// isEmpty: t is the {false} leaf. isFull: t is the "true" leaf.
	static inline bool isEmpty(const Table t) { return t.index == falseNodeIndex; }
	static inline bool isFull(const Table t) { return t.index == trueNodeIndex; }
	// The accessors below read the stored data of an internal node; they assert
	// that t addresses an entry of nodeRepository (so they must not be used on leaves).
	static inline VariableType getVariable(const Table t) { NodeIndexType i = t.index >> 1; assert(i < nodeRepository.size()); return std::get<0>(nodeRepository[i]); }
	static inline ValueType getValue(const Table t) { NodeIndexType i = t.index >> 1; assert(i < nodeRepository.size()); return std::get<1>(nodeRepository[i]); }
	// For a negated table the stored branch is complemented before it is returned,
	// i.e. the negation is pushed one level down on access.
	static inline Table getThenBranch(const Table t) { NodeIndexType i = t.index >> 1; assert(i < nodeRepository.size()); Table result(std::get<2>(nodeRepository[i])); return (isNegated(t) ? complement(result) : result); }
	static inline Table getElseBranch(const Table t) { NodeIndexType i = t.index >> 1; assert(i < nodeRepository.size()); Table result(std::get<3>(nodeRepository[i])); return (isNegated(t) ? complement(result) : result); }

	// Function trueNode returns complement({false}) and function falseNode returns {false}.
	static inline Table trueNode() { return Table(trueNodeIndex); }
	static inline Table trueTable() { return trueNode(); }
	static inline Table falseNode() { return Table(falseNodeIndex); }
	static inline Table falseTable() { return falseNode(); }

	static Table canonical(VariableType variableIndex, ValueType value, const Table thenNode, const Table elseNode);

	// Main relational algebraic operations

	// Complementation only flips the negation bit of the index.
	static inline Table complement(const Table t) { return Table(t.index ^ 1); }
	static Table join(const Table t1, const Table t2);
	// Union is expressed through join and complement (De Morgan).
	static inline Table union_tables(const Table t1, const Table t2) { return complement(join(complement(t1),complement(t2))); }
	static Table divide(Table keptVariables, Table t);
	// Projection is the dual of division: complement(divide(kept, complement(t))).
	static inline Table project(Table keptVariables, Table t) { return complement(divide(keptVariables,complement(t))); }

	// filterLessThanOrEqualTo(V,Val,Node)[value] == Node[value] /\ (value[Var] <= Val).
	static inline Table filterLessThanOrEqualTo(VariableType variable, ValueType value, Table t) { return join(canonical(variable, value, trueNode(), falseNode()), t); }

	// filterGreaterThanOrEqualTo(Var,Val,Node)[value] == Node[value] /\ (value[Var] >= Val).
	static inline Table filterGreaterThanOrEqualTo(VariableType variable, ValueType value, Table t)
	{
		// "value[Var] >= min" holds for every representable value, and computing
		// value - 1 would overflow a signed integer (undefined behaviour). The
		// condition is therefore the full table in that case.
		if (value == numeric_limits<ValueType>::min())
			return join(trueNode(), t);
		return join(canonical(variable, value - 1, falseNode(), trueNode()), t);
	}

	// filterLessThan(V,Val,Node)[value] == Node[value] /\ (value[Var] < Val).
	static inline Table filterLessThan(VariableType variable, ValueType value, Table t)
	{
		// "value[Var] < min" holds for no representable value, and computing
		// value - 1 would overflow a signed integer (undefined behaviour). The
		// condition is therefore the empty table in that case.
		if (value == numeric_limits<ValueType>::min())
			return join(falseNode(), t);
		return join(canonical(variable, value - 1, trueNode(), falseNode()), t);
	}

	// filterGreaterThan(V,Val,Node)[value] == Node[value] /\ (value[Var] > Val).
	static inline Table filterGreaterThan(VariableType variable, ValueType value, Table t) { return join(canonical(variable, value, falseNode(), trueNode()), t); }

	// filterEqualTo(V,Val,Node)[value] == Node[value] /\ (value[Var] == Val).
	static inline Table filterEqualTo(VariableType variable, ValueType value, Table t) { return filterGreaterThanOrEqualTo(variable, value, filterLessThanOrEqualTo(variable, value, t)); }

	// filterInBetween(V,L,U,Node)[value] == Node[value] /\ (L <= value[Var] < U).
	static inline Table filterInBetween(VariableType variable, ValueType lowerBound, ValueType upperBound, Table t) { return filterGreaterThanOrEqualTo(variable, lowerBound, filterLessThan(variable, upperBound, t)); }

	// filter(Assignments,Node)[value] == Node[value] /\ !var in dom(Assignments) : (value[var] == Assignments[var]).
	// Applies filterEqualTo once for every (variable, value) pair produced by the iterator.
	static inline Table filter(AssignmentManager::AssignmentIterator a, Table t)
	{
		for (; !a.atEnd(); a++)
			t = filterEqualTo(a.getVariable(), a.getValue(), t);
		return t;
	}

	// ifThenElse(Var,Val,Then,Else)[value] == (value[Var] <= Val /\ Then[value]) \/ (value[Var] > Val /\ Else[value]).
	static inline Table ifThenElse(VariableType variable, ValueType value, Table thenNode, Table elseNode) { return union_tables(filterLessThanOrEqualTo(variable, value, thenNode), filterGreaterThan(variable, value, elseNode)); }

	// Iterator functions
	static SingleVariableUnitIterator getSingleVariableUnitIterator(Table t);
	static UnitIterator getUnitIterator(Table t);
};

// Iterates over the values of the variable decided at the root of a table.
// The iterator keeps a current value (currentLowerBound) and the node of the
// chain of decisions on that variable that is currently being inspected
// (currentTable); positions whose "then" branch is the empty table are skipped.
// Read-only queries are const so that they can be used through const references.
class SingleVariableUnitIterator
{
	private:
		VariableType currentVariable; // variable decided at the root (0 when built from a leaf)
		ValueType currentLowerBound;  // current value; starts at numeric_limits<ValueType>::min()
		Table currentTable;           // node currently inspected, or the empty table at the end

		// Moves currentTable / currentLowerBound forward until the iterator rests on
		// a position whose branch is not the empty table, or until the end is reached.
		void next()
		{
			if (rel_algebra::isLeafNode(currentTable))
				return;

			while (true)
			{
				// The current value lies beyond this node's threshold: the else
				// branch decides what happens next.
				if (currentLowerBound > rel_algebra::getValue(currentTable))
				{
					Table elseTable = rel_algebra::getElseBranch(currentTable);
					if (rel_algebra::isLeafNode(elseTable))
					{
						// An empty else branch ends the iteration (atEnd() becomes
						// true); a full one is left to getCurrentNode().
						if (rel_algebra::isEmpty(elseTable))
							currentTable = elseTable;
						break;
					}
					else if (rel_algebra::getVariable(elseTable) == currentVariable)
						currentTable = elseTable; // next decision on the same variable
					else
						break; // else branch decides on another variable: stay here
				}

				// Skip past this node's threshold when its then branch is empty.
				Table thenTable = rel_algebra::getThenBranch(currentTable);
				if (rel_algebra::isEmpty(thenTable))
					currentLowerBound = rel_algebra::getValue(currentTable) + 1;
				else
					break;
			}
		}
	public:
		// True when the current table is empty or the current value has reached
		// numeric_limits<ValueType>::max().
		inline bool atEnd() const { return rel_algebra::isEmpty(currentTable) || (currentLowerBound == numeric_limits<ValueType>::max()); }

		// True when the current value is still numeric_limits<ValueType>::min(), when
		// the current table is full, or when the current value lies beyond the
		// current node's threshold and the else branch is not empty.
		inline bool isUnbounded() const
		{
			if (currentLowerBound == numeric_limits<ValueType>::min())
				return true;
			if (rel_algebra::isFull(currentTable))
				return true;
			if (rel_algebra::isEmpty(currentTable))
				return false;
			if (currentLowerBound > rel_algebra::getValue(currentTable))
				return !rel_algebra::isEmpty(rel_algebra::getElseBranch(currentTable));
			return false;
		}

		inline ValueType getCurrentValue() const { assert(!atEnd()); return currentLowerBound; }
		inline VariableType getCurrentVariable() const { return currentVariable; }

		// Returns the branch selected by the current value: the table itself for a
		// leaf, otherwise the then branch if the value is within the node's
		// threshold and the else branch if it is beyond it.
		inline Table getCurrentNode() const
		{
			assert(!atEnd());
			if (rel_algebra::isLeafNode(currentTable))
				return currentTable;

			Table t = (currentLowerBound <= rel_algebra::getValue(currentTable)) ? rel_algebra::getThenBranch(currentTable) : rel_algebra::getElseBranch(currentTable);
			return t;
		}

		inline ValueType operator*() const { return getCurrentValue(); }
		// Prefix increment: steps to the next value and returns this iterator itself.
		inline SingleVariableUnitIterator &operator++() { assert(!atEnd()); currentLowerBound++; next(); return (*this); }
		// Postfix increment: returns a copy of the state before the step.
		inline SingleVariableUnitIterator operator++(int) { SingleVariableUnitIterator result = (*this); ++(*this); return result; }

		// Starts iterating over t. For an internal node the iterator is immediately
		// advanced to its first position.
		SingleVariableUnitIterator(Table t) :
			currentLowerBound(numeric_limits<ValueType>::min()),
			currentTable(t)
		{
			if (rel_algebra::isLeafNode(t))
				currentVariable = 0;
			else
			{
				currentVariable = rel_algebra::getVariable(t);
				next();
			}
		}
};

// Iterates over a table using a stack of SingleVariableUnitIterator objects.
// Most of the traversal logic (init, next, findVarPosition, ...) is defined
// outside this header.
class UnitIterator
{
	private:
		// we guarantee that the "iterators" vector is empty if and only if we
		// are at the end of iteration (this is exactly what atEnd() tests)
		vector<SingleVariableUnitIterator> iterators;
		VariableType maxVariable;

		// returns the index of the last iterator in the "iterators" vector whose
		// variable is less than or equal to the given variable.
		// returns -1 if the variables of all iterators in the "iterators" vector
		// are greater than the given variable
		int findVarPosition(VariableType variable);
		void init();
		void next();
	public:
		inline bool atEnd() const { return iterators.empty(); }

		ValueType getCurrentValue(VariableType variable);
		bool isVariableUnbounded(VariableType variable);
		// Current node of the last iterator on the stack; must not be called at the end.
		inline Table getCurrentNode() { assert(!atEnd()); return iterators.back().getCurrentNode(); }

		Table operator*();
		operator Table() { return *(*this); }
		// Prefix increment returns this iterator by reference, so stepping does not
		// copy the whole "iterators" vector.
		inline UnitIterator &operator++() { assert(!atEnd()); next(); return (*this); }
		// Postfix increment returns a copy of the state before the step.
		inline UnitIterator operator++(int) { UnitIterator result = (*this); ++(*this); return result; }
		inline ValueType operator[](VariableType variable) { return getCurrentValue(variable); }

		UnitIterator(Table t, VariableType maxVar);
		// Same as the two-argument constructor with the largest representable
		// variable as maxVar.
		UnitIterator(Table t) : UnitIterator(t, numeric_limits<VariableType>::max()) { }

};

// Alias for Table; it is the mapped type of the database declared below.
using rel_algebra_node = Table;
// GlobalStack instantiated for Table elements.
using global_table_stack = GlobalStack<Table>;

// Map from compact_string keys to tables. Only declared here; the definition
// lives in another translation unit.
extern unordered_map<compact_string, rel_algebra_node, compact_string_hash> database;

// Stores value under key cs in the global database, creating the entry if it
// does not exist yet and overwriting it otherwise.
inline void set_database_atom(compact_string cs, rel_algebra_node value)
{
	rel_algebra_node &slot = database[cs];
	slot = value;
}

#endif // _RELALGEBRA_H_

