import numpy as np
import math

# Attribute encodings: integer code used in `data` -> label (labels are Romanian).
capacitate = {0: 'mare', 1: 'medie', 2: 'mica'}
marca = {0: 'ford', 1: 'vw', 2: 'dacia'}
caroserie = {0: 'mare', 1: 'medie'}
cutie = {0: 'manuala', 1: 'automata'}

# Attribute name -> its code/label table; the keys match the keys of `data`.
attribs = {
    'capacitate': capacitate,
    'marca': marca,
    'caroserie': caroserie,
    'cutie': cutie,
}

# Class encodings: class value used in `classVals` -> label.
classes = {-1: 'da', 1: 'nu'}

# Training set stored column-wise: data[attribName][i] is the code of
# attribute `attribName` in record i.
data = {
    'capacitate': [2, 2, 1, 0, 0, 0, 1, 2, 2, 0, 2, 1, 1, 0],
    'marca':      [0, 0, 0, 1, 2, 2, 2, 1, 2, 1, 1, 1, 0, 1],
    'caroserie':  [0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0],
    'cutie':      [1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0],
}

# Class value of every record (a key of `classes`), aligned with the columns of `data`.
classVals = [1, 1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1, -1, 1]

# Sizes are derived from the data instead of being hard-coded, so they cannot
# drift when records or attributes are added or removed.
nrRecords = len(classVals)
nrAttribs = len(attribs)

# Per-record weights, initially uniform and summing to 1.
weights = np.ones(nrRecords) / nrRecords

# Indicator array marking the records in which an attribute takes a given value.
def attribMask(attribName, attribValue):
    """Return a 0/1 mask over the records for one attribute value.

    Args:
        attribName: key of `data` naming the attribute column to inspect.
        attribValue: value to look for in that column.

    Returns:
        Float array of length `nrRecords`; entry i is 1 when
        data[attribName][i] equals `attribValue` and 0 otherwise.
    """
    return np.array(
        [1.0 if data[attribName][idx] == attribValue else 0.0
         for idx in range(nrRecords)],
        dtype=float,
    )

# Indicator array marking the records that belong to a given class within the
# group selected by an attribute value (or within the complement of that group).
def attribClassMask(attribName, attribValue, classValue, differentFrom = False):
    """Return a 0/1 mask of records that are in the selected group and in a class.

    Args:
        attribName: key of `data` naming the attribute column to inspect.
        attribValue: attribute value that defines the group.
        classValue: class value a record must have to be marked.
        differentFrom: when equal to False, the group is the records whose
            attribute equals `attribValue`; otherwise the group is the
            records whose attribute differs from `attribValue`.

    Returns:
        Float array of length `nrRecords`; entry i is 1 when record i is in
        the group and classVals[i] equals `classValue`, 0 otherwise.
    """
    valueHits = attribMask(attribName, attribValue)
    # Kept as an equality test against False (not `not differentFrom`) so that
    # non-bool arguments such as None still select the complement group.
    selectMatching = (differentFrom == False)
    return np.array(
        [1.0
         if ((valueHits[idx] == 1) == selectMatching) and classValue == classVals[idx]
         else 0.0
         for idx in range(nrRecords)],
        dtype=float,
    )

# Weighted entropy of attribute attribName.
def entropy(attribName):
    """Return the weighted entropy of an attribute and its purest value.

    Every record counts with its current entry in `weights`. For each value of
    the attribute the class entropy (base 2) of the records having that value
    is computed; the attribute entropy is the sum of those entropies, each
    scaled by the share of total weight carried by the value.

    Args:
        attribName: key of `attribs` / `data` naming the attribute.

    Returns:
        Tuple (attrEnt, minAttrVal): the weighted entropy of the attribute and
        the attribute value whose record group has the lowest class entropy.
        Values that carry no weight are ignored. If no value carries weight,
        the first declared value is returned (0 if none are declared).
    """
    attrValues = list(attribs[attribName])
    classValues = list(classes)
    totalWeight = sum(weights)  # loop-invariant, computed once

    attrEnt = 0
    minAttrVal = attrValues[0] if attrValues else 0
    # Start from +inf rather than 1.0: with more than two classes an entropy
    # can exceed 1, and the result must always be a real attribute value.
    minValEnt = math.inf
    for attrValue in attrValues:
        valWeightedCount = sum(attribMask(attribName, attrValue) * weights)
        if valWeightedCount == 0:
            # A value with no weight adds nothing to the attribute entropy and
            # must not win the "purest value" selection with a vacuous 0.
            continue
        valP = valWeightedCount / totalWeight
        valEnt = 0
        for classValue in classValues:
            clsWeightedCount = sum(attribClassMask(attribName, attrValue, classValue) * weights)
            if clsWeightedCount == 0:
                continue  # an absent class contributes 0 to the entropy
            clsP = clsWeightedCount / valWeightedCount
            if clsP > 0:  # guards log2 against a ratio that underflowed to 0
                valEnt -= clsP * math.log2(clsP)
        if valEnt < minValEnt:
            minValEnt = valEnt
            minAttrVal = attrValue
        attrEnt += valP * valEnt
    return attrEnt, minAttrVal

def minEntropyAttrib():
    """Find the attribute with the lowest weighted entropy.

    Calls `entropy` for every attribute in `attribs` and keeps the one with
    the smallest entropy; ties go to the attribute listed first.

    Returns:
        Tuple (minAttrib, minAttribVal): the name of the attribute with
        minimum entropy and the value of that attribute reported by `entropy`
        as generating the minimum entropy. Both are None when `attribs` is
        empty or no entropy compares below infinity.
    """
    minAttrib = None
    minAttribVal = None
    minEnt = math.inf
    for attribName in attribs:
        attrEnt, attrVal = entropy(attribName)
        # Strict comparison keeps the first attribute on ties.
        if attrEnt < minEnt:
            minEnt = attrEnt
            minAttrib = attribName
            minAttribVal = attrVal
    return minAttrib, minAttribVal


# Class with the largest total weight among the records selected by an attribute test.
def _weightedMajorityClass(attribName, attribValue, differentFrom):
    bestClass = None
    bestWeight = -1.0
    for classValue in classes:
        classWeight = sum(
            attribClassMask(attribName, attribValue, classValue, differentFrom) * weights
        )
        # Strict comparison: on ties the class listed first in `classes` wins.
        if classWeight > bestWeight:
            bestWeight = classWeight
            bestClass = classValue
    return bestClass


def buildDS():
    """Build a decision stump from the data set and the current weights.

    The stump tests one attribute against one value. Class frequencies are
    measured with the current `weights`, so with uniform weights they equal
    plain record counts.

    Returns:
        Tuple (dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf):
        dsRoot - attribute of the data set with minimum entropy;
        dsBranch - value of dsRoot that causes the minimum entropy;
        dsLeftLeaf - most frequent class among records where dsRoot equals
            dsBranch;
        dsRightLeaf - most frequent class among records where dsRoot differs
            from dsBranch.
    """
    dsRoot, dsBranch = minEntropyAttrib()
    dsLeftLeaf = _weightedMajorityClass(dsRoot, dsBranch, False)
    dsRightLeaf = _weightedMajorityClass(dsRoot, dsBranch, True)
    return dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf

# Classify the record with index recordIdx using a decision stump.
def DSPrediction(dsRoot, dsBranch, dsLeft, dsRight, recordIdx):
    """Return the class a decision stump assigns to one record.

    Args:
        dsRoot: key of `data` naming the attribute the stump tests.
        dsBranch: attribute value the stump compares against.
        dsLeft: result when the record's attribute equals `dsBranch`.
        dsRight: result when the record's attribute differs from `dsBranch`.
        recordIdx: index of the record in the columns of `data`.

    Returns:
        `dsLeft` or `dsRight`, depending on the outcome of the test.
    """
    if data[dsRoot][recordIdx] == dsBranch:
        return dsLeft
    return dsRight

def DSError(dsRoot, dsBranch, dsLeft, dsRight):
    """Return the unweighted classification error of a decision stump.

    Args:
        dsRoot: attribute tested by the stump.
        dsBranch: attribute value the stump compares against.
        dsLeft: class predicted when the attribute equals `dsBranch`.
        dsRight: class predicted when the attribute differs from `dsBranch`.

    Returns:
        Number of incorrectly-predicted records divided by the total number
        of records (`nrRecords`, which must be non-zero).
    """
    nrWrong = sum(
        1
        for recordIdx in range(nrRecords)
        if DSPrediction(dsRoot, dsBranch, dsLeft, dsRight, recordIdx) != classVals[recordIdx]
    )
    return nrWrong / nrRecords

def DSWeightedError(dsRoot, dsBranch, dsLeft, dsRight):
    """Return the weighted classification error of a decision stump.

    Args:
        dsRoot: attribute tested by the stump.
        dsBranch: attribute value the stump compares against.
        dsLeft: class predicted when the attribute equals `dsBranch`.
        dsRight: class predicted when the attribute differs from `dsBranch`.

    Returns:
        Sum of the `weights` of incorrectly-predicted records divided by the
        sum of all weights (which must be non-zero).
    """
    wrongWeight = sum(
        weights[recordIdx]
        for recordIdx in range(nrRecords)
        if DSPrediction(dsRoot, dsBranch, dsLeft, dsRight, recordIdx) != classVals[recordIdx]
    )
    return wrongWeight / sum(weights)

# AdaBoost over decision stumps.
nrIter = 10

# The weighted error is clamped to [errEps, 1 - errEps] so that alpha stays
# finite even when a stump classifies every record correctly (or wrongly).
errEps = 1e-10

# One entry per boosting round: (alpha, dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf).
ensemble = []

# Initial decision stump, built with the starting weights.
dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf = buildDS()

for i in range(nrIter):

    # classification error of current decision stump
    err = DSWeightedError(dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf)
    err = min(max(err, errEps), 1 - errEps)

    # value of alpha
    alpha = 0.5 * math.log((1 - err) / err)
    ensemble.append((alpha, dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf))

    # update and normalize weights
    # Class values are -1 / 1, so classVals[r] * predicted is 1 for a correct
    # prediction (weight shrinks when alpha > 0) and -1 for a wrong one
    # (weight grows). Updates are in place so the helper functions, which read
    # the module-level `weights`, see the new values.
    for r in range(nrRecords):
        predicted = DSPrediction(dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf, r)
        weights[r] *= math.exp(-alpha * classVals[r] * predicted)
    weights /= sum(weights)

    # generate new decision stump using new weights
    dsRoot, dsBranch, dsLeftLeaf, dsRightLeaf = buildDS()