outtotal<-(gamma*out$mu)#+(beta_star*sqrt(out$sig)) #refactored parameters in combination with softmax tau, where gamma = 1/tau and beta_star = beta/tau
#avoid borderline cases
#outtotal[outtotal<=0]<-0.0001
#outtotal[outtotal>100]<-100
outtotal<-matrix(outtotal,ncol=1,byrow=TRUE)
}else{
beta<-pars[1]
#calulate all the upper confidence bounds
outtotal<-out$mu+(beta*sqrt(out$sig))#refactored parameters in combination with softmax tau, where gamma = 1/tau and beta_star = beta/tau
#avoid borderline cases
#outtotal[outtotal<=0]<-0.0001
#outtotal[outtotal>99]<-99
outtotal<-matrix(outtotal,ncol=1,byrow=TRUE)
}
#return them
return(outtotal)
}
```
### some values to set.
``` r
# Seed the random number generator from the wall clock, so each run of the
# script draws a different random stream.
set.seed(as.numeric(Sys.time()))

# Control settings and bookkeeping slots shared by the simulations.
cntrl <- list(
  # model parameters
  lambda = 0.8,
  beta = 0, # this scales risk attitude
  tau = 0.8,
  mu0 = 100, # labelled "exploration bonus" by the author
  var0 = 40,
  # the parameter vector
  parVec = c(0.8, 0.8, 1, 0.0001),
  ExploreBonus = 0,
  # kernel is RBF (the assignment `k <- rbf` is left disabled)
  # slots that start out empty and a running counter
  out = NULL,
  AllChoices = NULL,
  dummy = NULL,
  overallCnt = 1,
  # every (x1, x2) tile of the 12 x 12 grid, one row per tile
  dat = expand.grid(x1 = 1:12, x2 = 1:12)
)

# Storage for the repeated simulations: one (initially NULL) list entry each.
ntrialss <- 100
list_Iter <- vector("list", ntrialss)
```
# Make observations
Here I let the agent learn about the environment. There are three
phases of the simulation. Phase one is “childhood”. During childhood,
agents can explore only the lower middle quadrant. Here all decisions
have low risk and there are some low rewards possible. Phase two, which
occurs after some learning experience (400 samples), can be understood
as the onset of adolescence. Here the whole environment becomes
available, but the agent does not know about it, so they have to learn.
Then, after another 400 samples, the agent transitions into “adulthood”,
where the same environment is still present but the learning experience
now leads to greater exploitation of presumably advantageous options.
#here i need a function that calls bayesianMeanTracker. n times and returns the values X for each n. Also, i need some kind of list, where i save the prior for each instance....
#
# browser()
# Turn the current estimates in `out` into choice probabilities, draw the next
# tile to sample, and append the chosen location and its reward to X and y.
# NOTE(review): `out`, `beta`, `tau`, `nround`, `sampleVec`, `lowestx`,
# `highestx`, `dat`, `X`, `y` and `EnvirionemntAdol` are all defined outside
# this excerpt.
# utility of every option according to the ucb() rule
utilityVec<-ucb(out,beta)
# shift so that the largest utility is 0; exp() below then never exceeds 1
utilities<-utilityVec-max(utilityVec)
#softmaximization with temperature tau
p<-exp(utilities/tau)
#probabilities: divide by the column sum (ucb() returns a one-column matrix)
p<-p/colSums(p)
#clamp the probabilities to [0.00001, 0.99999] to avoid exact 0 or 1
p<-(pmax(p,0.00001))
p<-(pmin(p,0.99999))
#index is sampled proportionally to the softmaxed utility vector
if(nround==1){# subset the probability vector so that it corresponds to the right tiles.
# NOTE(review): sampleVec must have one entry per row selected by the mask below
# (x1 within [lowestx, highestx] and x2 < 7) - confirm where it is built.
ind<-sample(sampleVec,1,prob=p[dat$x1>=lowestx&dat$x1<=highestx&dat$x2<7,])# sample from a childhood environment
#this monster just scales exploration boni
}else{
# 144 is presumably the number of tiles in the 12 x 12 grid `dat` - confirm
ind<-sample(1:144,1,prob=p)# sample from an adolescent environment
# print(ind)
}
# append the coordinates (first two columns of dat) of the chosen tile
X<-rbind(X,as.matrix(dat[ind,1:2]))
#bind y-observations: one normal draw around the chosen tile's Mean
# NOTE(review): the column named `Variance` is passed as `sd` - confirm whether
# it holds a standard deviation or a variance.
y<-rbind(y,as.matrix(rnorm(n=1,mean=EnvirionemntAdol[ind,]$Mean,sd=EnvirionemntAdol[ind,]$Variance)))# change this into a sample.
#if(y[overallCnt]<0){
# y[overallCnt]=-1*y[overallCnt]^2# make losses more severe.