%PDF-1.6
%
1 0 obj
<>
endobj
2 0 obj
<>stream
Daniel Rasmussen, Aaron Voelker, Chris Eliasmith
A neural model of hierarchical reinforcement learning
www.plosone.org
www.plosone.org
endstream
endobj
3 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 11 0 R/Contents 12 0 R/TrimBox[0 0 612 792]>>
endobj
11 0 obj
[13 0 R 14 0 R 15 0 R 16 0 R 17 0 R 18 0 R 19 0 R 20 0 R 21 0 R 22 0 R]
endobj
13 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref003)>>
endobj
14 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref008)>>
endobj
15 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref009)>>
endobj
16 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref010)>>
endobj
17 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref012)>>
endobj
18 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref013)>>
endobj
19 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref014)>>
endobj
20 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref015)>>
endobj
21 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref016)>>
endobj
22 0 obj
<>/Border[0 0 0]/A 23 0 R>>
endobj
23 0 obj
<>
endobj
12 0 obj
[24 0 R 25 0 R 26 0 R 27 0 R 28 0 R 29 0 R 30 0 R 31 0 R 32 0 R 33 0 R]
endobj
24 0 obj
<>stream
q
0.83 0.64 0.02 0 k
424.3465 409.3228 m
429.1087 409.3228 l
h
f*
434.2677 409.3228 m
439.0299 409.3228 l
h
f*
496.8 292.3087 m
501.5622 292.3087 l
h
f*
279.6094 266.3433 m
289.1339 266.3433 l
h
f*
294.2929 266.3433 m
303.8173 266.3433 l
h
f*
203.4142 253.3039 m
212.9386 253.3039 l
h
f*
318.6142 240.3213 m
328.0819 240.3213 l
h
f*
332.5039 240.3213 m
341.9717 240.3213 l
h
f*
387.9496 188.3339 m
397.474 188.3339 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 211.9748 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(This)Tj
1.9899 0 Td
(cross-fertilization)Tj
7.2226 0 Td
(is)Tj
0.8504 0 Td
(realized)Tj
3.3222 0 Td
(even)Tj
2.0749 0 Td
(more)Tj
2.3414 0 Td
(explicitly)Tj
3.8268 0 Td
(in)Tj
1.0374 0 Td
(computational)Tj
6.0491 0 Td
(neural)Tj
2.7666 0 Td
(modelling)Tj
-32.6775 -1.3039 Td
(the)Tj
2.3924 0 Td
(practice)Tj
3.3788 0 Td
(of)Tj
1.0148 0 Td
(building)Tj
3.5773 0 Td
(detailed)Tj
3.3676 0 Td
(mechanistic)Tj
5.0116 0 Td
(models)Tj
3.1067 0 Td
(that)Tj
1.7802 0 Td
(recreate)Tj
3.3845 0 Td
(neural)Tj
2.7723 0 Td
(function.)Tj
3.8551 0 Td
(These)Tj
-33.6413 -1.2982 Td
(models)Tj
3.101 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1509 0 Td
(used)Tj
2.0636 0 Td
(to)Tj
1.0204 0 Td
(explain)Tj
3.1238 0 Td
(how)Tj
1.9389 0 Td
(the)Tj
1.4683 0 Td
(abstract)Tj
3.3505 0 Td
(computations)Tj
5.726 0 Td
(of)Tj
1.0148 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5263 0 Td
(could)Tj
-35.0473 -1.2983 Td
(be)Tj
1.1509 0 Td
(carried)Tj
3.0386 0 Td
(out)Tj
1.5534 0 Td
(by)Tj
1.1792 0 Td
(real)Tj
1.6951 0 Td
(brains.)Tj
2.9367 0 Td
(One)Tj
1.9389 0 Td
(way)Tj
1.7915 0 Td
(to)Tj
1.0261 0 Td
(succinctly)Tj
4.1896 0 Td
(summarize)Tj
4.6601 0 Td
(the)Tj
1.4683 0 Td
(motivation)Tj
4.6205 0 Td
(for)Tj
1.3889 0 Td
(such)Tj
2.058 0 Td
(work)Tj
-34.6958 -1.2983 Td
(is)Tj
0.8448 0 Td
(as)Tj
1.0147 0 Td
(follows:)Tj
-1.8595 -1.9048 Td
(1.)Tj
1.2018 0 Td
(Brains)Tj
2.778 0 Td
(must)Tj
2.228 0 Td
(solve)Tj
2.211 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5263 0 Td
(style)Tj
2.0012 0 Td
(problems)Tj
3.9742 0 Td
(somehow,)Tj
4.2746 0 Td
(as)Tj
1.0148 0 Td
(evidenced)Tj
4.2406 0 Td
(by)Tj
1.1792 0 Td
(their)Tj
-33.3636 -1.2983 Td
(impressive)Tj
4.4844 0 Td
(behavioural)Tj
4.9209 0 Td
(performance)Tj
-10.6071 -1.8992 Td
(2.)Tj
1.2018 0 Td
(There)Tj
2.5739 0 Td
(are)Tj
1.4513 0 Td
(algorithms)Tj
4.5184 0 Td
(in)Tj
1.0318 0 Td
(RL)Tj
1.3833 0 Td
(that)Tj
1.7745 0 Td
(provide)Tj
3.2881 0 Td
(powerful)Tj
3.7871 0 Td
(methods)Tj
3.685 0 Td
(for)Tj
1.389 0 Td
(solving)Tj
3.067 0 Td
(such)Tj
2.058 0 Td
(problems)Tj
-30.0074 -1.2982 Td
(computationally)Tj
-1.2018 -1.9049 Td
(3.)Tj
1.2018 0 Td
(If)Tj
0.8447 0 Td
(modellers)Tj
4.1499 0 Td
(can)Tj
1.6214 0 Td
(show)Tj
2.3018 0 Td
(how)Tj
1.9388 0 Td
(those)Tj
2.3358 0 Td
(methods)Tj
3.685 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1508 0 Td
(implemented)Tj
5.5332 0 Td
(in)Tj
1.0375 0 Td
(neural)Tj
2.7723 0 Td
(systems,)Tj
3.5262 0 Td
(we)Tj
1.3266 0 Td
(then)Tj
-33.8511 -1.2983 Td
(have)Tj
2.058 0 Td
(a)Tj
0.6519 0 Td
(hypothesis)Tj
4.4674 0 Td
(for)Tj
1.3833 0 Td
(how)Tj
1.9389 0 Td
(the)Tj
1.474 0 Td
(brain)Tj
2.3471 0 Td
(could)Tj
2.4491 0 Td
(achieve)Tj
3.1748 0 Td
(those)Tj
2.3357 0 Td
(same)Tj
2.262 0 Td
(solutions)Tj
-24.5478 -1.8992 Td
(Of)Tj
1.2585 0 Td
(course)Tj
2.8347 0 Td
(there)Tj
2.262 0 Td
(are)Tj
1.4513 0 Td
(challenges)Tj
4.3143 0 Td
(to)Tj
1.0262 0 Td
(this)Tj
1.6781 0 Td
(idealized)Tj
3.7473 0 Td
(approach.)Tj
4.1783 0 Td
(One)Tj
1.9389 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(most)Tj
2.2053 0 Td
(critical)Tj
2.9537 0 Td
(is)Tj
0.8447 0 Td
(hinted)Tj
-34.3726 -1.2982 Td
(at)Tj
0.9525 0 Td
(in)Tj
1.0317 0 Td
(point)Tj
2.3584 0 Td
(2,)Tj
0.9241 0 Td
(with)Tj
1.9956 0 Td
(the)Tj
1.4683 0 Td
(question)Tj
3.6623 0 Td
(being)Tj
2.4321 0 Td
(just)Tj
2.0523 0 Td
(how)Tj
1.9389 0 Td
(powerful)Tj
3.7871 0 Td
(are)Tj
1.4513 0 Td
(these)Tj
2.2507 0 Td
(algorithms?)Tj
5.2837 0 Td
(Reinforcement)Tj
-31.589 -1.304 Td
(learning)Tj
3.5205 0 Td
(has)Tj
1.5478 0 Td
(a)Tj
0.6519 0 Td
(30+)Tj
1.7518 0 Td
(year)Tj
1.9049 0 Td
(history)Tj
3.0047 0 Td
(in)Tj
1.0375 0 Td
(computer)Tj
4.1045 0 Td
(science;)Tj
3.3165 0 Td
(many)Tj
2.4775 0 Td
(different)Tj
3.6567 0 Td
(techniques)Tj
4.5297 0 Td
(have)Tj
2.0579 0 Td
(been)Tj
-33.5619 -1.2982 Td
(developed,)Tj
4.4957 0 Td
(all)Tj
1.1451 0 Td
(with)Tj
1.9956 0 Td
(their)Tj
2.109 0 Td
(own)Tj
1.9615 0 Td
(strengths)Tj
3.8722 0 Td
(and)Tj
1.7347 0 Td
(weaknesses.)Tj
4.9606 0 Td
(Thus)Tj
2.2451 0 Td
(it)Tj
0.788 0 Td
(is)Tj
0.8447 0 Td
(quite)Tj
2.245 0 Td
(important)Tj
4.2803 0 Td
(which)Tj
-32.6775 -1.2983 Td
(computational)Tj
6.049 0 Td
(account)Tj
3.3789 0 Td
(from)Tj
2.2053 0 Td
(point)Tj
2.3585 0 Td
(2)Tj
0.6973 0 Td
(a)Tj
0.6519 0 Td
(modeller)Tj
3.7871 0 Td
(chooses)Tj
3.3335 0 Td
(to)Tj
1.0205 0 Td
(implement)Tj
4.5864 0 Td
(in)Tj
1.0375 0 Td
(point)Tj
2.3584 0 Td
(3,)Tj
0.9184 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(result-)Tj
-34.8659 -1.2982 Td
(ing)Tj
1.5023 0 Td
(neural)Tj
2.7666 0 Td
(theory)Tj
2.8063 0 Td
(will)Tj
1.6611 0 Td
(have)Tj
2.0636 0 Td
(similar)Tj
2.982 0 Td
(strengths)Tj
3.8778 0 Td
(and)Tj
1.7291 0 Td
(weaknesses)Tj
4.7338 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.474 0 Td
(computational)Tj
6.0434 0 Td
(theory.)Tj
-32.6662 -1.304 Td
(Unfortunately,)Tj
6.1284 0 Td
(much)Tj
2.5172 0 Td
(neural)Tj
2.7722 0 Td
(modelling)Tj
4.269 0 Td
(work)Tj
2.279 0 Td
(has)Tj
1.542 0 Td
(been)Tj
2.126 0 Td
(based)Tj
2.4718 0 Td
(on)Tj
1.2756 0 Td
(some)Tj
2.3301 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(earliest)Tj
3.0444 0 Td
(computa-)Tj
-33.2388 -1.2982 Td
(tional)Tj
2.5171 0 Td
(theories,)Tj
3.6283 0 Td
(and)Tj
1.7291 0 Td
(we)Tj
1.3266 0 Td
(therefore)Tj
3.8551 0 Td
(know)Tj
2.4548 0 Td
(that)Tj
1.7802 0 Td
(the)Tj
1.4683 0 Td
(proposed)Tj
3.9515 0 Td
(neural)Tj
2.7722 0 Td
(system)Tj
2.9367 0 Td
(will)Tj
1.6668 0 Td
(have)Tj
2.0579 0 Td
(the)Tj
1.4684 0 Td
(same)Tj
2.262 0 Td
(lim-)Tj
-35.875 -1.2983 Td
(itations)Tj
3.1974 0 Td
(as)Tj
1.0148 0 Td
(those)Tj
2.3414 0 Td
(theories.)Tj
3.6227 0 Td
(For)Tj
1.627 0 Td
(example,)Tj
3.7758 0 Td
(many)Tj
2.4774 0 Td
(models)Tj
3.1011 0 Td
(can)Tj
1.6271 0 Td
(only)Tj
1.9785 0 Td
(learn)Tj
2.2451 0 Td
(to)Tj
1.0204 0 Td
(maximize)Tj
4.1329 0 Td
(immediate)Tj
-32.1616 -1.3039 Td
(reward)Tj
3.033 0 Td
(\(known)Tj
3.3448 0 Td
(as)Tj
1.0148 0 Td
(associative)Tj
4.8416 0 Td
(RL\)they)Tj
4.7395 0 Td
(cannot)Tj
2.982 0 Td
(learn)Tj
2.2394 0 Td
(to)Tj
1.0261 0 Td
(execute)Tj
3.2088 0 Td
(a)Tj
0.652 0 Td
(series)Tj
2.4264 0 Td
(of)Tj
1.0148 0 Td
(unrewarded)Tj
-30.5232 -1.2983 Td
(actions)Tj
3.0557 0 Td
(in)Tj
1.0374 0 Td
(order)Tj
2.4208 0 Td
(to)Tj
1.0261 0 Td
(achieve)Tj
3.1748 0 Td
(a)Tj
0.652 0 Td
(larger)Tj
2.5342 0 Td
(reward)Tj
3.033 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.4683 0 Td
(future)Tj
2.6589 0 Td
([)Tj
0.83 0.64 0.02 0 k
(3)Tj
0 g
()Tj
0.83 0.64 0.02 0 k
(8)Tj
0 g
(].)Tj
2.5965 0 Td
(This)Tj
1.9843 0 Td
(is)Tj
0.8504 0 Td
(not)Tj
1.5704 0 Td
(to)Tj
1.0261 0 Td
(say)Tj
1.4683 0 Td
(that)Tj
1.7802 0 Td
(there)Tj
2.262 0 Td
(are)Tj
-35.6312 -1.2982 Td
(not)Tj
1.5703 0 Td
(important)Tj
4.286 0 Td
(insights)Tj
3.3165 0 Td
(to)Tj
1.0205 0 Td
(be)Tj
1.1508 0 Td
(gained)Tj
2.8857 0 Td
(from)Tj
2.2053 0 Td
(such)Tj
2.0636 0 Td
(models,)Tj
3.3279 0 Td
(for)Tj
1.3889 0 Td
(example)Tj
3.549 0 Td
(with)Tj
1.9956 0 Td
(respect)Tj
3.0387 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4684 0 Td
(mecha-)Tj
-34.2933 -1.304 Td
(nisms)Tj
2.5738 0 Td
(of)Tj
1.0148 0 Td
(dopamine)Tj
4.2689 0 Td
(modulated)Tj
4.5298 0 Td
(plasticity.)Tj
4.0138 0 Td
(But)Tj
1.6271 0 Td
(from)Tj
2.2053 0 Td
(a)Tj
0.652 0 Td
(functional)Tj
4.3086 0 Td
(perspective,)Tj
4.9266 0 Td
(we)Tj
1.3266 0 Td
(know)Tj
2.4605 0 Td
(that)Tj
-33.9078 -1.2982 Td
(these)Tj
2.2506 0 Td
(mechanisms)Tj
5.2101 0 Td
(are)Tj
1.4456 0 Td
(not)Tj
1.5761 0 Td
(enough)Tj
3.2201 0 Td
(to)Tj
1.0262 0 Td
(support)Tj
3.3165 0 Td
(the)Tj
1.4683 0 Td
(temporally)Tj
4.5411 0 Td
(extended)Tj
3.8551 0 Td
(decision)Tj
3.5489 0 Td
(making)Tj
3.2485 0 Td
(behav-)Tj
-34.7071 -1.2983 Td
(iour)Tj
1.8935 0 Td
(observed)Tj
3.7927 0 Td
(in)Tj
1.0375 0 Td
(humans)Tj
3.4355 0 Td
(and)Tj
1.7348 0 Td
(other)Tj
2.3471 0 Td
(animals.)Tj
-13.0449 -1.3039 Td
(Another)Tj
3.5829 0 Td
(challenge)Tj
3.9572 0 Td
(arises)Tj
2.4377 0 Td
(when)Tj
2.4038 0 Td
(incorporating)Tj
5.7486 0 Td
(features)Tj
3.3449 0 Td
(of)Tj
1.0148 0 Td
(biological)Tj
4.0932 0 Td
(complexity)Tj
4.6431 0 Td
(into)Tj
1.8368 0 Td
(ideal-)Tj
-34.2592 -1.2983 Td
(ized)Tj
1.8481 0 Td
(computational)Tj
6.0491 0 Td
(algorithms.)Tj
4.7452 0 Td
(This)Tj
1.9842 0 Td
(is)Tj
0.8504 0 Td
(almost)Tj
2.8857 0 Td
(never)Tj
2.4434 0 Td
(a)Tj
0.652 0 Td
(straightforward)Tj
6.4289 0 Td
(translation)Tj
4.5297 0 Td
(process,)Tj
3.4129 0 Td
(and)Tj
-35.8296 -1.2982 Td
(can)Tj
1.627 0 Td
(require)Tj
3.1068 0 Td
(fundamental)Tj
5.3177 0 Td
(changes)Tj
3.4016 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.474 0 Td
(underlying)Tj
4.5978 0 Td
(algorithm;)Tj
4.3823 0 Td
(that)Tj
1.7802 0 Td
(is)Tj
0.8447 0 Td
(why)Tj
1.8879 0 Td
(constructing)Tj
5.261 0 Td
(biolog-)Tj
-34.7071 -1.2983 Td
(ically)Tj
2.2903 0 Td
(detailed)Tj
3.3676 0 Td
(neural)Tj
2.7722 0 Td
(models)Tj
3.1068 0 Td
(is)Tj
0.8447 0 Td
(important,)Tj
4.507 0 Td
(if)Tj
0.7767 0 Td
(we)Tj
1.3266 0 Td
(want)Tj
2.1884 0 Td
(to)Tj
1.0204 0 Td
(understand)Tj
4.7906 0 Td
(the)Tj
1.4683 0 Td
(relationship)Tj
4.9889 0 Td
(between)Tj
-33.4485 -1.3039 Td
(idealized)Tj
3.7473 0 Td
(algorithms)Tj
4.5184 0 Td
(and)Tj
1.7291 0 Td
(the)Tj
1.4684 0 Td
(imperfect)Tj
4.0535 0 Td
(computational)Tj
6.0491 0 Td
(system)Tj
2.9367 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(brain)Tj
2.347 0 Td
([)Tj
0.83 0.64 0.02 0 k
(9)Tj
0 g
(].)Tj
1.5988 0 Td
(For)Tj
1.627 0 Td
(example,)Tj
-32.5641 -1.2983 Td
(many)Tj
2.4774 0 Td
(RL)Tj
1.3776 0 Td
(algorithms)Tj
4.5184 0 Td
(instantiated)Tj
4.9266 0 Td
(in)Tj
1.0375 0 Td
(neural)Tj
2.7666 0 Td
(models)Tj
3.1067 0 Td
(assume)Tj
3.1465 0 Td
(that)Tj
1.7801 0 Td
(space)Tj
2.3811 0 Td
(and)Tj
1.7291 0 Td
(time)Tj
2.0296 0 Td
(are)Tj
1.4457 0 Td
(divided)Tj
3.2144 0 Td
(into)Tj
-35.9373 -1.2982 Td
(discrete)Tj
3.3165 0 Td
(steps)Tj
2.1769 0 Td
(\(e.g.,)Tj
2.126 0 Td
([)Tj
0.83 0.64 0.02 0 k
(10)Tj
0 g
()Tj
0.83 0.64 0.02 0 k
(12)Tj
0 g
(]\),)Tj
3.8891 0 Td
(that)Tj
1.7745 0 Td
(computations)Tj
5.7316 0 Td
(are)Tj
1.4457 0 Td
(completely)Tj
4.5807 0 Td
(accurate)Tj
3.5547 0 Td
(and)Tj
1.7348 0 Td
(noiseless)Tj
3.719 0 Td
(\(e.g.,)Tj
-34.0495 -1.304 Td
([)Tj
0.83 0.64 0.02 0 k
(13)Tj
0 g
(]\),)Tj
2.415 0 Td
(or)Tj
1.0999 0 Td
(that)Tj
1.7745 0 Td
(functions)Tj
3.9911 0 Td
(have)Tj
2.0636 0 Td
(perfect)Tj
2.965 0 Td
(access)Tj
2.6419 0 Td
(to)Tj
1.0261 0 Td
(information)Tj
5.0797 0 Td
(from)Tj
2.2053 0 Td
(other)Tj
2.3471 0 Td
(areas)Tj
2.245 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(model)Tj
2.7439 0 Td
(or)Tj
-35.0813 -1.2982 Td
(previous)Tj
3.6396 0 Td
(points)Tj
2.7099 0 Td
(in)Tj
1.0318 0 Td
(time)Tj
2.0183 0 Td
(\(e.g.,)Tj
2.1203 0 Td
([)Tj
0.83 0.64 0.02 0 k
(14)Tj
0 g
(,)Tj
0.83 0.64 0.02 0 k
1.7291 0 Td
(15)Tj
0 g
(]\).)Tj
2.0636 0 Td
(When)Tj
2.6816 0 Td
(we)Tj
1.3209 0 Td
(increase)Tj
3.4639 0 Td
(the)Tj
1.4627 0 Td
(biological)Tj
4.0818 0 Td
(realism)Tj
3.1351 0 Td
(of)Tj
1.0035 0 Td
(these)Tj
2.245 0 Td
(models)Tj
-34.7071 -1.2983 Td
(it)Tj
0.7824 0 Td
(is)Tj
0.8446 0 Td
(often)Tj
2.2847 0 Td
(necessary)Tj
4.0195 0 Td
(to)Tj
1.0262 0 Td
(modify)Tj
3.084 0 Td
(their)Tj
2.1033 0 Td
(implementation,)Tj
6.8655 0 Td
(such)Tj
2.0579 0 Td
(as)Tj
1.0148 0 Td
(increasing)Tj
4.3257 0 Td
(the)Tj
1.4683 0 Td
(number)Tj
3.4186 0 Td
(of)Tj
1.0091 0 Td
(neurons)Tj
-34.3046 -1.3039 Td
(to)Tj
1.0262 0 Td
(counteract)Tj
4.4729 0 Td
(the)Tj
1.4684 0 Td
(less)Tj
1.6157 0 Td
(precise)Tj
3.0047 0 Td
(output)Tj
2.897 0 Td
(of)Tj
1.0148 0 Td
(each)Tj
2.0296 0 Td
(neuron.)Tj
3.3732 0 Td
(However,)Tj
4.0762 0 Td
(this)Tj
1.6724 0 Td
(can)Tj
1.6271 0 Td
(lead)Tj
1.8482 0 Td
(to)Tj
1.0261 0 Td
(an)Tj
1.2019 0 Td
(implausibly/)Tj
-32.3544 -1.2983 Td
(impractically)Tj
5.4197 0 Td
(large)Tj
2.16 0 Td
(number)Tj
3.4186 0 Td
(of)Tj
1.0148 0 Td
(neurons)Tj
3.5093 0 Td
(\(e.g.,)Tj
2.1259 0 Td
(a)Tj
0.652 0 Td
(significant)Tj
4.388 0 Td
(portion,)Tj
3.4639 0 Td
(or)Tj
1.0998 0 Td
(more,)Tj
2.5626 0 Td
(of)Tj
1.0147 0 Td
(all)Tj
1.1509 0 Td
(neurons)Tj
3.5036 0 Td
(that)Tj
-35.4838 -1.2982 Td
(exist)Tj
2.0352 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.474 0 Td
(brain)Tj
2.3471 0 Td
(or)Tj
1.0998 0 Td
(some)Tj
2.3301 0 Td
(modelled)Tj
3.9401 0 Td
(subregion)Tj
4.1953 0 Td
([)Tj
0.83 0.64 0.02 0 k
(16)Tj
0 g
(]\).)Tj
2.4207 0 Td
(Thus)Tj
2.2451 0 Td
(we)Tj
1.3266 0 Td
(may)Tj
1.9332 0 Td
(need)Tj
2.143 0 Td
(to)Tj
1.0204 0 Td
(adapt)Tj
2.4321 0 Td
(the)Tj
1.474 0 Td
(algorithm)Tj
-33.4485 -1.2983 Td
(itself,)Tj
2.3187 0 Td
(for)Tj
1.3833 0 Td
(example)Tj
3.5376 0 Td
(to)Tj
1.0204 0 Td
(reduce)Tj
2.9084 0 Td
(the)Tj
1.4626 0 Td
(required)Tj
3.634 0 Td
(precision.)Tj
4.1216 0 Td
(Or)Tj
1.3323 0 Td
(in)Tj
1.0318 0 Td
(other)Tj
2.3357 0 Td
(cases)Tj
2.2167 0 Td
(we)Tj
1.3209 0 Td
(may)Tj
1.9219 0 Td
(need)Tj
2.1373 0 Td
(to)Tj
1.0148 0 Td
(introduce)Tj
-33.698 -1.3039 Td
(entirely)Tj
3.2541 0 Td
(new)Tj
1.8765 0 Td
(components)Tj
5.1704 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(algorithm,)Tj
4.3824 0 Td
(such)Tj
2.0636 0 Td
(as)Tj
1.0148 0 Td
(a)Tj
0.652 0 Td
(memory)Tj
3.6169 0 Td
(system)Tj
2.9367 0 Td
(to)Tj
1.0261 0 Td
(preserve)Tj
3.566 0 Td
(information)Tj
-32.0539 -1.2983 Td
(from)Tj
2.2053 0 Td
(previous)Tj
3.651 0 Td
(points)Tj
2.7156 0 Td
(in)Tj
1.0374 0 Td
(time.)Tj
2.2564 0 Td
(It)Tj
0.856 0 Td
(is)Tj
0.8504 0 Td
(not)Tj
1.5704 0 Td
(a)Tj
0.652 0 Td
(guarantee)Tj
4.1499 0 Td
(that)Tj
1.7801 0 Td
(this)Tj
1.6725 0 Td
(process)Tj
3.1861 0 Td
(will)Tj
1.6611 0 Td
(result)Tj
2.4491 0 Td
(in)Tj
1.0318 0 Td
(success;)Tj
3.3278 0 Td
(some)Tj
-35.0529 -1.2982 Td
(algorithms)Tj
4.5183 0 Td
(have)Tj
2.058 0 Td
(crucial)Tj
2.914 0 Td
(assumptions)Tj
5.2214 0 Td
(that)Tj
1.7801 0 Td
(are)Tj
1.4457 0 Td
(simply)Tj
2.8856 0 Td
(not)Tj
1.5761 0 Td
(possible)Tj
3.4072 0 Td
(to)Tj
1.0261 0 Td
(translate)Tj
3.634 0 Td
(into)Tj
1.8425 0 Td
(a)Tj
0.6577 0 Td
(biologically)Tj
-32.9667 -1.304 Td
(detailed)Tj
3.3675 0 Td
(implementation.)Tj
6.8654 0 Td
(This)Tj
1.9843 0 Td
(means)Tj
2.8063 0 Td
(that)Tj
1.7801 0 Td
(if)Tj
0.7767 0 Td
(we)Tj
1.3266 0 Td
(want)Tj
2.1827 0 Td
(to)Tj
1.0261 0 Td
(know)Tj
2.4548 0 Td
(whether)Tj
3.4752 0 Td
(an)Tj
1.2076 0 Td
(abstract)Tj
3.3449 0 Td
(RL)Tj
1.3776 0 Td
(algo-)Tj
-33.9758 -1.2982 Td
(rithm)Tj
2.5057 0 Td
(is)Tj
0.8448 0 Td
(a)Tj
0.6519 0 Td
(plausible)Tj
3.7531 0 Td
(hypothesis)Tj
4.4617 0 Td
(for)Tj
1.389 0 Td
(RL)Tj
1.3776 0 Td
(processing)Tj
4.4674 0 Td
(in)Tj
1.0374 0 Td
(the)Tj
1.4684 0 Td
(brain,)Tj
2.5738 0 Td
(demonstrating)Tj
6.0888 0 Td
(a)Tj
0.652 0 Td
(successful)Tj
4.1669 0 Td
(bio-)Tj
-35.4385 -1.2983 Td
(logical)Tj
2.8062 0 Td
(neural)Tj
2.7723 0 Td
(implementation)Tj
6.6387 0 Td
(of)Tj
1.0148 0 Td
(that)Tj
1.7801 0 Td
(algorithm)Tj
4.1556 0 Td
(is)Tj
0.8504 0 Td
(an)Tj
1.2019 0 Td
(important)Tj
4.2802 0 Td
(step.)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(2)Tj
0.7654 0 Td
(/)Tj
0.496 0 Td
(39)Tj
/F8 1 Tf
-0.025 Tc
-66.402 84.0904 Td
(had)Tj
1.5874 0 Td
(no)Tj
1.1693 0 Td
(role)Tj
1.6157 0 Td
(in)Tj
0.8859 0 Td
(study)Tj
2.2606 0 Td
(design,)Tj
2.9055 0 Td
(data)Tj
1.793 0 Td
(collecti)Tj
2.5653 0 Td
(on)Tj
1.1693 0 Td
(and)Tj
-15.952 -1.3748 Td
(analysis,)Tj
3.4086 0 Td
(decision)Tj
3.2953 0 Td
(to)Tj
0.9497 0 Td
(publish,)Tj
3.1393 0 Td
(or)Tj
1.0064 0 Td
(prepara)Tj
2.8205 0 Td
(tion)Tj
1.6157 0 Td
(of)Tj
0.9496 0 Td
(the)Tj
-17.1851 -1.3749 Td
(manuscript.)Tj
4.6417 0 Td
(DR,)Tj
1.6158 0 Td
(CE)Tj
1.2189 0 Td
(are)Tj
1.3677 0 Td
(employees)Tj
4.2237 0 Td
(of)Tj
0.9496 0 Td
(Applied)Tj
2.9693 0 Td
(Brain)Tj
-16.9867 -1.3748 Td
(Research)Tj
3.4724 0 Td
(,)Tj
0.4465 0 Td
(Inc.)Tj
1.5945 0 Td
(The)Tj
1.5874 0 Td
(funder)Tj
2.6291 0 Td
(provided)Tj
3.4513 0 Td
(support)Tj
3.1039 0 Td
(in)Tj
0.8859 0 Td
(the)Tj
-17.171 -1.3748 Td
(form)Tj
2.0126 0 Td
(of)Tj
0.9425 0 Td
(salaries)Tj
3.026 0 Td
(for)Tj
1.2543 0 Td
(DR,)Tj
1.6158 0 Td
(but)Tj
1.4244 0 Td
(did)Tj
1.3607 0 Td
(not)Tj
1.4244 0 Td
(have)Tj
1.9559 0 Td
(any)Tj
-15.0166 -1.3748 Td
(additional)Tj
3.7913 0 Td
(role)Tj
1.6158 0 Td
(in)Tj
0.8858 0 Td
(the)Tj
1.3677 0 Td
(study)Tj
2.2678 0 Td
(design,)Tj
2.8984 0 Td
(data)Tj
1.793 0 Td
(collection)Tj
-14.6198 -1.3748 Td
(and)Tj
1.5874 0 Td
(analysis,)Tj
3.4087 0 Td
(decision)Tj
3.2953 0 Td
(to)Tj
0.9496 0 Td
(publish,)Tj
3.1464 0 Td
(or)Tj
0.9993 0 Td
(preparation)Tj
4.4362 0 Td
(of)Tj
-17.8229 -1.3748 Td
(the)Tj
1.3677 0 Td
(manuscript.)Tj
4.6417 0 Td
(The)Tj
1.5875 0 Td
(specific)Tj
3.0189 0 Td
(roles)Tj
2.0409 0 Td
(of)Tj
0.9497 0 Td
(these)Tj
2.211 0 Td
(authors)Tj
-15.8174 -1.3748 Td
(are)Tj
1.3677 0 Td
(articulated)Tj
4.0606 0 Td
(in)Tj
0.8859 0 Td
(the)Tj
1.3677 0 Td
(`author)Tj
2.8205 0 Td
(contributi)Tj
3.515 0 Td
(ons')Tj
1.7858 0 Td
(section.)Tj
/F7 1 Tf
-15.8032 -2.126 Td
(Competing)Tj
4.3795 0 Td
(interests)Tj
3.3307 0 Td
(:)Tj
/F8 1 Tf
0.4819 0 Td
(DR,)Tj
1.6158 0 Td
(CE)Tj
1.2189 0 Td
(are)Tj
1.3677 0 Td
(employees/boa)Tj
5.6269 0 Td
(rd)Tj
-18.0214 -1.3749 Td
(members/)Tj
3.8055 0 Td
(shareholder)Tj
4.3866 0 Td
(s)Tj
0.6449 0 Td
(of)Tj
0.9496 0 Td
(Applied)Tj
2.9694 0 Td
(Brain)Tj
2.1401 0 Td
(Research,)Tj
-14.8961 -1.3748 Td
(Inc.)Tj
1.5945 0 Td
(Applied)Tj
2.9764 0 Td
(Brain)Tj
2.1401 0 Td
(Researc)Tj
2.9977 0 Td
(h,)Tj
0.9213 0 Td
(Inc)Tj
1.3677 0 Td
(has)Tj
1.5378 0 Td
(a)Tj
0.6378 0 Td
(patent)Tj
-14.1733 -1.3748 Td
(pending)Tj
3.2031 0 Td
(on)Tj
1.1622 0 Td
(some)Tj
2.2961 0 Td
(of)Tj
0.9425 0 Td
(the)Tj
1.3749 0 Td
(material)Tj
3.189 0 Td
(described)Tj
3.8197 0 Td
(in)Tj
0.8929 0 Td
(this)Tj
-16.8804 -1.3748 Td
(paper)Tj
2.3173 0 Td
(\(Methods)Tj
4.1599 0 Td
(and)Tj
1.5874 0 Td
(systems)Tj
3.3378 0 Td
(for)Tj
1.2543 0 Td
(performing)Tj
-12.6567 -1.3748 Td
(reinforcem)Tj
4.0252 0 Td
(ent)Tj
1.3748 0 Td
(learning)Tj
3.1748 0 Td
(in)Tj
0.8859 0 Td
(hierarchica)Tj
4.0535 0 Td
(l)Tj
0.4181 0 Td
(and)Tj
-13.9323 -1.3748 Td
(temporally)Tj
4.1315 0 Td
(extended)Tj
3.5858 0 Td
(environmen)Tj
4.4079 0 Td
(ts)Tj
1.2615 0 Td
(US)Tj
1.3323 0 Td
(14/)Tj
-14.719 -1.3748 Td
(836,084\).)Tj
3.8197 0 Td
(This)Tj
1.7787 0 Td
(does)Tj
2.0055 0 Td
(not)Tj
1.4245 0 Td
(alter)Tj
1.8212 0 Td
(our)Tj
1.4741 0 Td
(adherenc)Tj
3.4087 0 Td
(e)Tj
0.6448 0 Td
(to)Tj
-16.3772 -1.3748 Td
(PLOS)Tj
2.3315 0 Td
(ONE)Tj
1.8708 0 Td
(policies)Tj
3.0119 0 Td
(on)Tj
1.1622 0 Td
(sharing)Tj
2.9835 0 Td
(data)Tj
1.7929 0 Td
(and)Tj
ET
endstream
endobj
25 0 obj
<>stream
endstream
endobj
26 0 obj
<>stream
endstream
endobj
27 0 obj
<>stream
endstream
endobj
28 0 obj
<>stream
endstream
endobj
29 0 obj
<>stream
endstream
endobj
30 0 obj
<>stream
endstream
endobj
31 0 obj
<>stream
endstream
endobj
32 0 obj
<>stream
endstream
endobj
33 0 obj
<>stream
BT
14.7402 59.9674 Td
(materials.)Tj
ET
Q
endstream
endobj
34 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 42 0 R/Contents 43 0 R/TrimBox[0 0 612 792]>>
endobj
42 0 obj
[44 0 R 45 0 R 46 0 R 47 0 R]
endobj
44 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref017)>>
endobj
45 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref018)>>
endobj
46 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref019)>>
endobj
47 0 obj
<>/Border[0 0 0]/A 48 0 R>>
endobj
48 0 obj
<>
endobj
43 0 obj
[49 0 R 50 0 R 51 0 R 52 0 R 53 0 R 54 0 R 55 0 R]
endobj
49 0 obj
<>stream
q
0.83 0.64 0.02 0 k
450.2551 667.3323 m
459.7795 667.3323 l
h
f*
243.2126 511.3134 m
252.737 511.3134 l
h
f*
410.6268 276.0945 m
420.1512 276.0945 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 211.9748 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(A)Tj
0.907 0 Td
(further)Tj
3.0274 0 Td
(difficulty)Tj
3.8098 0 Td
(for)Tj
1.3833 0 Td
(many)Tj
2.4774 0 Td
(models)Tj
3.1068 0 Td
(is)Tj
0.8447 0 Td
(scaling)Tj
2.9707 0 Td
(up)Tj
1.2642 0 Td
(to)Tj
1.0262 0 Td
(complex)Tj
3.6226 0 Td
(problem)Tj
3.6113 0 Td
(spaces.)Tj
2.965 0 Td
(While)Tj
2.6476 0 Td
(even)Tj
-34.8602 -1.3039 Td
(the)Tj
1.4683 0 Td
(simplest)Tj
3.5149 0 Td
(models)Tj
3.1068 0 Td
(may)Tj
1.9275 0 Td
(be)Tj
1.1509 0 Td
(guaranteed)Tj
4.6714 0 Td
(to)Tj
1.0262 0 Td
(find)Tj
1.8481 0 Td
(the)Tj
1.4741 0 Td
(correct)Tj
3.0387 0 Td
(solution)Tj
3.4809 0 Td
(in)Tj
1.0375 0 Td
(some)Tj
2.33 0 Td
(domain,)Tj
3.549 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
-34.6391 -1.2982 Td
(problem)Tj
3.6113 0 Td
(space)Tj
2.381 0 Td
(becomes)Tj
3.6851 0 Td
(more)Tj
2.3414 0 Td
(complex)Tj
3.6169 0 Td
(it)Tj
0.7881 0 Td
(can)Tj
1.627 0 Td
(take)Tj
1.8709 0 Td
(impractical)Tj
4.7225 0 Td
(amounts)Tj
3.7134 0 Td
(of)Tj
1.0148 0 Td
(time)Tj
2.0295 0 Td
(or)Tj
1.0999 0 Td
(resources)Tj
3.9911 0 Td
(to)Tj
-36.4929 -1.2983 Td
(find)Tj
1.8481 0 Td
(that)Tj
1.7802 0 Td
(solution.)Tj
3.7077 0 Td
(Hierarchical)Tj
5.176 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5263 0 Td
(\(HRL;)Tj
2.7099 0 Td
([)Tj
0.83 0.64 0.02 0 k
(17)Tj
0 g
(]\))Tj
2.194 0 Td
(is)Tj
0.8447 0 Td
(a)Tj
0.6576 0 Td
(computational)Tj
-28.3802 -1.2983 Td
(approach)Tj
3.9514 0 Td
(aimed)Tj
2.6929 0 Td
(at)Tj
0.9524 0 Td
(addressing)Tj
4.5071 0 Td
(this)Tj
1.6724 0 Td
(difficulty.)Tj
4.0309 0 Td
(The)Tj
1.7858 0 Td
(basic)Tj
2.211 0 Td
(idea)Tj
1.8708 0 Td
(behind)Tj
3.0218 0 Td
(HRL)Tj
2.1486 0 Td
(is)Tj
0.8504 0 Td
(to)Tj
1.0261 0 Td
(decompose)Tj
4.7282 0 Td
(the)Tj
-35.4498 -1.3039 Td
(overall)Tj
2.9026 0 Td
(RL)Tj
1.3776 0 Td
(task)Tj
1.8085 0 Td
(into)Tj
1.8425 0 Td
(subtasks,)Tj
3.7928 0 Td
(whose)Tj
2.7212 0 Td
(solutions)Tj
3.8438 0 Td
(can)Tj
1.627 0 Td
(be)Tj
1.1509 0 Td
(learned)Tj
3.1975 0 Td
(more)Tj
2.3414 0 Td
(tractably.)Tj
3.9117 0 Td
(Those)Tj
2.6476 0 Td
(subtask)Tj
-33.1651 -1.2982 Td
(solutions)Tj
3.8437 0 Td
(represent)Tj
3.9571 0 Td
(abstract)Tj
3.3449 0 Td
(actions,)Tj
3.2882 0 Td
(such)Tj
2.0579 0 Td
(that)Tj
1.7801 0 Td
(if)Tj
0.7767 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3867 0 Td
(executes)Tj
3.566 0 Td
(that)Tj
1.7801 0 Td
(action)Tj
2.6986 0 Td
(it)Tj
0.788 0 Td
(will)Tj
1.6611 0 Td
(carry)Tj
2.2734 0 Td
(out)Tj
-35.6709 -1.2983 Td
(the)Tj
1.4683 0 Td
(subtask.)Tj
3.4299 0 Td
(The)Tj
1.7858 0 Td
(agent)Tj
2.3867 0 Td
(then)Tj
2.0183 0 Td
(needs)Tj
2.5058 0 Td
(to)Tj
1.0261 0 Td
(learn)Tj
2.2394 0 Td
(how)Tj
1.9389 0 Td
(to)Tj
1.0261 0 Td
(select)Tj
2.3981 0 Td
(between)Tj
3.5263 0 Td
(different)Tj
3.6567 0 Td
(abstract)Tj
3.3505 0 Td
(and)Tj
1.7291 0 Td
(primi-)Tj
-34.486 -1.3039 Td
(tive)Tj
1.661 0 Td
(actions)Tj
3.0614 0 Td
(in)Tj
1.0375 0 Td
(order)Tj
2.4208 0 Td
(to)Tj
1.0204 0 Td
(complete)Tj
3.8835 0 Td
(the)Tj
1.4683 0 Td
(overall)Tj
2.9027 0 Td
(task.)Tj
2.0352 0 Td
(This)Tj
1.9843 0 Td
(decomposition)Tj
6.1965 0 Td
(has)Tj
1.542 0 Td
(a)Tj
0.652 0 Td
(number)Tj
3.4185 0 Td
(of)Tj
1.0148 0 Td
(benefits)Tj
-34.2989 -1.2983 Td
(\(discussed)Tj
4.3369 0 Td
(in)Tj
1.0375 0 Td
(more)Tj
2.3414 0 Td
(detail)Tj
2.4151 0 Td
(in)Tj
1.0375 0 Td
(Section)Tj
3.1634 0 Td
(2.2\),)Tj
1.9616 0 Td
(allowing)Tj
3.617 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5263 0 Td
(to)Tj
1.0261 0 Td
(scale)Tj
2.1033 0 Td
(to)Tj
1.0261 0 Td
(more)Tj
-33.5279 -1.2983 Td
(complex)Tj
3.6226 0 Td
(problems.)Tj
4.1952 0 Td
(Thus)Tj
2.2507 0 Td
(HRL)Tj
2.143 0 Td
(is)Tj
0.8504 0 Td
(an)Tj
1.2019 0 Td
(intriguing)Tj
4.2463 0 Td
(candidate)Tj
4.1045 0 Td
(as)Tj
1.0148 0 Td
(an)Tj
1.2019 0 Td
(account)Tj
3.3845 0 Td
(for)Tj
1.3833 0 Td
(reinforcement)Tj
-29.5991 -1.3039 Td
(learning)Tj
3.5205 0 Td
(processes)Tj
3.9742 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.4683 0 Td
(brain,)Tj
2.5739 0 Td
(as)Tj
1.0148 0 Td
(it)Tj
0.788 0 Td
(would)Tj
2.7099 0 Td
(scale)Tj
2.109 0 Td
(better)Tj
2.5455 0 Td
(to)Tj
1.0204 0 Td
(the)Tj
1.474 0 Td
(complex)Tj
3.6227 0 Td
(problems)Tj
3.9685 0 Td
(faced)Tj
2.3187 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
-35.1777 -1.2982 Td
(real)Tj
1.695 0 Td
(world.)Tj
2.778 0 Td
(However,)Tj
4.0818 0 Td
(in)Tj
1.0318 0 Td
(order)Tj
2.4208 0 Td
(to)Tj
1.0262 0 Td
(pursue)Tj
2.948 0 Td
(that)Tj
1.7801 0 Td
(hypothesis)Tj
4.4674 0 Td
(we)Tj
1.3266 0 Td
(need)Tj
2.143 0 Td
(to)Tj
1.0261 0 Td
(address)Tj
3.2258 0 Td
(the)Tj
1.4684 0 Td
(above)Tj
2.5454 0 Td
(issue:)Tj
-33.9644 -1.2983 Td
(can)Tj
1.627 0 Td
(this)Tj
1.6724 0 Td
(theory)Tj
2.8007 0 Td
(be)Tj
1.1508 0 Td
(adapted)Tj
3.3846 0 Td
(so)Tj
1.0885 0 Td
(as)Tj
1.0148 0 Td
(to)Tj
1.0261 0 Td
(be)Tj
1.1452 0 Td
(implemented)Tj
5.5388 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
0.652 0 Td
(biologically)Tj
4.7905 0 Td
(plausible)Tj
3.7474 0 Td
(neural)Tj
2.7722 0 Td
(model?)Tj
-33.4485 -1.3039 Td
(This)Tj
1.9842 0 Td
(has)Tj
1.542 0 Td
(not)Tj
1.5761 0 Td
(been)Tj
2.1203 0 Td
(demonstrated)Tj
5.7599 0 Td
(by)Tj
1.1849 0 Td
(any)Tj
1.6554 0 Td
(previous)Tj
3.651 0 Td
(model)Tj
2.7439 0 Td
(\(although)Tj
4.0933 0 Td
(there)Tj
2.2677 0 Td
(have)Tj
2.0579 0 Td
(been)Tj
2.1203 0 Td
(promising)Tj
-32.7569 -1.2983 Td
(first)Tj
1.8028 0 Td
(steps)Tj
2.177 0 Td
([)Tj
0.83 0.64 0.02 0 k
(18)Tj
0 g
(]\),)Tj
2.4207 0 Td
(so)Tj
1.0829 0 Td
(it)Tj
0.7823 0 Td
(is)Tj
0.8504 0 Td
(as)Tj
1.0091 0 Td
(yet)Tj
1.4003 0 Td
(unclear)Tj
3.1918 0 Td
(whether)Tj
3.4753 0 Td
(HRL)Tj
2.143 0 Td
(might)Tj
2.5965 0 Td
(be)Tj
1.1508 0 Td
(a)Tj
0.652 0 Td
(plausible)Tj
3.7474 0 Td
(account)Tj
3.3845 0 Td
(of)Tj
1.0092 0 Td
(hierarchical)Tj
-32.876 -1.2982 Td
(learning)Tj
3.5205 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4684 0 Td
(brain.)Tj
-4.8302 -1.2983 Td
(In)Tj
1.1111 0 Td
(this)Tj
1.6725 0 Td
(work)Tj
2.279 0 Td
(we)Tj
1.3266 0 Td
(construct)Tj
3.9798 0 Td
(such)Tj
2.0636 0 Td
(a)Tj
0.652 0 Td
(model,)Tj
2.9707 0 Td
(which)Tj
2.6475 0 Td
(we)Tj
1.3266 0 Td
(call)Tj
1.5704 0 Td
(the)Tj
1.474 0 Td
(Neural)Tj
2.9594 0 Td
(HRL)Tj
2.143 0 Td
(\(NHRL\))Tj
3.5659 0 Td
(model.)Tj
-32.9383 -1.3039 Td
(This)Tj
1.9842 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4683 0 Td
(first)Tj
1.8085 0 Td
(neural)Tj
2.7666 0 Td
(model)Tj
2.7439 0 Td
(to)Tj
1.0261 0 Td
(implement)Tj
4.5865 0 Td
(the)Tj
1.4683 0 Td
(computational)Tj
6.0491 0 Td
(processes)Tj
3.9685 0 Td
(of)Tj
1.0148 0 Td
(HRL.)Tj
2.3754 0 Td
(It)Tj
0.8561 0 Td
(can)Tj
1.627 0 Td
(operate)Tj
-34.5937 -1.2983 Td
(in)Tj
1.0318 0 Td
(environments)Tj
5.7883 0 Td
(that)Tj
1.7744 0 Td
(are)Tj
1.44 0 Td
(continuous)Tj
4.7338 0 Td
(in)Tj
1.0319 0 Td
(both)Tj
2.0579 0 Td
(time)Tj
2.0239 0 Td
(and)Tj
1.7235 0 Td
(space,)Tj
2.6022 0 Td
(and)Tj
1.7291 0 Td
(that)Tj
1.7688 0 Td
(involve)Tj
3.1181 0 Td
(lengthy,)Tj
3.4072 0 Td
(variable,)Tj
-34.2309 -1.2983 Td
(and)Tj
1.7177 0 Td
(unknown)Tj
4.0762 0 Td
(time)Tj
2.0183 0 Td
(delays.)Tj
2.8856 0 Td
(In)Tj
1.0999 0 Td
(addition,)Tj
3.8154 0 Td
(it)Tj
0.7767 0 Td
(can)Tj
1.6157 0 Td
(operate)Tj
3.1861 0 Td
(within)Tj
2.8063 0 Td
(the)Tj
1.4627 0 Td
(constraints)Tj
4.6261 0 Td
(of)Tj
1.0091 0 Td
(a)Tj
0.6463 0 Td
(realistic)Tj
3.2995 0 Td
(neural)Tj
-35.0416 -1.3039 Td
(environment,)Tj
5.6579 0 Td
(such)Tj
2.0579 0 Td
(as)Tj
1.0148 0 Td
(local)Tj
2.0749 0 Td
(information)Tj
5.0797 0 Td
(transfer,)Tj
3.5376 0 Td
(heterogeneous)Tj
6.0321 0 Td
(components,)Tj
5.3971 0 Td
(and)Tj
1.7291 0 Td
(imprecise)Tj
-32.5811 -1.2982 Td
(computations.)Tj
5.9527 0 Td
(We)Tj
1.61 0 Td
(begin)Tj
2.4321 0 Td
(by)Tj
1.1792 0 Td
(discussing)Tj
4.3314 0 Td
(the)Tj
1.4683 0 Td
(underlying)Tj
4.6034 0 Td
(theories)Tj
3.3959 0 Td
(of)Tj
1.0148 0 Td
(HRL)Tj
2.1487 0 Td
(and)Tj
1.7291 0 Td
(neural)Tj
2.7722 0 Td
(modelling)Tj
-32.6378 -1.2983 Td
(in)Tj
1.0318 0 Td
(more)Tj
2.3413 0 Td
(detail,)Tj
2.6476 0 Td
(as)Tj
1.0148 0 Td
(well)Tj
1.8198 0 Td
(as)Tj
1.0148 0 Td
(briefly)Tj
2.7779 0 Td
(reviewing)Tj
4.1216 0 Td
(previous)Tj
3.651 0 Td
(modelling)Tj
4.2689 0 Td
(work)Tj
2.2791 0 Td
(in)Tj
1.0375 0 Td
(this)Tj
1.6724 0 Td
(area.)Tj
2.109 0 Td
(We)Tj
1.61 0 Td
(then)Tj
2.0183 0 Td
(pres-)Tj
-35.4158 -1.3039 Td
(ent)Tj
1.4853 0 Td
(the)Tj
1.4683 0 Td
(NHRL)Tj
2.88 0 Td
(model,)Tj
2.965 0 Td
(followed)Tj
3.6454 0 Td
(by)Tj
1.1792 0 Td
(results)Tj
2.8062 0 Td
(on)Tj
1.2756 0 Td
(several)Tj
2.931 0 Td
(different)Tj
3.651 0 Td
(tasks.)Tj
2.3925 0 Td
(We)Tj
1.6044 0 Td
(conclude)Tj
3.8494 0 Td
(with)Tj
1.9899 0 Td
(a)Tj
0.652 0 Td
(discus-)Tj
-34.7752 -1.2983 Td
(sion)Tj
1.9048 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(open)Tj
2.2167 0 Td
(questions)Tj
4.0195 0 Td
(highlighted)Tj
4.7622 0 Td
(by)Tj
1.1849 0 Td
(this)Tj
1.6724 0 Td
(work,)Tj
2.5058 0 Td
(as)Tj
1.0148 0 Td
(well)Tj
1.8198 0 Td
(as)Tj
1.0148 0 Td
(some)Tj
2.3301 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(predictions)Tj
4.7339 0 Td
(arising)Tj
-34.1459 -1.2983 Td
(from)Tj
2.2053 0 Td
(the)Tj
1.4683 0 Td
(model.)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 338.1732 cm
BT
/F2 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(2)Tj
0.7653 0 Td
(Background)Tj
/F0 1 Tf
-0.7653 -1.5023 Td
(2.1)Tj
1.5921 0 Td
(Reinforcement)Tj
6.652 0 Td
(learning)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 303.137 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(The)Tj
1.7801 0 Td
(basic)Tj
2.211 0 Td
(problem)Tj
3.617 0 Td
(to)Tj
1.0205 0 Td
(be)Tj
1.1508 0 Td
(solved)Tj
2.7439 0 Td
(by)Tj
1.1792 0 Td
(reinforcement)Tj
5.9358 0 Td
(learning)Tj
3.5206 0 Td
(is)Tj
0.8503 0 Td
(this:)Tj
1.8992 0 Td
(given)Tj
2.3811 0 Td
(the)Tj
1.4684 0 Td
(current)Tj
3.1861 0 Td
(state)Tj
2.0353 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
-35.9941 -1.2982 Td
(world,)Tj
2.7779 0 Td
(what)Tj
2.1656 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4684 0 Td
(best)Tj
1.8085 0 Td
(action)Tj
2.6985 0 Td
(to)Tj
1.0262 0 Td
(take?)Tj
2.245 0 Td
(Most)Tj
2.279 0 Td
(commonly,)Tj
4.7679 0 Td
(the)Tj
1.474 0 Td
(world)Tj
3.3392 0 Td
(is)Tj
0.8447 0 Td
(described)Tj
4.0535 0 Td
(formally)Tj
3.5886 0 Td
(in)Tj
-35.3874 -1.2983 Td
(the)Tj
1.4683 0 Td
(language)Tj
3.7644 0 Td
(of)Tj
1.0148 0 Td
(Markov)Tj
3.3788 0 Td
(Decision)Tj
3.7531 0 Td
(Processes)Tj
4.0082 0 Td
(\(MDPs;)Tj
3.3335 0 Td
([)Tj
0.83 0.64 0.02 0 k
(19)Tj
0 g
(]\),)Tj
2.4208 0 Td
(where)Tj
2.6475 0 Td
(the)Tj
1.4683 0 Td
(task)Tj
1.8085 0 Td
(has)Tj
1.5477 0 Td
(some)Tj
2.3301 0 Td
(state)Tj
2.0353 0 Td
(space)Tj
/F10 1 Tf
-34.9793 -1.2982 Td
[()]TJ
/F5 1 Tf
(,)Tj
0.9014 0 Td
(available)Tj
3.6623 0 Td
(actions)Tj
/F10 1 Tf
3.0614 0 Td
[()]TJ
/F5 1 Tf
(,)Tj
1.0998 0 Td
(transition)Tj
4.1216 0 Td
(function)Tj
/F10 1 Tf
3.6283 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
1.6611 0 Td
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
0.9298 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
6.9998 0 0 6.9998 394.0157 267.8173 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 396.3968 264.1889 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(\))Tj
0.5612 0 Td
(\(which)Tj
2.9877 0 Td
(describes)Tj
3.8834 0 Td
(how)Tj
1.9389 0 Td
(the)Tj
1.474 0 Td
(agent)Tj
2.3868 0 Td
(will)Tj
1.6668 0 Td
(move)Tj
-34.5371 -1.3039 Td
(through)Tj
3.4413 0 Td
(the)Tj
1.474 0 Td
(state)Tj
2.0353 0 Td
(space)Tj
2.3754 0 Td
(given)Tj
2.3811 0 Td
(a)Tj
0.6519 0 Td
(current)Tj
3.1861 0 Td
(state)Tj
/F10 1 Tf
2.0353 0 Td
[()]TJ
/F5 1 Tf
0.5499 0 Td
(and)Tj
1.7291 0 Td
(selected)Tj
3.3505 0 Td
(action)Tj
/F10 1 Tf
2.6985 0 Td
[()]TJ
/F5 1 Tf
(\),)Tj
1.27 0 Td
(and)Tj
1.7291 0 Td
(reward)Tj
3.033 0 Td
(function)Tj
/F10 1 Tf
3.6284 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
-35.5689 -1.2982 Td
[()]TJ
/F5 1 Tf
(\))Tj
1.0432 0 Td
(\(which)Tj
2.9934 0 Td
(describes)Tj
3.8834 0 Td
(the)Tj
1.4683 0 Td
(feedback)Tj
3.7474 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3867 0 Td
(will)Tj
1.6668 0 Td
(receive)Tj
3.0046 0 Td
(after)Tj
2.0352 0 Td
(selecting)Tj
3.6794 0 Td
(action)Tj
/F10 1 Tf
2.6985 0 Td
[()]TJ
/F5 1 Tf
0.703 0 Td
(in)Tj
1.0375 0 Td
(state)Tj
/F10 1 Tf
2.0353 0 Td
[()]TJ
/F5 1 Tf
(\).)Tj
1.1168 0 Td
(In)Tj
1.1055 0 Td
(this)Tj
-36.0734 -1.2983 Td
(framework,)Tj
4.8416 0 Td
(the)Tj
1.4683 0 Td
(best)Tj
2.1997 0 Td
(action)Tj
3.0897 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4684 0 Td
(one)Tj
1.7007 0 Td
(that)Tj
1.7802 0 Td
(maximizes)Tj
4.4899 0 Td
(the)Tj
1.4684 0 Td
(expected)Tj
3.719 0 Td
(long)Tj
1.9899 0 Td
(term)Tj
2.1317 0 Td
(reward)Tj
3.033 0 Td
(received)Tj
-34.2309 -1.3039 Td
(by)Tj
1.1793 0 Td
(the)Tj
1.4683 0 Td
(agent.)Tj
-1.4513 -1.2983 Td
(The)Tj
1.7858 0 Td
(value)Tj
2.3074 0 Td
(of)Tj
1.0148 0 Td
(taking)Tj
2.7269 0 Td
(action)Tj
/F10 1 Tf
2.6985 0 Td
[()]TJ
/F5 1 Tf
0.7087 0 Td
(in)Tj
1.0318 0 Td
(state)Tj
/F10 1 Tf
2.0353 0 Td
[()]TJ
/F5 1 Tf
0.5499 0 Td
(is)Tj
0.8504 0 Td
(defined)Tj
3.2257 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(total)Tj
2.0069 0 Td
(reward)Tj
3.0274 0 Td
(received)Tj
3.5376 0 Td
(after)Tj
2.0353 0 Td
(selecting)Tj
/F10 1 Tf
3.6737 0 Td
[()]TJ
/F5 1 Tf
-36.8955 -1.2982 Td
(and)Tj
1.7235 0 Td
(then)Tj
2.0126 0 Td
(continuing)Tj
4.6204 0 Td
(on)Tj
1.2699 0 Td
(into)Tj
1.8369 0 Td
(the)Tj
1.4627 0 Td
(future.)Tj
2.8799 0 Td
(This)Tj
1.9843 0 Td
(can)Tj
1.6214 0 Td
(be)Tj
1.1451 0 Td
(expressed)Tj
4.0875 0 Td
(recursively)Tj
4.5411 0 Td
(through)Tj
3.4412 0 Td
(the)Tj
1.4627 0 Td
(standard)Tj
-34.0892 -1.304 Td
(Bellman)Tj
3.515 0 Td
(equation)Tj
3.736 0 Td
(as)Tj
/F10 1 Tf
0 Tc
2.0693 -2.0466 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.8709 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.1451 0 Td
()Tj
/F10 1 Tf
1.0489 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.7518 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.0941 0 Td
()Tj
/F13 1 Tf
0.9978 0 Td
(g)Tj
/F14 1 Tf
0.4365 0.9468 Td
(X)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 381.8834 142.0724 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5 0 0 5 383.9244 143.7732 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 391.011 152.674 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
/F11 1 Tf
0.5725 0 Td
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.1566 0 Td
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
0.9297 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 420.9448 156.8125 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 423.0425 152.674 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 441.1842 156.8125 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 443.3385 152.674 cm
BT
/F12 1 Tf
1 TL
0 0 Td
(;)Tj
/F13 1 Tf
0.4365 0 Td
(p)Tj
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 460.4031 156.8125 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 462.5574 152.674 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
10.0912 -0.3571 Td
()Tj
/F5 1 Tf
(1)Tj
/F11 1 Tf
()Tj
/F5 1 Tf
-0.004 Tc
-36.3455 -2.7099 Td
(where)Tj
/F10 1 Tf
2.6476 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(\))Tj
1.7574 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4684 0 Td
(agent's)Tj
2.965 0 Td
(policy,)Tj
2.863 0 Td
(indicating)Tj
4.2689 0 Td
(the)Tj
1.4683 0 Td
(action)Tj
2.6986 0 Td
(it)Tj
0.7824 0 Td
(will)Tj
1.6667 0 Td
(select)Tj
2.3925 0 Td
(in)Tj
1.0373 0 Td
(the)Tj
1.4684 0 Td
(given)Tj
2.3811 0 Td
(state.)Tj
2.262 0 Td
(The)Tj
1.7801 0 Td
(first)Tj
-34.7581 -1.2983 Td
(term)Tj
2.1317 0 Td
(corresponds)Tj
5.1363 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4741 0 Td
(immediate)Tj
4.4957 0 Td
(reward)Tj
3.033 0 Td
(received)Tj
3.5376 0 Td
(for)Tj
1.3833 0 Td
(picking)Tj
3.2088 0 Td
(action)Tj
/F10 1 Tf
2.6985 0 Td
[()]TJ
/F5 1 Tf
(,)Tj
0.9298 0 Td
(and)Tj
1.7291 0 Td
(the)Tj
1.4683 0 Td
(second)Tj
3.0104 0 Td
(term)Tj
-35.2627 -1.3039 Td
(corresponds)Tj
5.1364 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(expected)Tj
3.7247 0 Td
(future)Tj
2.6589 0 Td
(reward)Tj
3.0274 0 Td
(\(the)Tj
/F10 1 Tf
1.8142 0 Td
[()]TJ
/F5 1 Tf
0.9241 0 Td
(value)Tj
2.3073 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(policy's)Tj
3.2087 0 Td
(action)Tj
2.6985 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4684 0 Td
(next)Tj
1.9558 0 Td
(state,)Tj
-34.9395 -1.2983 Td
(scaled)Tj
2.6362 0 Td
(by)Tj
1.1792 0 Td
(the)Tj
1.4684 0 Td
(probability)Tj
4.5977 0 Td
(of)Tj
1.0148 0 Td
(reaching)Tj
3.6794 0 Td
(that)Tj
1.7801 0 Td
(state\).)Tj
/F15 1 Tf
2.6022 0 Td
(\r)Tj
/F5 1 Tf
0.6747 0 Td
(is)Tj
0.8447 0 Td
(a)Tj
0.6576 0 Td
(discounting)Tj
4.9606 0 Td
(factor,)Tj
2.7665 0 Td
(which)Tj
2.6532 0 Td
(is)Tj
0.8447 0 Td
(necessary)Tj
4.0195 0 Td
(to)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(3)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
50 0 obj
<>stream
endstream
endobj
51 0 obj
<>stream
endstream
endobj
52 0 obj
<>stream
endstream
endobj
53 0 obj
<>stream
endstream
endobj
54 0 obj
<>stream
endstream
endobj
55 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
56 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 58 0 R/Contents 59 0 R/TrimBox[0 0 612 792]>>
endobj
58 0 obj
[60 0 R 61 0 R 62 0 R 63 0 R 64 0 R 65 0 R 66 0 R 67 0 R 68 0 R]
endobj
60 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref001)>>
endobj
61 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref020)>>
endobj
62 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref022)>>
endobj
63 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref017)>>
endobj
64 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref023)>>
endobj
65 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref021)>>
endobj
66 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref021)>>
endobj
67 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref024)>>
endobj
68 0 obj
<>/Border[0 0 0]/A 69 0 R>>
endobj
69 0 obj
<>
endobj
59 0 obj
[70 0 R 71 0 R 72 0 R 73 0 R 74 0 R 75 0 R 76 0 R 77 0 R 78 0 R 79 0 R]
endobj
70 0 obj
<>stream
q
0.83 0.64 0.02 0 k
312.7181 654.3496 m
317.4803 654.3496 l
h
f*
285.9024 445.9465 m
295.4268 445.9465 l
h
f*
300.5858 445.9465 m
310.1102 445.9465 l
h
f*
335.622 445.9465 m
345.1465 445.9465 l
h
f*
349.5685 445.9465 m
359.0929 445.9465 l
h
f*
448.1575 419.9244 m
457.6819 419.9244 l
h
f*
302.1165 393.9024 m
311.5843 393.9024 l
h
f*
286.6961 263.9055 m
296.2205 263.9055 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 200.0125 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(prevent)Tj
3.2541 0 Td
(the)Tj
1.474 0 Td
(expected)Tj
3.719 0 Td
(values)Tj
2.6703 0 Td
(from)Tj
2.2053 0 Td
(going)Tj
2.4718 0 Td
(to)Tj
1.0261 0 Td
(infinity)Tj
3.1578 0 Td
(\(since)Tj
2.5852 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3925 0 Td
(will)Tj
1.661 0 Td
(be)Tj
1.1509 0 Td
(continuously)Tj
5.4368 0 Td
(accu-)Tj
-34.6731 -1.3039 Td
(mulating)Tj
3.8267 0 Td
(more)Tj
2.3414 0 Td
(reward\).)Tj
-4.9719 -1.2982 Td
(Temporal)Tj
4.1499 0 Td
(difference)Tj
4.2065 0 Td
(\(TD\))Tj
2.2507 0 Td
(learning)Tj
3.5206 0 Td
(is)Tj
0.8504 0 Td
(a)Tj
0.652 0 Td
(method)Tj
3.3279 0 Td
(for)Tj
1.3833 0 Td
(learning)Tj
3.5262 0 Td
(those)Tj
/F10 1 Tf
2.3358 0 Td
[()]TJ
/F5 1 Tf
0.9241 0 Td
(values)Tj
2.6702 0 Td
(in)Tj
1.0318 0 Td
(an)Tj
1.2019 0 Td
(environ-)Tj
-33.2275 -1.2983 Td
(ment)Tj
2.3073 0 Td
(where)Tj
2.6532 0 Td
(the)Tj
1.4684 0 Td
(transition)Tj
4.1215 0 Td
(and)Tj
1.7292 0 Td
(reward)Tj
3.033 0 Td
(functions)Tj
3.9912 0 Td
(are)Tj
1.4456 0 Td
(unknown,)Tj
4.3087 0 Td
(and)Tj
1.7291 0 Td
(can)Tj
1.6271 0 Td
(only)Tj
1.9785 0 Td
(be)Tj
1.1452 0 Td
(sampled)Tj
3.549 0 Td
(by)Tj
-35.087 -1.2983 Td
(exploring)Tj
4.0308 0 Td
(the)Tj
1.4683 0 Td
(environment)Tj
5.4312 0 Td
([)Tj
0.83 0.64 0.02 0 k
(1)Tj
0 g
(].)Tj
1.5987 0 Td
(It)Tj
0.8617 0 Td
(accomplishes)Tj
5.5332 0 Td
(this)Tj
1.6725 0 Td
(by)Tj
1.1792 0 Td
(taking)Tj
2.7325 0 Td
(advantage)Tj
4.2406 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(fact)Tj
1.6611 0 Td
(that)Tj
1.7801 0 Td
(a)Tj
/F10 1 Tf
0.652 0 Td
[()]TJ
/F5 1 Tf
-35.3251 -1.3039 Td
(value)Tj
2.3073 0 Td
(is)Tj
0.8448 0 Td
(essentially)Tj
4.2859 0 Td
(a)Tj
0.652 0 Td
(prediction,)Tj
4.5977 0 Td
(which)Tj
2.6476 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1508 0 Td
(compared)Tj
4.2463 0 Td
(against)Tj
3.0274 0 Td
(observed)Tj
3.7927 0 Td
(data.)Tj
2.143 0 Td
(Specifically,)Tj
4.9096 0 Td
(the)Tj
/F10 1 Tf
-36.2322 -1.2982 Td
[()]TJ
/F5 1 Tf
0.924 0 Td
(values)Tj
2.6646 0 Td
(are)Tj
1.4513 0 Td
(updated)Tj
3.4753 0 Td
(according)Tj
4.1896 0 Td
(to)Tj
/F16 1 Tf
0 Tc
-2.007 -1.8482 Td
(D)Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
2.5342 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.1509 0 Td
()Tj
/F13 1 Tf
1.0488 0 Td
(a)Tj
/F11 1 Tf
()Tj
/F10 1 Tf
[( )]TJ
/F11 1 Tf
1.3889 0 Td
()Tj
/F13 1 Tf
0.9978 0 Td
(g)Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 396.8503 614.5511 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 398.948 610.9228 cm
BT
/F12 1 Tf
1 TL
0 0 Td
(;)Tj
/F10 1 Tf
0.4365 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 408.2456 614.5511 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 410.3433 610.9228 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
0.6066 0 Td
( )Tj
/F10 1 Tf
0.9921 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.8652 0 Td
[()]TJ
/F11 1 Tf
()Tj
11.8487 0 Td
()Tj
/F5 1 Tf
(2)Tj
/F11 1 Tf
()Tj
/F5 1 Tf
-0.004 Tc
-36.3455 -2.1543 Td
(where)Tj
/F15 1 Tf
2.6476 0 Td
()Tj
/F5 1 Tf
0.771 0 Td
(is)Tj
0.8447 0 Td
(a)Tj
0.6576 0 Td
(learning)Tj
3.5206 0 Td
(rate)Tj
1.7518 0 Td
(parameter.)Tj
4.5354 0 Td
(The)Tj
1.7859 0 Td
(value)Tj
2.3017 0 Td
(within)Tj
2.8175 0 Td
(the)Tj
1.4683 0 Td
(brackets)Tj
3.532 0 Td
(is)Tj
0.8504 0 Td
(referred)Tj
3.4185 0 Td
(to)Tj
1.0262 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(tempo-)Tj
-34.4123 -1.2982 Td
(ral)Tj
1.2699 0 Td
(difference/prediction)Tj
8.6853 0 Td
(error.)Tj
2.4888 0 Td
(Note)Tj
2.1884 0 Td
(that)Tj
1.7744 0 Td
(here)Tj
1.9673 0 Td
(the)Tj
1.4683 0 Td
(functions)Tj
/F10 1 Tf
3.9911 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
1.7064 0 Td
[()]TJ
/F5 1 Tf
(\),)Tj
/F10 1 Tf
1.2699 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
1.6668 0 Td
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
0.9297 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 497.3669 580.0251 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 499.5212 576.3968 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(\),)Tj
0.7823 0 Td
(and)Tj
/F10 1 Tf
1.7291 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 536.6551 580.0251 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 538.7527 576.3968 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(\))Tj
0.5612 0 Td
(have)Tj
-34.435 -1.2982 Td
(been)Tj
2.1203 0 Td
(replaced)Tj
3.5887 0 Td
(by)Tj
1.1792 0 Td
(the)Tj
1.474 0 Td
(samples)Tj
/F10 1 Tf
3.3789 0 Td
[( )]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
0.805 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 328.8188 567.0425 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 330.9165 563.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(,)Tj
0.4478 0 Td
(and)Tj
/F10 1 Tf
1.7291 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 357.5622 567.0425 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 359.6598 563.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(,)Tj
0.4478 0 Td
(respectively.)Tj
5.108 0 Td
(Those)Tj
2.6476 0 Td
(samples)Tj
3.3845 0 Td
(allow)Tj
2.3357 0 Td
(us)Tj
1.1112 0 Td
(to)Tj
1.0262 0 Td
(approximate)Tj
-32.0256 -1.3039 Td
(the)Tj
1.4684 0 Td
(value)Tj
2.3074 0 Td
(of)Tj
1.0147 0 Td
(action)Tj
/F10 1 Tf
2.6986 0 Td
[()]TJ
/F5 1 Tf
(,)Tj
0.9298 0 Td
(which)Tj
2.6532 0 Td
(we)Tj
1.3266 0 Td
(compare)Tj
3.7133 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(predicted)Tj
4.0025 0 Td
(value)Tj
/F10 1 Tf
2.3074 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
1.8198 0 Td
[()]TJ
/F5 1 Tf
(\))Tj
1.0432 0 Td
(in)Tj
1.0375 0 Td
(order)Tj
2.4207 0 Td
(to)Tj
1.0262 0 Td
(compute)Tj
3.736 0 Td
(the)Tj
-35.9997 -1.2982 Td
(update)Tj
2.948 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(prediction.)Tj
4.5978 0 Td
(The)Tj
1.7801 0 Td
(agent)Tj
2.3925 0 Td
(can)Tj
1.627 0 Td
(then)Tj
2.0125 0 Td
(determine)Tj
4.3257 0 Td
(a)Tj
0.6519 0 Td
(policy)Tj
2.6362 0 Td
(based)Tj
2.4775 0 Td
(on)Tj
1.2756 0 Td
(those)Tj
/F10 1 Tf
2.3357 0 Td
[()]TJ
/F5 1 Tf
0.9241 0 Td
(values,)Tj
2.8913 0 Td
(usu-)Tj
-35.3704 -1.2983 Td
(ally)Tj
1.5988 0 Td
(by)Tj
1.1848 0 Td
(selecting)Tj
3.6737 0 Td
(the)Tj
1.4683 0 Td
(highest)Tj
3.0955 0 Td
(valued)Tj
2.8289 0 Td
(action)Tj
2.6985 0 Td
(in)Tj
1.0375 0 Td
(each)Tj
2.0296 0 Td
(state)Tj
2.0352 0 Td
(\(with)Tj
2.3357 0 Td
(occasional)Tj
4.371 0 Td
(random)Tj
3.4299 0 Td
(exploration\).)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 489.9968 cm
BT
/F0 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(2.2)Tj
1.5921 0 Td
(Hierarchical)Tj
5.4284 0 Td
(reinforcement)Tj
6.2646 0 Td
(learning)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 472.9889 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(As)Tj
1.2642 0 Td
(mentioned,)Tj
4.8075 0 Td
(HRL)Tj
2.1487 0 Td
(attempts)Tj
3.668 0 Td
(to)Tj
1.0261 0 Td
(improve)Tj
3.583 0 Td
(the)Tj
1.4683 0 Td
(practical)Tj
3.634 0 Td
(applicability)Tj
5.1023 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(basic)Tj
2.211 0 Td
(RL)Tj
1.3833 0 Td
(theory)Tj
2.8006 0 Td
(out-)Tj
-35.5802 -1.2982 Td
(lined)Tj
2.2336 0 Td
(above,)Tj
2.7723 0 Td
(through)Tj
3.4412 0 Td
(the)Tj
1.4684 0 Td
(addition)Tj
3.6 0 Td
(of)Tj
1.0148 0 Td
(hierarchical)Tj
4.9379 0 Td
(processing.)Tj
4.6884 0 Td
(There)Tj
2.5796 0 Td
(are)Tj
1.4456 0 Td
(several)Tj
2.9367 0 Td
(different)Tj
-31.1185 -1.2983 Td
(approaches)Tj
4.7338 0 Td
(to)Tj
1.0261 0 Td
(HRL)Tj
2.1487 0 Td
(\([)Tj
0.83 0.64 0.02 0 k
(20)Tj
0 g
()Tj
0.83 0.64 0.02 0 k
(22)Tj
0 g
(];)Tj
3.8834 0 Td
(see)Tj
1.4287 0 Td
([)Tj
0.83 0.64 0.02 0 k
(17)Tj
0 g
(,)Tj
0.83 0.64 0.02 0 k
1.7347 0 Td
(23)Tj
0 g
(])Tj
1.5137 0 Td
(for)Tj
1.3833 0 Td
(a)Tj
0.6577 0 Td
(review\).)Tj
3.4072 0 Td
(In)Tj
1.1112 0 Td
(this)Tj
1.6724 0 Td
(section)Tj
3.0501 0 Td
(we)Tj
1.3266 0 Td
(try)Tj
1.3436 0 Td
(to)Tj
1.0261 0 Td
(describe)Tj
3.5263 0 Td
(HRL)Tj
-34.9736 -1.3039 Td
(as)Tj
1.0147 0 Td
(generally)Tj
3.8325 0 Td
(as)Tj
1.0148 0 Td
(possible,)Tj
3.634 0 Td
(without)Tj
3.3335 0 Td
(reference)Tj
3.9118 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.474 0 Td
(detailed)Tj
3.3675 0 Td
(distinctions)Tj
4.8926 0 Td
(of)Tj
1.0148 0 Td
(these)Tj
2.2564 0 Td
(approaches.)Tj
-30.7727 -1.2983 Td
(However,)Tj
4.0761 0 Td
(we)Tj
1.3267 0 Td
(draw)Tj
2.2393 0 Td
(most)Tj
2.2053 0 Td
(heavily)Tj
3.0274 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
1.4683 0 Td
(options)Tj
3.2259 0 Td
(framework)Tj
4.6147 0 Td
(of)Tj
1.0148 0 Td
([)Tj
0.83 0.64 0.02 0 k
(21)Tj
0 g
(].)Tj
-23.2779 -1.2982 Td
(The)Tj
1.7858 0 Td
(central)Tj
2.965 0 Td
(idea)Tj
1.8709 0 Td
(of)Tj
1.0148 0 Td
(hierarchical)Tj
4.9379 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5206 0 Td
(\(HRL\))Tj
2.8289 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4684 0 Td
(notion)Tj
2.897 0 Td
(of)Tj
1.0148 0 Td
(an)Tj
1.2018 0 Td
(abstract)Tj
-33.4882 -1.3039 Td
(action)Tj
2.6985 0 Td
(\(e.g.,)Tj
2.126 0 Td
(options)Tj
4.0082 0 Td
(in)Tj
1.0374 0 Td
([)Tj
0.83 0.64 0.02 0 k
(21)Tj
0 g
(]\).)Tj
2.4151 0 Td
(Abstract)Tj
3.6 0 Td
(actions)Tj
3.0557 0 Td
(work)Tj
2.2791 0 Td
(like)Tj
1.6554 0 Td
(shortcuts,)Tj
4.1215 0 Td
(encapsulating)Tj
5.7147 0 Td
(whole)Tj
-32.7116 -1.2983 Td
(sequences)Tj
4.2179 0 Td
(of)Tj
1.0148 0 Td
(decisions)Tj
3.9061 0 Td
(\(the)Tj
1.8085 0 Td
(basic)Tj
2.211 0 Td
(actions)Tj
3.0614 0 Td
(that)Tj
1.7801 0 Td
(actually)Tj
3.2825 0 Td
(carry)Tj
2.2734 0 Td
(out)Tj
1.5534 0 Td
(the)Tj
1.474 0 Td
(abstract)Tj
3.3449 0 Td
(action\))Tj
3.0387 0 Td
(in)Tj
1.0374 0 Td
(a)Tj
0.652 0 Td
(single)Tj
-34.6561 -1.2983 Td
(choice.)Tj
3.016 0 Td
(This)Tj
1.9899 0 Td
(framework)Tj
4.6148 0 Td
(is)Tj
0.8447 0 Td
(hierarchical)Tj
4.9379 0 Td
(because)Tj
3.3222 0 Td
(abstract)Tj
3.3449 0 Td
(actions)Tj
3.0614 0 Td
(can)Tj
1.627 0 Td
(themselves)Tj
4.5581 0 Td
(be)Tj
1.1509 0 Td
(components)Tj
-32.4678 -1.3039 Td
(in)Tj
1.0318 0 Td
(other)Tj
2.3527 0 Td
(abstract)Tj
3.3449 0 Td
(actions.)Tj
3.2825 0 Td
(For)Tj
1.627 0 Td
(example,)Tj
3.7758 0 Td
(imagine)Tj
3.4469 0 Td
(a)Tj
0.6519 0 Td
(robotic)Tj
3.1011 0 Td
(agent)Tj
2.3924 0 Td
(navigating)Tj
4.3994 0 Td
(around)Tj
3.1351 0 Td
(a)Tj
0.6576 0 Td
(house.)Tj
-33.1991 -1.2983 Td
(Basic)Tj
2.2847 0 Td
(actions)Tj
3.0614 0 Td
(might)Tj
2.5965 0 Td
(include)Tj
3.1804 0 Td
(turn)Tj
2.3584 0 Td
(left,)Tj
2.0977 0 Td
(turn)Tj
2.3584 0 Td
(right,)Tj
2.7666 0 Td
(and)Tj
1.7291 0 Td
(move)Tj
2.8176 0 Td
(forward.)Tj
4.0195 0 Td
(An)Tj
1.4513 0 Td
(abstract)Tj
3.3506 0 Td
(action)Tj
-34.0722 -1.2982 Td
(might)Tj
2.5965 0 Td
(be)Tj
1.1508 0 Td
(go)Tj
1.5818 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(kitchen.)Tj
3.8268 0 Td
(Selecting)Tj
3.7927 0 Td
(that)Tj
1.7745 0 Td
(action)Tj
2.6985 0 Td
(will)Tj
1.6668 0 Td
(activate)Tj
3.2542 0 Td
(a)Tj
0.6519 0 Td
(subpolicy)Tj
4.0309 0 Td
(designed)Tj
3.7643 0 Td
(to)Tj
1.0262 0 Td
(take)Tj
1.8708 0 Td
(the)Tj
-36.1811 -1.2983 Td
(agent)Tj
2.3867 0 Td
(from)Tj
2.2053 0 Td
(wherever)Tj
3.9005 0 Td
(it)Tj
0.788 0 Td
(currently)Tj
3.8835 0 Td
(is)Tj
0.8503 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(kitchen)Tj
3.2032 0 Td
(via)Tj
1.3776 0 Td
(a)Tj
0.6519 0 Td
(sequence)Tj
3.8608 0 Td
(of)Tj
1.0148 0 Td
(basic)Tj
2.211 0 Td
(actions.)Tj
3.2825 0 Td
(And,)Tj
2.2054 0 Td
(hierar-)Tj
-34.316 -1.3039 Td
(chically,)Tj
3.4695 0 Td
(go)Tj
1.5818 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(kitchen)Tj
3.6 0 Td
(could)Tj
2.4491 0 Td
(itself)Tj
2.1033 0 Td
(be)Tj
1.1509 0 Td
(one)Tj
1.7008 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(actions)Tj
3.0614 0 Td
(in)Tj
1.0318 0 Td
(a)Tj
0.6576 0 Td
(more)Tj
2.3414 0 Td
(abstract)Tj
3.3449 0 Td
(policy)Tj
2.6362 0 Td
(for)Tj
-34.1062 -1.2983 Td
(make)Tj
2.7836 0 Td
(dinner.)Tj
-1.5874 -1.2982 Td
(The)Tj
1.7858 0 Td
(incorporation)Tj
5.7883 0 Td
(of)Tj
1.0148 0 Td
(abstract)Tj
3.3448 0 Td
(actions)Tj
3.0614 0 Td
(helps)Tj
2.2961 0 Td
(to)Tj
1.0261 0 Td
(address)Tj
3.2258 0 Td
(the)Tj
1.4684 0 Td
(challenges)Tj
4.3199 0 Td
(faced)Tj
2.3131 0 Td
(by)Tj
1.1849 0 Td
(RL)Tj
1.3776 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
0.6519 0 Td
(num-)Tj
-35.0926 -1.3039 Td
(ber)Tj
1.5136 0 Td
(of)Tj
1.0148 0 Td
(different)Tj
3.651 0 Td
(ways)Tj
2.1487 0 Td
([)Tj
0.83 0.64 0.02 0 k
(24)Tj
0 g
(].)Tj
2.0693 0 Td
(Perhaps)Tj
3.4129 0 Td
(the)Tj
1.4626 0 Td
(most)Tj
2.1997 0 Td
(basic)Tj
2.211 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7688 0 Td
(it)Tj
0.7824 0 Td
(speeds)Tj
2.8289 0 Td
(reward)Tj
3.0274 0 Td
(propagation)Tj
5.0854 0 Td
(through-)Tj
-34.0212 -1.2983 Td
(out)Tj
1.5533 0 Td
(the)Tj
1.4684 0 Td
(task.)Tj
2.0352 0 Td
(Returning)Tj
4.2917 0 Td
(to)Tj
1.0261 0 Td
(our)Tj
1.6271 0 Td
(example,)Tj
3.77 0 Td
(imagine)Tj
3.4469 0 Td
(an)Tj
1.2019 0 Td
(agent)Tj
2.3924 0 Td
(starting)Tj
3.2599 0 Td
(in)Tj
1.0374 0 Td
(the)Tj
1.4684 0 Td
(bedroom)Tj
3.8834 0 Td
(and)Tj
1.7291 0 Td
(trying)Tj
-34.1912 -1.2983 Td
(to)Tj
1.0261 0 Td
(learn)Tj
2.2393 0 Td
(how)Tj
1.9389 0 Td
(to)Tj
1.0262 0 Td
(navigate)Tj
3.5489 0 Td
(to)Tj
1.0205 0 Td
(the)Tj
1.474 0 Td
(refridgerator.)Tj
5.5615 0 Td
(A)Tj
0.9071 0 Td
(long)Tj
1.9842 0 Td
(sequence)Tj
3.8608 0 Td
(of)Tj
1.0148 0 Td
(basic)Tj
2.211 0 Td
(actions)Tj
3.0614 0 Td
(will)Tj
1.6611 0 Td
(be)Tj
1.1509 0 Td
(required)Tj
-33.6867 -1.3039 Td
(in)Tj
1.0318 0 Td
(order)Tj
2.4207 0 Td
(to)Tj
1.0262 0 Td
(complete)Tj
3.8834 0 Td
(the)Tj
1.4683 0 Td
(task,)Tj
2.0353 0 Td
(thus)Tj
1.9332 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3924 0 Td
(is)Tj
0.8447 0 Td
(faced)Tj
2.3187 0 Td
(with)Tj
1.9956 0 Td
(a)Tj
0.6576 0 Td
(challenging)Tj
4.8132 0 Td
(credit)Tj
2.5342 0 Td
(assignment)Tj
-30.8237 -1.2983 Td
(problem)Tj
3.6113 0 Td
(when)Tj
2.4037 0 Td
(trying)Tj
2.6249 0 Td
(to)Tj
1.0261 0 Td
(decide)Tj
2.812 0 Td
(what)Tj
2.1656 0 Td
(the)Tj
1.4684 0 Td
(best)Tj
1.8085 0 Td
(action)Tj
2.6985 0 Td
(is)Tj
0.8504 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.474 0 Td
(bedroom.)Tj
4.1046 0 Td
(But)Tj
1.627 0 Td
(suppose)Tj
3.4356 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
-34.6108 -1.2982 Td
(selects)Tj
2.7552 0 Td
(the)Tj
1.4683 0 Td
(go)Tj
1.5874 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(kitchen)Tj
3.5943 0 Td
(action,)Tj
2.9254 0 Td
(and)Tj
1.7291 0 Td
(then)Tj
2.0182 0 Td
(a)Tj
0.6577 0 Td
(few)Tj
1.6157 0 Td
(basic)Tj
2.211 0 Td
(actions)Tj
3.0557 0 Td
(to)Tj
1.0262 0 Td
(take)Tj
1.8765 0 Td
(it)Tj
0.7824 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
1.4683 0 Td
(centre)Tj
2.7099 0 Td
(of)Tj
-36.1811 -1.3039 Td
(the)Tj
1.4683 0 Td
(kitchen)Tj
3.2031 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(refridgerator.)Tj
5.5672 0 Td
(Reward)Tj
3.2825 0 Td
(information)Tj
5.074 0 Td
(can)Tj
1.6271 0 Td
(then)Tj
2.0182 0 Td
(propagate)Tj
4.1896 0 Td
(directly)Tj
3.2315 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
-34.3613 -1.2983 Td
(kitchen)Tj
3.2031 0 Td
(to)Tj
1.0261 0 Td
(wherever)Tj
3.9005 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3924 0 Td
(selected)Tj
3.3449 0 Td
(the)Tj
1.4683 0 Td
(go)Tj
1.5874 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(kitchen)Tj
3.5943 0 Td
(action.)Tj
2.9253 0 Td
(The)Tj
1.7802 0 Td
(agent)Tj
2.3924 0 Td
(can)Tj
1.6271 0 Td
(quickly)Tj
-33.2048 -1.2983 Td
(learn)Tj
2.245 0 Td
(whether)Tj
3.4752 0 Td
(selecting)Tj
3.6737 0 Td
(go)Tj
1.5874 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4684 0 Td
(kitchen)Tj
3.5943 0 Td
(was)Tj
1.7008 0 Td
(a)Tj
0.6519 0 Td
(good)Tj
2.228 0 Td
(choice)Tj
2.795 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.474 0 Td
(bedroom,)Tj
4.1045 0 Td
(even)Tj
2.0693 0 Td
(though)Tj
-33.1254 -1.2982 Td
(there)Tj
2.262 0 Td
(were)Tj
2.126 0 Td
(many)Tj
2.4774 0 Td
(basic)Tj
2.211 0 Td
(actions)Tj
3.0558 0 Td
(separating)Tj
4.3426 0 Td
(the)Tj
1.4683 0 Td
(decision)Tj
3.549 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
1.4684 0 Td
(eventual)Tj
3.5773 0 Td
(outcome.)Tj
3.9514 0 Td
(In)Tj
1.1056 0 Td
(other)Tj
-33.8001 -1.304 Td
(words,)Tj
2.8969 0 Td
(the)Tj
1.4684 0 Td
(complexity)Tj
4.6431 0 Td
(of)Tj
1.0148 0 Td
(learning)Tj
3.5206 0 Td
(the)Tj
1.474 0 Td
(value)Tj
2.3074 0 Td
(of)Tj
1.0148 0 Td
(an)Tj
1.2019 0 Td
(abstract)Tj
3.3448 0 Td
(action)Tj
2.6986 0 Td
(is)Tj
0.8504 0 Td
(relatively)Tj
3.8381 0 Td
(independent)Tj
5.2724 0 Td
(of)Tj
-35.5462 -1.2982 Td
(the)Tj
1.4683 0 Td
(length)Tj
2.7269 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(actual)Tj
2.5795 0 Td
(decision)Tj
3.549 0 Td
(path)Tj
1.9955 0 Td
(that)Tj
1.7802 0 Td
(action)Tj
2.6986 0 Td
(will)Tj
1.661 0 Td
(invoke.)Tj
-19.7516 -1.2983 Td
(Another)Tj
3.5829 0 Td
(important)Tj
4.2803 0 Td
(advantage)Tj
4.2406 0 Td
(of)Tj
1.0148 0 Td
(HRL)Tj
2.1487 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7801 0 Td
(it)Tj
0.7881 0 Td
(promotes)Tj
4.0251 0 Td
(better)Tj
2.5399 0 Td
(exploration.)Tj
5.0286 0 Td
(One)Tj
1.9389 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
-34.4237 -1.3039 Td
(weaknesses)Tj
4.7338 0 Td
(of)Tj
1.0148 0 Td
(RL)Tj
1.3833 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7801 0 Td
(learning)Tj
3.5263 0 Td
(tends)Tj
2.3754 0 Td
(to)Tj
1.0262 0 Td
(begin)Tj
2.4321 0 Td
(with)Tj
1.9956 0 Td
(a)Tj
0.6576 0 Td
(long)Tj
1.9842 0 Td
(period)Tj
2.8347 0 Td
(of)Tj
1.0148 0 Td
(random)Tj
3.4299 0 Td
(action)Tj
2.6985 0 Td
(selection,)Tj
-33.732 -1.2983 Td
(or)Tj
1.0941 0 Td
(flailing.)Tj
3.9968 0 Td
(This)Tj
1.9843 0 Td
(results)Tj
2.8119 0 Td
(in)Tj
1.0318 0 Td
(a)Tj
0.6577 0 Td
(kind)Tj
2.0579 0 Td
(of)Tj
1.0148 0 Td
(Brownian)Tj
4.1612 0 Td
(motion,)Tj
3.3903 0 Td
(where)Tj
2.6532 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3924 0 Td
(moves)Tj
2.7836 0 Td
(around)Tj
3.1351 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(4)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
71 0 obj
<>stream
endstream
endobj
72 0 obj
<>stream
endstream
endobj
73 0 obj
<>stream
endstream
endobj
74 0 obj
<>stream
endstream
endobj
75 0 obj
<>stream
endstream
endobj
76 0 obj
<>stream
endstream
endobj
77 0 obj
<>stream
endstream
endobj
78 0 obj
<>stream
endstream
endobj
79 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
80 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 82 0 R/Contents 83 0 R/TrimBox[0 0 612 792]>>
endobj
82 0 obj
[84 0 R 85 0 R 86 0 R 87 0 R 88 0 R 89 0 R]
endobj
84 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref025)>>
endobj
85 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref026)>>
endobj
86 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref027)>>
endobj
87 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref028)>>
endobj
88 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.e002)>>
endobj
89 0 obj
<>/Border[0 0 0]/A 90 0 R>>
endobj
90 0 obj
<>
endobj
83 0 obj
[91 0 R 92 0 R 93 0 R 94 0 R 95 0 R 96 0 R]
endobj
91 0 obj
<>stream
q
0.83 0.64 0.02 0 k
278.589 589.3228 m
288.0567 589.3228 l
h
f*
252.5102 563.3008 m
262.0346 563.3008 l
h
f*
505.0205 329.3291 m
514.5449 329.3291 l
h
f*
203.4142 121.3228 m
212.9386 121.3228 l
h
f*
513.3543 95.3008 m
537.9024 95.3008 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 200.0125 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(limited)Tj
3.067 0 Td
(area)Tj
1.8822 0 Td
(rather)Tj
2.6476 0 Td
(than)Tj
2.0295 0 Td
(exploring)Tj
4.0309 0 Td
(throughout)Tj
4.7792 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0353 0 Td
(space.)Tj
2.6022 0 Td
(One)Tj
1.9388 0 Td
(can)Tj
1.6271 0 Td
(imagine)Tj
3.4469 0 Td
(that)Tj
1.7802 0 Td
(if)Tj
0.771 0 Td
(our)Tj
-34.1062 -1.3039 Td
(refrigerator-seeking)Tj
8.1467 0 Td
(agent)Tj
2.3867 0 Td
(begins)Tj
2.795 0 Td
(selecting)Tj
3.6793 0 Td
(random)Tj
3.4242 0 Td
(basic)Tj
2.2111 0 Td
(actions)Tj
3.0614 0 Td
(in)Tj
1.0374 0 Td
(the)Tj
1.4684 0 Td
(bedroom,)Tj
4.1102 0 Td
(it)Tj
0.7823 0 Td
(will)Tj
1.6668 0 Td
(spend)Tj
-34.7695 -1.2982 Td
(a)Tj
0.6519 0 Td
(long)Tj
1.9899 0 Td
(time)Tj
2.0239 0 Td
(wandering)Tj
4.4901 0 Td
(around)Tj
3.1408 0 Td
(the)Tj
1.4683 0 Td
(bedroom)Tj
3.8834 0 Td
(before)Tj
2.7383 0 Td
(it)Tj
0.788 0 Td
(gets)Tj
1.7688 0 Td
(anywhere)Tj
4.0876 0 Td
(close)Tj
2.177 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(kitchen.)Tj
3.4299 0 Td
(But)Tj
1.6271 0 Td
(if)Tj
-36.7594 -1.2983 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3924 0 Td
(randomly)Tj
4.1272 0 Td
(selects)Tj
2.7553 0 Td
(the)Tj
1.4683 0 Td
(go)Tj
1.5874 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(dining)Tj
2.846 0 Td
(room)Tj
2.8176 0 Td
(action,)Tj
2.9196 0 Td
(that)Tj
1.7802 0 Td
(will)Tj
1.6611 0 Td
(take)Tj
1.8765 0 Td
(it)Tj
0.7824 0 Td
(to)Tj
1.0261 0 Td
(a)Tj
0.652 0 Td
(significantly)Tj
-32.6549 -1.2983 Td
(different)Tj
3.6566 0 Td
(area)Tj
1.8822 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0353 0 Td
(space.)Tj
2.6022 0 Td
(Thus)Tj
2.2507 0 Td
(the)Tj
1.4683 0 Td
(agent's)Tj
2.9651 0 Td
(random)Tj
3.4298 0 Td
(exploration)Tj
4.8019 0 Td
(is)Tj
0.8504 0 Td
(going)Tj
2.4718 0 Td
(to)Tj
1.0261 0 Td
(result)Tj
2.4491 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
-35.4101 -1.3039 Td
(much)Tj
2.5171 0 Td
(broader)Tj
3.3619 0 Td
(coverage)Tj
3.7247 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(search)Tj
2.7609 0 Td
(space,)Tj
2.6022 0 Td
(and)Tj
1.7291 0 Td
(therefore)Tj
3.8551 0 Td
(is)Tj
0.8504 0 Td
(more)Tj
2.3414 0 Td
(likely)Tj
2.3528 0 Td
(to)Tj
1.0261 0 Td
(bring)Tj
2.3811 0 Td
(it)Tj
0.7823 0 Td
(within)Tj
-32.7682 -1.2982 Td
(proximity)Tj
4.1839 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(goal.)Tj
-5.4765 -1.2983 Td
(Note)Tj
2.1883 0 Td
(that)Tj
1.7801 0 Td
(both)Tj
2.058 0 Td
(the)Tj
1.4683 0 Td
(above)Tj
2.5455 0 Td
(advantages)Tj
4.6035 0 Td
(are)Tj
1.4456 0 Td
(dependent)Tj
4.4617 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
1.4683 0 Td
(quality)Tj
2.9537 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(abstract)Tj
3.3505 0 Td
(actions;)Tj
-33.2785 -1.3039 Td
(including)Tj
4.0421 0 Td
(unhelpful)Tj
4.0706 0 Td
(actions,)Tj
3.2881 0 Td
(such)Tj
2.058 0 Td
(as)Tj
1.0148 0 Td
(go)Tj
1.5817 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4684 0 Td
(roof,)Tj
2.5114 0 Td
(can)Tj
1.6271 0 Td
(actually)Tj
3.2825 0 Td
(make)Tj
2.3981 0 Td
(the)Tj
1.4683 0 Td
(problem)Tj
3.6114 0 Td
(more)Tj
2.3414 0 Td
(dif-)Tj
-35.79 -1.2983 Td
(ficult)Tj
2.2677 0 Td
(for)Tj
1.3889 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3924 0 Td
([)Tj
0.83 0.64 0.02 0 k
(25)Tj
0 g
(].)Tj
2.0749 0 Td
(Even)Tj
2.2167 0 Td
(if)Tj
0.7767 0 Td
(the)Tj
1.4683 0 Td
(abstract)Tj
3.3449 0 Td
(actions)Tj
3.0614 0 Td
(are)Tj
1.4457 0 Td
(useful,)Tj
2.8232 0 Td
(they)Tj
1.9219 0 Td
(increase)Tj
3.4753 0 Td
(the)Tj
1.4683 0 Td
(complexity)Tj
4.6431 0 Td
(of)Tj
-36.2378 -1.2983 Td
(the)Tj
1.4683 0 Td
(problem)Tj
3.6113 0 Td
(by)Tj
1.1849 0 Td
(expanding)Tj
4.4163 0 Td
(the)Tj
1.4684 0 Td
(action)Tj
2.6985 0 Td
(space,)Tj
2.6022 0 Td
(so)Tj
1.0885 0 Td
(they)Tj
1.9276 0 Td
(must)Tj
2.2223 0 Td
(provide)Tj
3.2882 0 Td
(benefits)Tj
3.3392 0 Td
(that)Tj
1.7801 0 Td
(outweigh)Tj
3.9175 0 Td
(those)Tj
-35.0133 -1.3039 Td
(innate)Tj
2.7382 0 Td
(costs)Tj
2.1713 0 Td
([)Tj
0.83 0.64 0.02 0 k
(26)Tj
0 g
(].)Tj
2.075 0 Td
(The)Tj
1.7858 0 Td
(question)Tj
3.6567 0 Td
(of)Tj
1.0148 0 Td
(how)Tj
1.9388 0 Td
(to)Tj
1.0262 0 Td
(discover)Tj
3.5546 0 Td
(useful)Tj
2.5965 0 Td
(abstract)Tj
3.3449 0 Td
(actions)Tj
3.0614 0 Td
(is)Tj
0.8504 0 Td
(an)Tj
1.2018 0 Td
(important)Tj
4.2803 0 Td
(and)Tj
-35.2967 -1.2982 Td
(open)Tj
2.2166 0 Td
(problem)Tj
3.6113 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.4741 0 Td
(computational)Tj
6.0491 0 Td
(study)Tj
2.3867 0 Td
(of)Tj
1.0148 0 Td
(HRL,)Tj
2.3754 0 Td
(but)Tj
1.5534 0 Td
(beyond)Tj
3.1861 0 Td
(the)Tj
1.4684 0 Td
(scope)Tj
2.4547 0 Td
(of)Tj
1.0148 0 Td
(this)Tj
1.6725 0 Td
(paper)Tj
2.4831 0 Td
(\(we)Tj
1.6668 0 Td
(will)Tj
-35.6596 -1.2983 Td
(return)Tj
2.7609 0 Td
(to)Tj
1.0261 0 Td
(this)Tj
1.6724 0 Td
(in)Tj
1.0375 0 Td
(Section)Tj
3.1635 0 Td
(6\).)Tj
-8.4642 -1.3039 Td
(A)Tj
0.907 0 Td
(third)Tj
2.2111 0 Td
(advantage)Tj
4.2406 0 Td
(of)Tj
1.0148 0 Td
(HRL)Tj
2.1486 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7802 0 Td
(it)Tj
0.788 0 Td
(lends)Tj
2.3244 0 Td
(itself)Tj
2.1033 0 Td
(to)Tj
1.0261 0 Td
(state)Tj
2.0353 0 Td
(abstraction.)Tj
4.8982 0 Td
(State)Tj
2.1487 0 Td
(abstraction)Tj
4.6658 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
-35.1834 -1.2983 Td
(process)Tj
3.1804 0 Td
(of)Tj
1.0148 0 Td
(ignoring)Tj
3.6623 0 Td
(parts)Tj
2.1997 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(state)Tj
2.0353 0 Td
(that)Tj
1.7745 0 Td
(are)Tj
1.4513 0 Td
(irrelevant)Tj
4.0592 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(current)Tj
3.1862 0 Td
(task,)Tj
2.0352 0 Td
(thus)Tj
1.9332 0 Td
(reducing)Tj
3.7758 0 Td
(the)Tj
-35.2911 -1.2982 Td
(size)Tj
1.6837 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(state)Tj
2.0352 0 Td
(space.)Tj
2.6079 0 Td
(In)Tj
1.1055 0 Td
(HRL)Tj
2.1486 0 Td
(it)Tj
0.7824 0 Td
(is)Tj
0.8504 0 Td
(possible)Tj
3.4072 0 Td
(to)Tj
1.0261 0 Td
(associate)Tj
3.7304 0 Td
(different)Tj
3.6567 0 Td
(state)Tj
2.0352 0 Td
(abstractions)Tj
5.0287 0 Td
(with)Tj
2.0012 0 Td
(the)Tj
1.4683 0 Td
(dif-)Tj
-36.0507 -1.2983 Td
(ferent)Tj
2.5738 0 Td
(abstract)Tj
3.3449 0 Td
(actions.)Tj
3.2825 0 Td
(For)Tj
1.627 0 Td
(example,)Tj
3.7758 0 Td
(suppose)Tj
3.4355 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3924 0 Td
(is)Tj
0.8447 0 Td
(trying)Tj
2.6249 0 Td
(to)Tj
1.0261 0 Td
(learn)Tj
2.245 0 Td
(a)Tj
0.652 0 Td
(subpolicy)Tj
4.0308 0 Td
(to)Tj
1.0262 0 Td
(get)Tj
1.4059 0 Td
(to)Tj
-35.7559 -1.3039 Td
(the)Tj
1.4683 0 Td
(doorway)Tj
3.7077 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(bedroom.)Tj
4.1102 0 Td
(In)Tj
1.1055 0 Td
(that)Tj
1.7802 0 Td
(case)Tj
1.8595 0 Td
(it)Tj
0.788 0 Td
(does)Tj
2.0353 0 Td
(not)Tj
1.576 0 Td
(really)Tj
2.3981 0 Td
(matter)Tj
2.863 0 Td
(what)Tj
2.1657 0 Td
(is)Tj
0.8447 0 Td
(going)Tj
2.4774 0 Td
(on)Tj
1.2756 0 Td
(anywhere)Tj
-32.9383 -1.2983 Td
(else)Tj
1.6724 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(house,)Tj
2.7949 0 Td
(so)Tj
1.0829 0 Td
(that)Tj
1.7801 0 Td
(subpolicy)Tj
4.0308 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1509 0 Td
(learned)Tj
3.1918 0 Td
(based)Tj
2.4774 0 Td
(only)Tj
1.9729 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
1.474 0 Td
(parts)Tj
2.1997 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0353 0 Td
(pertain-)Tj
-33.7547 -1.2983 Td
(ing)Tj
1.5023 0 Td
(to)Tj
1.0205 0 Td
(the)Tj
1.474 0 Td
(bedroom.)Tj
4.1045 0 Td
(This)Tj
1.9899 0 Td
(will)Tj
1.6611 0 Td
(make)Tj
2.3924 0 Td
(it)Tj
0.7881 0 Td
(much)Tj
2.5171 0 Td
(easier)Tj
2.5002 0 Td
(to)Tj
1.0261 0 Td
(learn)Tj
2.245 0 Td
(that)Tj
1.7802 0 Td
(subpolicy.)Tj
4.2519 0 Td
(Again,)Tj
2.846 0 Td
(the)Tj
1.4683 0 Td
(question)Tj
-33.5676 -1.3039 Td
(of)Tj
1.0147 0 Td
(how)Tj
1.9389 0 Td
(to)Tj
1.0262 0 Td
(come)Tj
2.3924 0 Td
(up)Tj
1.2642 0 Td
(with)Tj
1.9956 0 Td
(useful)Tj
2.5965 0 Td
(state)Tj
2.0353 0 Td
(abstractions)Tj
5.0286 0 Td
(is)Tj
0.8504 0 Td
(nontrivial)Tj
4.1612 0 Td
(\(e.g.,)Tj
2.126 0 Td
(how)Tj
1.9389 0 Td
(does)Tj
2.0409 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3867 0 Td
(know)Tj
-34.2649 -1.2982 Td
(which)Tj
2.6418 0 Td
(aspects)Tj
3.0387 0 Td
(of)Tj
1.0092 0 Td
(the)Tj
1.4626 0 Td
(state)Tj
2.0353 0 Td
(are)Tj
1.44 0 Td
(associated)Tj
4.2519 0 Td
(with)Tj
1.9956 0 Td
(the)Tj
1.4627 0 Td
(bedroom,)Tj
4.1045 0 Td
(or)Tj
1.0942 0 Td
(which)Tj
2.6475 0 Td
(it)Tj
0.7824 0 Td
(is)Tj
0.839 0 Td
(safe)Tj
1.7235 0 Td
(to)Tj
1.0205 0 Td
(ignore?\).)Tj
3.736 0 Td
(How-)Tj
-35.2854 -1.2983 Td
(ever,)Tj
2.1146 0 Td
(this)Tj
1.6781 0 Td
(question)Tj
3.6567 0 Td
(is)Tj
0.8503 0 Td
(more)Tj
2.3414 0 Td
(easily)Tj
2.4038 0 Td
(addressed)Tj
4.1782 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4684 0 Td
(hierarchical)Tj
4.9379 0 Td
(case,)Tj
2.0863 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(abstract)Tj
3.3505 0 Td
(actions)Tj
3.0558 0 Td
(are)Tj
-35.6426 -1.3039 Td
(restricted)Tj
3.9854 0 Td
(to)Tj
1.0262 0 Td
(limited)Tj
3.067 0 Td
(parts)Tj
2.2054 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(task)Tj
1.8085 0 Td
(by)Tj
1.1792 0 Td
(design.)Tj
3.0387 0 Td
(Without)Tj
3.6113 0 Td
(HRL)Tj
2.1487 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3925 0 Td
(must)Tj
2.228 0 Td
(try)Tj
1.3436 0 Td
(to)Tj
1.0261 0 Td
(find)Tj
1.8482 0 Td
(a)Tj
0.6576 0 Td
(state)Tj
-35.5178 -1.2983 Td
(abstraction)Tj
4.6657 0 Td
(that)Tj
1.7802 0 Td
(works)Tj
2.6419 0 Td
(for)Tj
1.3833 0 Td
(the)Tj
1.474 0 Td
(whole)Tj
2.6078 0 Td
(task,)Tj
2.0353 0 Td
(which)Tj
2.6475 0 Td
(is)Tj
0.8504 0 Td
(likely)Tj
2.3528 0 Td
(to)Tj
1.0261 0 Td
(be)Tj
1.1509 0 Td
(more)Tj
2.3414 0 Td
(difficult)Tj
3.3505 0 Td
(to)Tj
1.0261 0 Td
(find)Tj
1.8539 0 Td
(and)Tj
1.7291 0 Td
(also)Tj
-34.9169 -1.2983 Td
(likely)Tj
2.3527 0 Td
(to)Tj
1.0261 0 Td
(eliminate)Tj
3.9458 0 Td
(a)Tj
0.6577 0 Td
(smaller)Tj
3.1181 0 Td
(portion)Tj
3.2371 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0353 0 Td
(space.)Tj
-17.6597 -1.2982 Td
(The)Tj
1.7858 0 Td
(use)Tj
1.5307 0 Td
(of)Tj
1.0148 0 Td
(transfer)Tj
3.3165 0 Td
(learning)Tj
3.5263 0 Td
(in)Tj
1.0318 0 Td
(HRL)Tj
2.1486 0 Td
(is)Tj
0.8504 0 Td
(a)Tj
0.652 0 Td
(similar)Tj
2.9877 0 Td
(case,)Tj
2.0863 0 Td
(in)Tj
1.0374 0 Td
(that)Tj
1.7745 0 Td
(it)Tj
0.788 0 Td
(is)Tj
0.8504 0 Td
(not)Tj
1.5704 0 Td
(an)Tj
1.2019 0 Td
(intrinsic)Tj
3.5716 0 Td
(benefit)Tj
2.9764 0 Td
(of)Tj
-35.8977 -1.3039 Td
(HRL)Tj
2.1486 0 Td
(but)Tj
1.5477 0 Td
(is)Tj
0.8504 0 Td
(made)Tj
2.4264 0 Td
(easier)Tj
2.5002 0 Td
(by)Tj
1.1792 0 Td
(the)Tj
1.4683 0 Td
(hierarchical)Tj
4.9379 0 Td
(framework.)Tj
4.8416 0 Td
(Transfer)Tj
3.6283 0 Td
(learning)Tj
3.5263 0 Td
(is)Tj
0.8447 0 Td
(the)Tj
1.474 0 Td
(process)Tj
3.1805 0 Td
(of)Tj
-34.5541 -1.2983 Td
(using)Tj
2.3867 0 Td
(knowledge)Tj
4.5411 0 Td
(gained)Tj
2.8913 0 Td
(in)Tj
1.0318 0 Td
(a)Tj
0.6576 0 Td
(previous)Tj
3.6454 0 Td
(task)Tj
1.8141 0 Td
(to)Tj
1.0205 0 Td
(aid)Tj
1.4513 0 Td
(performance)Tj
5.3461 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
0.652 0 Td
(new)Tj
1.8765 0 Td
(task)Tj
1.8085 0 Td
([)Tj
0.83 0.64 0.02 0 k
(27)Tj
0 g
(].)Tj
2.0806 0 Td
(While)Tj
2.6475 0 Td
(this)Tj
1.6725 0 Td
(is)Tj
-36.561 -1.2983 Td
(possible)Tj
3.4072 0 Td
(in)Tj
1.0374 0 Td
(other)Tj
2.3471 0 Td
(RL)Tj
1.3776 0 Td
(frameworks,)Tj
5.2044 0 Td
(it)Tj
0.7824 0 Td
(is)Tj
0.8504 0 Td
(made)Tj
2.4264 0 Td
(much)Tj
2.5172 0 Td
(easier)Tj
2.5001 0 Td
(by)Tj
1.1849 0 Td
(the)Tj
1.4683 0 Td
(use)Tj
1.5307 0 Td
(of)Tj
1.0148 0 Td
(HRL.)Tj
2.3754 0 Td
(One)Tj
1.9389 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(main)Tj
-34.4463 -1.3039 Td
(challenges)Tj
4.3199 0 Td
(of)Tj
1.0148 0 Td
(transfer)Tj
3.3165 0 Td
(learning)Tj
3.5206 0 Td
(is)Tj
0.8504 0 Td
(trying)Tj
2.6249 0 Td
(to)Tj
1.0261 0 Td
(separate)Tj
3.481 0 Td
(the)Tj
1.4683 0 Td
(knowledge)Tj
4.5467 0 Td
(that)Tj
1.7745 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1508 0 Td
(reused)Tj
2.8574 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
-35.7843 -1.2982 Td
(knowledge)Tj
4.541 0 Td
(specific)Tj
3.1918 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4684 0 Td
(previous)Tj
3.651 0 Td
(task.)Tj
2.0353 0 Td
(In)Tj
1.1055 0 Td
(HRL,)Tj
2.3754 0 Td
(knowledge)Tj
4.541 0 Td
(is)Tj
0.8448 0 Td
(already)Tj
3.1124 0 Td
(divided)Tj
3.2201 0 Td
(into)Tj
1.8369 0 Td
(natural)Tj
-32.9497 -1.2983 Td
(modular)Tj
3.651 0 Td
(chunksthe)Tj
5.278 0 Td
(abstract)Tj
3.3506 0 Td
(actions.)Tj
3.2825 0 Td
(The)Tj
1.7858 0 Td
(abstract)Tj
3.3448 0 Td
(actions)Tj
3.0614 0 Td
(tend)Tj
2.0183 0 Td
(to)Tj
1.0205 0 Td
(be)Tj
1.1508 0 Td
(self-contained,)Tj
6.0945 0 Td
(general,)Tj
-34.0382 -1.3039 Td
(and)Tj
1.7291 0 Td
(well-defined,)Tj
5.4028 0 Td
(making)Tj
3.2485 0 Td
(them)Tj
2.2903 0 Td
(perfect)Tj
2.9651 0 Td
(components)Tj
5.176 0 Td
(for)Tj
1.3833 0 Td
(transfer)Tj
3.3165 0 Td
(learning.)Tj
3.753 0 Td
(For)Tj
1.6271 0 Td
(example,)Tj
3.7701 0 Td
(it)Tj
0.788 0 Td
(is)Tj
-35.4498 -1.2983 Td
(easy)Tj
1.8878 0 Td
(to)Tj
1.0205 0 Td
(see)Tj
1.423 0 Td
(how)Tj
1.9332 0 Td
(the)Tj
1.4626 0 Td
(go)Tj
1.5761 0 Td
(to)Tj
1.0205 0 Td
(the)Tj
1.4683 0 Td
(kitchen)Tj
3.5886 0 Td
(action)Tj
2.6929 0 Td
(could)Tj
2.4492 0 Td
(be)Tj
1.1451 0 Td
(reused)Tj
2.846 0 Td
(for)Tj
1.3833 0 Td
(navigating)Tj
4.3937 0 Td
(to)Tj
1.0204 0 Td
(the)Tj
1.4684 0 Td
(refrigerator,)Tj
-32.7796 -1.2983 Td
(the)Tj
1.4683 0 Td
(oven,)Tj
2.3811 0 Td
(the)Tj
1.4683 0 Td
(sink,)Tj
2.1203 0 Td
(and)Tj
1.7291 0 Td
(so)Tj
1.0885 0 Td
(on.)Tj
1.4967 0 Td
(Once)Tj
2.3641 0 Td
(that)Tj
1.7801 0 Td
(subpolicy)Tj
4.0309 0 Td
(has)Tj
1.542 0 Td
(been)Tj
2.1203 0 Td
(learned)Tj
3.1975 0 Td
(once,)Tj
2.3471 0 Td
(it)Tj
0.788 0 Td
(can)Tj
1.6214 0 Td
(be)Tj
1.1508 0 Td
(used)Tj
2.0637 0 Td
(as)Tj
1.0148 0 Td
(an)Tj
-35.773 -1.3039 Td
(abstract)Tj
3.3448 0 Td
(action)Tj
2.6986 0 Td
(in)Tj
1.0375 0 Td
(these)Tj
2.2506 0 Td
(new)Tj
1.8766 0 Td
(tasks,)Tj
2.3981 0 Td
(thereby)Tj
3.2258 0 Td
(conferring)Tj
4.4333 0 Td
(all)Tj
1.1452 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(benefits)Tj
3.3335 0 Td
(described)Tj
4.0536 0 Td
(above.)Tj
/F17 1 Tf
-31.0902 -1.2982 Td
(2.2.1)Tj
2.2053 0 Td
(Adding)Tj
3.3562 0 Td
(temporal)Tj
4.0422 0 Td
(delays)Tj
2.7949 0 Td
(to)Tj
1.0772 0 Td
(Markov)Tj
3.4582 0 Td
(Decision)Tj
3.8665 0 Td
(Processes.)Tj
/F5 1 Tf
5.0059 0 Td
(When)Tj
2.6872 0 Td
(an)Tj
1.2019 0 Td
(agent)Tj
2.3868 0 Td
(selects)Tj
2.7609 0 Td
(a)Tj
-36.0394 -1.2983 Td
(basic)Tj
2.211 0 Td
(action,)Tj
2.9253 0 Td
(the)Tj
1.4683 0 Td
(result)Tj
2.4492 0 Td
(of)Tj
1.0147 0 Td
(that)Tj
1.7745 0 Td
(action)Tj
2.6986 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1508 0 Td
(observed)Tj
3.7984 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.474 0 Td
(next)Tj
1.9559 0 Td
(timestep)Tj
3.6284 0 Td
(\(by)Tj
1.5193 0 Td
(the)Tj
1.4684 0 Td
(definition)Tj
4.1669 0 Td
(of)Tj
-36.3626 -1.2983 Td
(MDPs\).)Tj
3.3335 0 Td
(But)Tj
1.627 0 Td
(abstract)Tj
3.3449 0 Td
(actions)Tj
3.0614 0 Td
(are)Tj
1.4457 0 Td
(not)Tj
1.576 0 Td
(completed)Tj
4.405 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
0.652 0 Td
(single)Tj
2.5341 0 Td
(timestepthere)Tj
6.599 0 Td
(is)Tj
0.8448 0 Td
(some)Tj
2.33 0 Td
(time)Tj
2.0296 0 Td
(inter-)Tj
-34.8205 -1.3039 Td
(val)Tj
1.3549 0 Td
(that)Tj
1.7745 0 Td
(elapses)Tj
2.9877 0 Td
(while)Tj
2.3697 0 Td
(the)Tj
1.4684 0 Td
(subpolicy)Tj
4.0308 0 Td
(is)Tj
0.8504 0 Td
(executing)Tj
4.0648 0 Td
(the)Tj
1.4684 0 Td
(underlying)Tj
4.5978 0 Td
(basic)Tj
2.2166 0 Td
(actions,)Tj
3.2825 0 Td
(and)Tj
1.7292 0 Td
(only)Tj
1.9785 0 Td
(at)Tj
0.9525 0 Td
(the)Tj
-35.1267 -1.2983 Td
(end)Tj
1.7177 0 Td
(of)Tj
1.0148 0 Td
(that)Tj
1.7802 0 Td
(delay)Tj
2.3074 0 Td
(period)Tj
2.8289 0 Td
(can)Tj
1.6271 0 Td
(the)Tj
1.474 0 Td
(results)Tj
2.8063 0 Td
(of)Tj
1.0148 0 Td
(that)Tj
1.7801 0 Td
(abstract)Tj
3.3449 0 Td
(action)Tj
2.6986 0 Td
(be)Tj
1.1508 0 Td
(observed.)Tj
4.0195 0 Td
(Thus)Tj
2.2507 0 Td
(we)Tj
1.3266 0 Td
(need)Tj
2.143 0 Td
(to)Tj
-35.2854 -1.2982 Td
(add)Tj
1.7064 0 Td
(the)Tj
1.474 0 Td
(notion)Tj
2.8913 0 Td
(of)Tj
1.0148 0 Td
(temporal)Tj
3.8381 0 Td
(delays)Tj
2.6702 0 Td
(into)Tj
1.8369 0 Td
(the)Tj
1.474 0 Td
(MDP-based)Tj
5.0116 0 Td
(reinforcement)Tj
5.93 0 Td
(learning)Tj
3.5263 0 Td
(framework.)Tj
-30.1774 -1.3039 Td
(This)Tj
1.9899 0 Td
(can)Tj
1.627 0 Td
(be)Tj
1.1509 0 Td
(achieved)Tj
3.702 0 Td
(using)Tj
2.3868 0 Td
(the)Tj
1.4683 0 Td
(language)Tj
3.7644 0 Td
(of)Tj
1.0148 0 Td
(Semi-Markov)Tj
5.7146 0 Td
(Decision)Tj
3.7531 0 Td
(Processes)Tj
4.0081 0 Td
(\(SMDPs;)Tj
-31.7761 -1.2983 Td
([)Tj
0.83 0.64 0.02 0 k
(28)Tj
0 g
(]\).)Tj
2.4094 0 Td
(In)Tj
1.0998 0 Td
(an)Tj
1.1962 0 Td
(SMDP)Tj
2.88 0 Td
(environment)Tj
5.4255 0 Td
(the)Tj
1.4627 0 Td
(value)Tj
2.296 0 Td
(of)Tj
1.0091 0 Td
(selecting)Tj
3.668 0 Td
(action)Tj
/F10 1 Tf
2.6929 0 Td
[()]TJ
/F5 1 Tf
0.703 0 Td
(in)Tj
1.0262 0 Td
(state)Tj
/F10 1 Tf
2.0296 0 Td
[()]TJ
/F5 1 Tf
0.5442 0 Td
(is)Tj
0.8391 0 Td
(equal)Tj
2.3527 0 Td
(to)Tj
1.0205 0 Td
(the)Tj
1.4626 0 Td
(summed)Tj
-34.1175 -1.2983 Td
(reward)Tj
3.033 0 Td
(received)Tj
3.5319 0 Td
(across)Tj
2.6759 0 Td
(the)Tj
1.474 0 Td
(delay)Tj
2.3018 0 Td
(period,)Tj
3.0614 0 Td
(plus)Tj
1.8708 0 Td
(the)Tj
1.474 0 Td
(action)Tj
2.6929 0 Td
(value)Tj
2.3074 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(resulting)Tj
3.7304 0 Td
(state,)Tj
2.262 0 Td
(all)Tj
1.1452 0 Td
(dis-)Tj
-34.0665 -1.3039 Td
(counted)Tj
3.4752 0 Td
(across)Tj
2.6759 0 Td
(the)Tj
1.4683 0 Td
(length)Tj
2.7326 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(delay)Tj
2.3074 0 Td
(period)Tj
/F10 1 Tf
2.8347 0 Td
[(\n)]TJ
/F5 1 Tf
(.)Tj
0.8674 0 Td
(The)Tj
1.7858 0 Td
(prediction)Tj
4.371 0 Td
(error)Tj
2.262 0 Td
(equation)Tj
3.7304 0 Td
(\()Tj
0.83 0.64 0.02 0 k
(Eq)Tj
1.6384 0 Td
(\(2\))Tj
0 g
(\))Tj
1.7178 0 Td
(can)Tj
1.627 0 Td
(be)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(5)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
92 0 obj
<>stream
endstream
endobj
93 0 obj
<>stream
endstream
endobj
94 0 obj
<>stream
endstream
endobj
95 0 obj
<>stream
endstream
endobj
96 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
97 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 100 0 R/Contents 101 0 R/TrimBox[0 0 612 792]>>
endobj
100 0 obj
[102 0 R 103 0 R 104 0 R 105 0 R 106 0 R 107 0 R 108 0 R]
endobj
102 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.e003)>>
endobj
103 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.e003)>>
endobj
104 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.s001)>>
endobj
105 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref029)>>
endobj
106 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref029)>>
endobj
107 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref030)>>
endobj
108 0 obj
<>/Border[0 0 0]/A 109 0 R>>
endobj
109 0 obj
<>
endobj
101 0 obj
[110 0 R 111 0 R 112 0 R 113 0 R 114 0 R 115 0 R 116 0 R 117 0 R]
endobj
110 0 obj
<>stream
q
0.83 0.64 0.02 0 k
288 555.4772 m
312.4913 555.4772 l
h
f*
265.3795 503.4898 m
289.8709 503.4898 l
h
f*
539.6031 386.4756 m
565.6252 386.4756 l
h
f*
310.8472 317.537 m
320.315 317.537 l
h
f*
283.011 278.5323 m
292.5354 278.5323 l
h
f*
348.6047 133.9087 m
358.0724 133.9087 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 200.0125 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(re-expressed)Tj
5.2383 0 Td
(as)Tj
/F16 1 Tf
0 Tc
3.8268 -2.8459 Td
(D)Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
2.5341 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.1509 0 Td
()Tj
/F13 1 Tf
1.0488 0 Td
(a)Tj
/F14 1 Tf
1.0715 0.9467 Td
(X)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 350.759 690.4629 cm
BT
/F13 1 Tf
1 TL
0 0 Td
(t)Tj
/F11 1 Tf
( )Tj
/F18 1 Tf
1.2094 0 Td
(1)Tj
/F10 1 Tf
-1.1622 -3.6568 Td
[()]TJ
/F11 1 Tf
0.3496 0 Td
()Tj
/F18 1 Tf
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 363.0614 678.9543 cm
BT
/F13 1 Tf
1 TL
0 0 Td
(g)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 367.4834 683.0929 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 370.0346 678.9543 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[( )]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 373.6629 676.4598 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 378.4251 678.9543 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F13 1 Tf
0.9977 0 Td
(g)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 392.7685 683.0929 cm
BT
/F13 1 Tf
1 TL
0 0 Td
(t)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 395.8866 678.9543 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 410.1732 683.0929 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 412.2708 678.9543 cm
BT
/F12 1 Tf
1 TL
0 0 Td
(;)Tj
/F10 1 Tf
0.4422 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 421.5685 683.0929 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 423.7228 678.9543 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
0.6009 0 Td
( )Tj
/F10 1 Tf
0.9921 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.8709 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F14 1 Tf
-11.5369 1.7121 Td
(")Tj
12.4099 0 Td
(#)Tj
/F11 1 Tf
9.6378 -1.7121 Td
()Tj
/F5 1 Tf
(3)Tj
/F11 1 Tf
()Tj
/F5 1 Tf
-0.004 Tc
-36.3455 -3.1407 Td
(This)Tj
1.9842 0 Td
(allows)Tj
2.6986 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3924 0 Td
(to)Tj
1.0262 0 Td
(learn)Tj
2.245 0 Td
(the)Tj
1.4683 0 Td
(value)Tj
2.3074 0 Td
(of)Tj
1.0148 0 Td
(both)Tj
2.058 0 Td
(primitive)Tj
3.906 0 Td
(and)Tj
1.7291 0 Td
(abstract)Tj
3.3505 0 Td
(actions)Tj
3.0558 0 Td
(\(we)Tj
1.6667 0 Td
(can)Tj
1.6271 0 Td
(think)Tj
2.3584 0 Td
(of)Tj
-36.3568 -1.2983 Td
(primitive)Tj
3.9061 0 Td
(actions)Tj
3.0557 0 Td
(as)Tj
1.0148 0 Td
(a)Tj
0.6576 0 Td
(special)Tj
2.8914 0 Td
(kind)Tj
2.0579 0 Td
(of)Tj
1.0148 0 Td
(abstract)Tj
3.3505 0 Td
(action)Tj
2.6986 0 Td
(that)Tj
1.7744 0 Td
(always)Tj
2.8346 0 Td
(terminate)Tj
4.1045 0 Td
(after)Tj
2.0353 0 Td
(one)Tj
1.7008 0 Td
(step\).)Tj
-31.9008 -1.3039 Td
(Note)Tj
2.1883 0 Td
(that)Tj
1.7802 0 Td
(this)Tj
1.6724 0 Td
(is)Tj
0.8447 0 Td
(from)Tj
2.211 0 Td
(the)Tj
1.4684 0 Td
(perspective)Tj
4.6998 0 Td
(of)Tj
1.0148 0 Td
(a)Tj
0.6519 0 Td
(single)Tj
2.5342 0 Td
(layer)Tj
2.1485 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(hierarchy.)Tj
4.2463 0 Td
(The)Tj
1.7802 0 Td
(hierarchical)Tj
4.9379 0 Td
(sys-)Tj
-35.8806 -1.2983 Td
(tem)Tj
1.7574 0 Td
(must)Tj
2.2281 0 Td
(also)Tj
1.7688 0 Td
(learn)Tj
2.245 0 Td
(the)Tj
1.4683 0 Td
(subpolicies)Tj
4.6261 0 Td
(associated)Tj
4.2576 0 Td
(with)Tj
2.0013 0 Td
(each)Tj
2.0238 0 Td
(abstract)Tj
3.3505 0 Td
(action,)Tj
2.9254 0 Td
(if)Tj
0.771 0 Td
(they)Tj
1.9276 0 Td
(are)Tj
1.4456 0 Td
(not)Tj
1.5761 0 Td
(given.)Tj
-34.3726 -1.2982 Td
(For)Tj
1.627 0 Td
(example,)Tj
3.7701 0 Td
(in)Tj
1.0375 0 Td
(addition)Tj
3.5999 0 Td
(to)Tj
1.0205 0 Td
(learning)Tj
3.5263 0 Td
(how)Tj
1.9389 0 Td
(to)Tj
1.0261 0 Td
(sequence)Tj
3.8608 0 Td
(the)Tj
1.4682 0 Td
(go)Tj
1.5817 0 Td
(to)Tj
1.0262 0 Td
(doorway)Tj
4.0988 0 Td
(and)Tj
1.7292 0 Td
(go)Tj
1.5874 0 Td
(to)Tj
1.0204 0 Td
(kitchen)Tj
-33.919 -1.304 Td
(actions)Tj
3.0557 0 Td
(to)Tj
1.0261 0 Td
(complete)Tj
3.8778 0 Td
(the)Tj
1.474 0 Td
(overall)Tj
2.9027 0 Td
(task,)Tj
2.0352 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3867 0 Td
(must)Tj
2.228 0 Td
(also)Tj
1.7688 0 Td
(learn)Tj
2.245 0 Td
(the)Tj
1.4683 0 Td
(sub-policy)Tj
4.3823 0 Td
(that)Tj
1.7802 0 Td
(will)Tj
1.6611 0 Td
(carry)Tj
2.2734 0 Td
(out)Tj
-36.0337 -1.2982 Td
(the)Tj
1.4683 0 Td
(go)Tj
1.5817 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.474 0 Td
(doorway)Tj
4.0932 0 Td
(action.)Tj
2.9253 0 Td
(However,)Tj
4.0762 0 Td
(because)Tj
3.3222 0 Td
(everything)Tj
4.4559 0 Td
(has)Tj
1.5421 0 Td
(been)Tj
2.1203 0 Td
(reframed)Tj
3.8777 0 Td
(in)Tj
1.0375 0 Td
(terms)Tj
2.4945 0 Td
(of)Tj
-35.4951 -1.2983 Td
(abstract)Tj
3.3448 0 Td
(actions)Tj
3.0614 0 Td
(\(as)Tj
1.355 0 Td
(in)Tj
0.83 0.64 0.02 0 k
1.0375 0 Td
(Eq)Tj
1.2925 0 Td
(\(3\))Tj
0 g
(\),)Tj
1.9446 0 Td
(there)Tj
2.262 0 Td
(is)Tj
0.8504 0 Td
(no)Tj
1.2756 0 Td
(qualitative)Tj
4.3767 0 Td
(distinction)Tj
4.5353 0 Td
(between)Tj
3.5262 0 Td
(the)Tj
1.4684 0 Td
(policy)Tj
2.6362 0 Td
(that)Tj
1.7801 0 Td
(carries)Tj
-34.7467 -1.2982 Td
(out)Tj
1.5533 0 Td
(the)Tj
1.4684 0 Td
(overall)Tj
2.9026 0 Td
(task)Tj
1.8085 0 Td
(and)Tj
1.7292 0 Td
(the)Tj
1.474 0 Td
(sub-policy)Tj
4.3823 0 Td
(that)Tj
1.7745 0 Td
(carries)Tj
2.8743 0 Td
(out)Tj
1.5534 0 Td
(an)Tj
1.2017 0 Td
(abstract)Tj
3.3449 0 Td
(action.)Tj
2.9253 0 Td
(We)Tj
1.6101 0 Td
(can)Tj
1.6271 0 Td
(think)Tj
2.3584 0 Td
(of)Tj
-34.588 -1.304 Td
(them)Tj
2.2903 0 Td
(both)Tj
2.058 0 Td
(as)Tj
1.0148 0 Td
(a)Tj
0.6519 0 Td
(task)Tj
1.8142 0 Td
(with)Tj
1.9956 0 Td
(some)Tj
2.3301 0 Td
(goal,)Tj
2.0976 0 Td
(in)Tj
1.0374 0 Td
(which)Tj
2.6476 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3925 0 Td
(must)Tj
2.2279 0 Td
(learn)Tj
2.2393 0 Td
(to)Tj
1.0262 0 Td
(select)Tj
2.3981 0 Td
(between)Tj
3.5262 0 Td
(the)Tj
1.4684 0 Td
(avail-)Tj
-34.6844 -1.2982 Td
(able)Tj
1.8311 0 Td
(actions)Tj
3.0558 0 Td
(in)Tj
1.0374 0 Td
(order)Tj
2.4208 0 Td
(to)Tj
1.0261 0 Td
(maximize)Tj
4.1273 0 Td
(reward.)Tj
3.2598 0 Td
(Thus)Tj
2.245 0 Td
(learning)Tj
3.5262 0 Td
(the)Tj
1.4683 0 Td
(subpolicy)Tj
4.0309 0 Td
(can)Tj
1.627 0 Td
(be)Tj
1.1509 0 Td
(accomplished)Tj
5.6976 0 Td
(in)Tj
-36.5042 -1.2983 Td
(the)Tj
1.4683 0 Td
(same)Tj
2.2564 0 Td
(way)Tj
1.7971 0 Td
(as)Tj
0.83 0.64 0.02 0 k
1.0148 0 Td
(Eq)Tj
1.2926 0 Td
(\(3\))Tj
0 g
(,)Tj
1.6044 0 Td
(but)Tj
1.5534 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0353 0 Td
(and)Tj
1.7348 0 Td
(action)Tj
2.6985 0 Td
(space)Tj
2.3755 0 Td
(of)Tj
1.0147 0 Td
(the)Tj
1.474 0 Td
(subpolicy.)Tj
4.2519 0 Td
(Note)Tj
2.1883 0 Td
(that)Tj
1.7745 0 Td
(this)Tj
1.6781 0 Td
(also)Tj
-34.7127 -1.3039 Td
(requires)Tj
3.4696 0 Td
(rewards)Tj
3.3958 0 Td
(for)Tj
1.3833 0 Td
(the)Tj
1.4684 0 Td
(subpolicy,)Tj
4.2576 0 Td
(which)Tj
2.6532 0 Td
(may)Tj
1.9275 0 Td
(be)Tj
1.1509 0 Td
(distinct)Tj
3.2087 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
1.474 0 Td
(task)Tj
1.8085 0 Td
(rewards)Tj
3.3903 0 Td
(\(referred)Tj
3.7643 0 Td
(to)Tj
1.0262 0 Td
(as)Tj
-36.5836 -1.2983 Td
(pseudoreward\).)Tj
7.2453 0 Td
(This)Tj
1.9842 0 Td
(will)Tj
1.6668 0 Td
(be)Tj
1.1508 0 Td
(discussed)Tj
3.9969 0 Td
(in)Tj
1.0374 0 Td
(more)Tj
2.3414 0 Td
(detail)Tj
2.4151 0 Td
(when)Tj
2.4037 0 Td
(we)Tj
1.3323 0 Td
(look)Tj
1.9729 0 Td
(at)Tj
0.9524 0 Td
(specific)Tj
3.1918 0 Td
(implementa-)Tj
-31.691 -1.2982 Td
(tions)Tj
2.1996 0 Td
(of)Tj
1.0148 0 Td
(HRL)Tj
2.1487 0 Td
(in)Tj
1.0375 0 Td
(this)Tj
1.6724 0 Td
(model.)Tj
2.9707 0 Td
(It)Tj
0.856 0 Td
(is)Tj
0.8504 0 Td
(also)Tj
1.7688 0 Td
(useful)Tj
2.5909 0 Td
(to)Tj
1.0261 0 Td
(point)Tj
2.3584 0 Td
(out)Tj
1.5534 0 Td
(that)Tj
1.78 0 Td
(these)Tj
2.2507 0 Td
(learning)Tj
3.5263 0 Td
(processes)Tj
3.9685 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
-35.2003 -1.304 Td
(occurring)Tj
4.1215 0 Td
(simultaneously)Tj
6.2475 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3867 0 Td
(moves)Tj
2.7893 0 Td
(through)Tj
3.4412 0 Td
(the)Tj
1.4739 0 Td
(environment,)Tj
5.6523 0 Td
(allowing)Tj
3.6226 0 Td
(the)Tj
1.4684 0 Td
(system)Tj
2.9367 0 Td
(to)Tj
-36.6233 -1.2982 Td
(learn)Tj
2.245 0 Td
(across)Tj
2.6759 0 Td
(the)Tj
1.4683 0 Td
(different)Tj
3.6567 0 Td
(levels)Tj
2.3754 0 Td
(of)Tj
1.0148 0 Td
(abstraction.)Tj
-0.01 Tc
-12.2399 -1.2983 Td
(With)Tj
2.2563 0 Td
(that)Tj
1.7575 0 Td
(basic)Tj
2.1827 0 Td
(framework)Tj
4.5581 0 Td
(in)Tj
1.0261 0 Td
(place)Tj
2.2337 0 Td
(there)Tj
2.2337 0 Td
(are)Tj
1.4286 0 Td
(still)Tj
1.6101 0 Td
(many)Tj
2.4547 0 Td
(different)Tj
3.6 0 Td
(ways)Tj
2.1316 0 Td
(to)Tj
1.0148 0 Td
(implement)Tj
4.5297 0 Td
(HRL,)Tj
-34.2138 -1.3039 Td
(based)Tj
2.4434 0 Td
(on)Tj
1.2643 0 Td
(issues)Tj
2.4831 0 Td
(such)Tj
2.0353 0 Td
(as)Tj
1.0034 0 Td
(how)Tj
1.9219 0 Td
(the)Tj
1.4513 0 Td
(hierarchy)Tj
3.9685 0 Td
(of)Tj
1.0035 0 Td
(actions)Tj
3.016 0 Td
(is)Tj
0.8334 0 Td
(structured)Tj
4.2915 0 Td
(and)Tj
1.7121 0 Td
(how)Tj
1.9219 0 Td
(the)Tj
1.4513 0 Td
(prediction)Tj
4.3143 0 Td
(error)Tj
-35.1152 -1.2983 Td
(is)Tj
0.8333 0 Td
(calculated)Tj
4.1443 0 Td
(and)Tj
1.7121 0 Td
(applied.)Tj
3.3335 0 Td
(Different)Tj
3.8098 0 Td
(HRL)Tj
2.1316 0 Td
(theories)Tj
3.3505 0 Td
(are)Tj
1.4287 0 Td
(defined)Tj
3.186 0 Td
(by)Tj
1.1679 0 Td
(their)Tj
2.0806 0 Td
(choices)Tj
3.1124 0 Td
(on)Tj
1.2586 0 Td
(these)Tj
2.228 0 Td
(issues.)Tj
-33.7773 -1.2982 Td
(We)Tj
1.5987 0 Td
(draw)Tj
2.211 0 Td
(a)Tj
0.6463 0 Td
(more)Tj
2.3187 0 Td
(detailed)Tj
3.3222 0 Td
(comparison)Tj
4.9153 0 Td
(to)Tj
1.0148 0 Td
(these)Tj
2.2223 0 Td
(theories)Tj
3.3505 0 Td
(in)Tj
1.0261 0 Td
(the)Tj
1.4513 0 Td
(supplementary)Tj
6.0831 0 Td
(material)Tj
3.4696 0 Td
(\()Tj
0.83 0.64 0.02 0 k
(S1)Tj
1.491 0 Td
(File)Tj
0 g
(\).)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 348.6047 cm
BT
/F0 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(2.3)Tj
1.5921 0 Td
(Neural)Tj
3.1276 0 Td
(engineering)Tj
5.3622 0 Td
(framework)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 331.5968 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(The)Tj
1.7801 0 Td
(previous)Tj
3.651 0 Td
(sections)Tj
3.4072 0 Td
(have)Tj
2.0636 0 Td
(outlined)Tj
3.566 0 Td
(the)Tj
1.4683 0 Td
(computations)Tj
5.7316 0 Td
(involved)Tj
3.6454 0 Td
(in)Tj
1.0375 0 Td
(RL/HRL.)Tj
3.8607 0 Td
(The)Tj
1.7858 0 Td
(Neural)Tj
2.9594 0 Td
(Engi-)Tj
-34.9566 -1.2982 Td
(neering)Tj
3.2654 0 Td
(Framework)Tj
4.8586 0 Td
(\(NEF;)Tj
2.6192 0 Td
([)Tj
0.83 0.64 0.02 0 k
(29)Tj
0 g
(]\))Tj
2.1883 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4683 0 Td
(tool)Tj
1.7802 0 Td
(we)Tj
1.3266 0 Td
(use)Tj
1.5307 0 Td
(to)Tj
1.0261 0 Td
(bridge)Tj
2.7836 0 Td
(the)Tj
1.4684 0 Td
(gap)Tj
1.6327 0 Td
(between)Tj
3.532 0 Td
(those)Tj
2.3357 0 Td
(computa-)Tj
-32.6662 -1.3039 Td
(tional)Tj
2.5171 0 Td
(descriptions)Tj
5.0967 0 Td
(and)Tj
1.7291 0 Td
(a)Tj
0.6519 0 Td
(neural)Tj
2.7723 0 Td
(implementation.)Tj
6.8655 0 Td
(A)Tj
0.9071 0 Td
(full)Tj
1.525 0 Td
(overview)Tj
3.8041 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(NEF)Tj
2.0522 0 Td
(is)Tj
0.8448 0 Td
(beyond)Tj
3.1917 0 Td
(the)Tj
-34.4463 -1.2983 Td
(scope)Tj
2.4491 0 Td
(of)Tj
1.0148 0 Td
(this)Tj
1.6781 0 Td
(article;)Tj
2.9083 0 Td
(here)Tj
1.9672 0 Td
(we)Tj
1.3266 0 Td
(will)Tj
1.6611 0 Td
(focus)Tj
2.3301 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
1.4683 0 Td
(aspects)Tj
3.0387 0 Td
(most)Tj
2.2054 0 Td
(relevant)Tj
3.4185 0 Td
(to)Tj
1.0262 0 Td
(this)Tj
1.6724 0 Td
(work,)Tj
2.5058 0 Td
(and)Tj
1.7291 0 Td
(refer)Tj
2.0977 0 Td
(the)Tj
-35.773 -1.2983 Td
(interested)Tj
4.1612 0 Td
(reader)Tj
2.7722 0 Td
(to)Tj
1.0262 0 Td
([)Tj
0.83 0.64 0.02 0 k
(29)Tj
0 g
(])Tj
1.8482 0 Td
(for)Tj
1.3889 0 Td
(more)Tj
2.3414 0 Td
(detail.)Tj
-12.3419 -1.3039 Td
(A)Tj
0.907 0 Td
(central)Tj
2.9594 0 Td
(feature)Tj
2.9877 0 Td
(of)Tj
1.0091 0 Td
(the)Tj
1.4684 0 Td
(NEF)Tj
2.0522 0 Td
(is)Tj
0.8447 0 Td
(the)Tj
1.4684 0 Td
(ability)Tj
2.6929 0 Td
(to)Tj
1.0261 0 Td
(translate)Tj
3.634 0 Td
(computational)Tj
6.0434 0 Td
(variables)Tj
3.7191 0 Td
(\(such)Tj
2.3981 0 Td
(as)Tj
1.0091 0 Td
(states)Tj
-35.4158 -1.2982 Td
(or)Tj
1.0941 0 Td
(action)Tj
2.6986 0 Td
(values\))Tj
3.0104 0 Td
(into)Tj
1.8425 0 Td
(neural)Tj
2.7666 0 Td
(activity,)Tj
3.3448 0 Td
(and)Tj
1.7291 0 Td
(decode)Tj
3.0558 0 Td
(neural)Tj
2.7722 0 Td
(activity)Tj
3.1181 0 Td
(back)Tj
2.0806 0 Td
(into)Tj
1.8369 0 Td
(computational)Tj
6.0491 0 Td
(vari-)Tj
-35.3988 -1.2983 Td
(ables)Tj
2.1883 0 Td
(the)Tj
1.474 0 Td
(modeller)Tj
3.7814 0 Td
(can)Tj
1.6271 0 Td
(analyze.)Tj
3.4015 0 Td
(This)Tj
1.9843 0 Td
(is)Tj
0.8503 0 Td
(accomplished)Tj
5.6976 0 Td
(via)Tj
1.372 0 Td
(a)Tj
0.6576 0 Td
(distributed,)Tj
4.8189 0 Td
(population-based)Tj
7.1943 0 Td
(repre-)Tj
-35.0473 -1.3039 Td
(sentation)Tj
3.9061 0 Td
(scheme.)Tj
3.4242 0 Td
(Specifically,)Tj
4.9096 0 Td
(encoding)Tj
3.9344 0 Td
(a)Tj
0.652 0 Td
(vector)Tj
/F10 1 Tf
2.6986 0 Td
[()]TJ
/F5 1 Tf
0.6859 0 Td
(into)Tj
1.8426 0 Td
(the)Tj
1.4683 0 Td
(activity)Tj
3.1181 0 Td
(of)Tj
1.0148 0 Td
(neuron)Tj
/F10 1 Tf
3.1464 0 Td
[(\r)]TJ
/F5 1 Tf
0.4876 0 Td
(is)Tj
-31.2886 -1.2983 Td
(accomplished)Tj
5.6976 0 Td
(via)Tj
/F10 1 Tf
0 Tc
7.8802 -1.8765 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 340.7244 193.3228 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 342.822 195.8173 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
0.8787 0 Td
()Tj
0.6576 0 Td
()Tj
/F10 1 Tf
1.0488 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 375.4204 193.3228 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 377.5748 195.8173 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F13 1 Tf
(a)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 385.285 193.3228 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 387.4393 195.8173 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 391.3511 193.3228 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 393.5055 195.8173 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
/F11 1 Tf
0.7086 0 Td
()Tj
/F10 1 Tf
0.9978 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 414.4251 199.4456 cm
BT
/F19 1 Tf
1 TL
0 0 Td
(bia)Tj
1.2472 0 Td
(s)Tj
/F10 1 Tf
-1.37 -1.0677 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 424.97 195.8173 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
13.8499 0 Td
()Tj
/F5 1 Tf
(4)Tj
/F11 1 Tf
()Tj
/F5 1 Tf
-0.004 Tc
-36.3455 -2.1826 Td
(which)Tj
2.6476 0 Td
(describes)Tj
3.8891 0 Td
(the)Tj
1.4683 0 Td
(activity)Tj
/F10 1 Tf
3.1181 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 316.1196 172.3464 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 320.2582 173.9905 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(of)Tj
1.0147 0 Td
(neuron)Tj
/F10 1 Tf
3.1522 0 Td
[(\r)]TJ
/F5 1 Tf
0.4818 0 Td
(as)Tj
1.0148 0 Td
(a)Tj
0.6577 0 Td
(function)Tj
3.6283 0 Td
(of)Tj
1.0148 0 Td
(its)Tj
1.1452 0 Td
(input)Tj
2.3811 0 Td
(current.)Tj
/F10 1 Tf
3.4072 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 505.9842 172.3464 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 510.1795 173.9905 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(is)Tj
0.8503 0 Td
(a)Tj
0.652 0 Td
(function)Tj
-32.5187 -1.2982 Td
(representing)Tj
5.2383 0 Td
(the)Tj
1.4684 0 Td
(nonlinear)Tj
4.1159 0 Td
(neuron)Tj
3.1464 0 Td
(characteristics.)Tj
6.1285 0 Td
(It)Tj
0.8617 0 Td
(takes)Tj
2.2337 0 Td
(a)Tj
0.6519 0 Td
(current)Tj
3.1862 0 Td
(as)Tj
1.0148 0 Td
(input)Tj
2.3754 0 Td
(\(the)Tj
1.814 0 Td
(value)Tj
2.3018 0 Td
(within)Tj
-34.537 -1.2983 Td
(the)Tj
1.4683 0 Td
(brackets\),)Tj
4.0989 0 Td
(and)Tj
1.7291 0 Td
(uses)Tj
1.8935 0 Td
(a)Tj
0.6576 0 Td
(model)Tj
2.744 0 Td
(of)Tj
1.0148 0 Td
(neuron)Tj
3.1464 0 Td
(behaviour)Tj
4.2406 0 Td
(to)Tj
1.0261 0 Td
(output)Tj
2.8914 0 Td
(firing)Tj
2.4321 0 Td
(activity.)Tj
3.3392 0 Td
(In)Tj
1.111 0 Td
(this)Tj
1.6725 0 Td
(work)Tj
2.279 0 Td
(we)Tj
-35.7445 -1.3039 Td
(use)Tj
1.5307 0 Td
(the)Tj
1.4683 0 Td
(leaky)Tj
2.279 0 Td
(integrate)Tj
3.7474 0 Td
(and)Tj
1.7291 0 Td
(fire)Tj
1.5704 0 Td
(\(LIF;)Tj
2.194 0 Td
([)Tj
0.83 0.64 0.02 0 k
(30)Tj
0 g
(]\))Tj
2.1883 0 Td
(neuron)Tj
3.1465 0 Td
(model,)Tj
2.9707 0 Td
(which)Tj
2.6475 0 Td
(strikes)Tj
2.8006 0 Td
(a)Tj
0.652 0 Td
(balance)Tj
3.237 0 Td
(between)Tj
3.532 0 Td
(bio-)Tj
-35.6935 -1.2983 Td
(logical)Tj
2.8062 0 Td
(detail)Tj
2.4208 0 Td
(and)Tj
1.7291 0 Td
(computational)Tj
6.0491 0 Td
(simplicity.)Tj
4.3654 0 Td
(One)Tj
1.9388 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(nice)Tj
1.8822 0 Td
(features)Tj
3.3505 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(LIF)Tj
1.627 0 Td
(model)Tj
2.7439 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7802 0 Td
(it)Tj
-36.5042 -1.2982 Td
(can)Tj
1.627 0 Td
(output)Tj
2.8913 0 Td
(either)Tj
2.5342 0 Td
(an)Tj
1.2019 0 Td
(overall)Tj
2.9026 0 Td
(firing)Tj
2.4265 0 Td
(rate)Tj
1.7518 0 Td
(or)Tj
1.0942 0 Td
(individual)Tj
4.2916 0 Td
(spikes;)Tj
2.8686 0 Td
(we)Tj
1.3266 0 Td
(will)Tj
1.6668 0 Td
(show)Tj
2.296 0 Td
(that)Tj
1.7802 0 Td
(either)Tj
2.534 0 Td
(can)Tj
1.6215 0 Td
(be)Tj
-34.8148 -1.3039 Td
(used)Tj
2.0579 0 Td
(in)Tj
1.0375 0 Td
(this)Tj
1.6724 0 Td
(model)Tj
2.7439 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(results)Tj
2.812 0 Td
(section)Tj
3.0444 0 Td
(\(see)Tj
1.7688 0 Td
(Section)Tj
3.1634 0 Td
(5.2)Tj
1.4003 0 Td
(for)Tj
1.3833 0 Td
(a)Tj
0.652 0 Td
(rate-based)Tj
4.354 0 Td
(implementation)Tj
6.6386 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
-36.2491 -1.2983 Td
(model)Tj
2.7439 0 Td
(and)Tj
1.7291 0 Td
(Section)Tj
3.1634 0 Td
(5.3)Tj
1.3947 0 Td
(for)Tj
1.3889 0 Td
(a)Tj
0.652 0 Td
(spiking)Tj
3.1464 0 Td
(implementation\).)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(6)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
111 0 obj
<>stream
endstream
endobj
112 0 obj
<>stream
endstream
endobj
113 0 obj
<>stream
endstream
endobj
114 0 obj
<>stream
endstream
endobj
115 0 obj
<>stream
endstream
endobj
116 0 obj
<>stream
endstream
endobj
117 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
118 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 119 0 R/Contents 120 0 R/TrimBox[0 0 612 792]>>
endobj
119 0 obj
[121 0 R 122 0 R 123 0 R 124 0 R 125 0 R 126 0 R 127 0 R 128 0 R]
endobj
121 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref031)>>
endobj
122 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref032)>>
endobj
123 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.e002)>>
endobj
124 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref033)>>
endobj
125 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref034)>>
endobj
126 0 obj
<>/Border[0 0 0]/A 129 0 R>>
endobj
129 0 obj
<>
endobj
127 0 obj
<>/Border[0 0 0]/A 130 0 R>>
endobj
130 0 obj
<>
endobj
128 0 obj
<>/Border[0 0 0]/A 131 0 R>>
endobj
131 0 obj
<>
endobj
120 0 obj
[132 0 R 133 0 R 134 0 R 135 0 R 136 0 R 137 0 R 138 0 R 139 0 R 140 0 R]
endobj
132 0 obj
<>stream
q
0.83 0.64 0.02 0 k
462.4441 601.7386 m
471.9685 601.7386 l
h
f*
400.4787 510.7465 m
410.0031 510.7465 l
h
f*
441.1843 391.2378 m
465.7323 391.2378 l
h
f*
257.6126 365.2157 m
267.137 365.2157 l
h
f*
271.5591 365.2157 m
281.0835 365.2157 l
h
f*
203.4142 339.2504 m
287.2063 339.2504 l
h
f*
227.8488 326.211 m
388.8 326.211 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 211.9748 707.1307 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(The)Tj
1.7858 0 Td
(variables)Tj
/F15 1 Tf
3.719 0 Td
()Tj
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 272.5228 705.4866 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 274.507 707.1307 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(,)Tj
/F10 1 Tf
0 Tc
0.4422 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 282.7842 710.759 cm
BT
/F19 1 Tf
1 TL
0 0 Td
(bias)Tj
/F10 1 Tf
-0.1228 -1.0677 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 293.3858 707.1307 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(,)Tj
0.4422 0 Td
(and)Tj
/F10 1 Tf
1.7291 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 319.0677 705.4866 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 323.2062 707.1307 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(are)Tj
1.4513 0 Td
(the)Tj
1.4683 0 Td
(parameters)Tj
4.6772 0 Td
(of)Tj
1.0147 0 Td
(neuron)Tj
/F10 1 Tf
3.1465 0 Td
[(\r)]TJ
/F5 1 Tf
(.)Tj
0.7086 0 Td
(The)Tj
1.7859 0 Td
(parameters)Tj
/F15 1 Tf
4.6714 0 Td
()Tj
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 517.9464 705.4866 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 522.1417 707.1307 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(and)Tj
/F10 1 Tf
0 Tc
1.7291 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 543.2881 710.759 cm
BT
/F19 1 Tf
1 TL
0 0 Td
(bias)Tj
/F10 1 Tf
-0.1228 -1.0677 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 556.044 707.1307 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(do)Tj
-35.6029 -1.3322 Td
(not)Tj
1.5704 0 Td
(directly)Tj
3.2372 0 Td
(play)Tj
1.8708 0 Td
(a)Tj
0.652 0 Td
(role)Tj
1.7688 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(encoding)Tj
3.9288 0 Td
(of)Tj
1.0148 0 Td
(information,)Tj
5.3064 0 Td
(but)Tj
1.5477 0 Td
(rather)Tj
2.6476 0 Td
(are)Tj
1.4513 0 Td
(used)Tj
2.0579 0 Td
(to)Tj
1.0262 0 Td
(provide)Tj
3.2881 0 Td
(variabil-)Tj
-33.8738 -1.2983 Td
(ity)Tj
1.2416 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.4683 0 Td
(firing)Tj
2.4321 0 Td
(characteristics)Tj
5.9017 0 Td
(of)Tj
1.0148 0 Td
(neurons.)Tj
3.7361 0 Td
(They)Tj
2.2337 0 Td
(are)Tj
1.4513 0 Td
(chosen)Tj
3.0104 0 Td
(randomly)Tj
4.1272 0 Td
(from)Tj
2.2053 0 Td
(ranges)Tj
2.8233 0 Td
(that)Tj
1.7802 0 Td
(give)Tj
-34.4578 -1.3039 Td
(biologically)Tj
4.7905 0 Td
(plausible)Tj
3.7474 0 Td
(response)Tj
3.7361 0 Td
(curves;)Tj
3.0047 0 Td
(this)Tj
1.6724 0 Td
(allows)Tj
2.6986 0 Td
(the)Tj
1.474 0 Td
(modeller)Tj
3.7814 0 Td
(to)Tj
1.0261 0 Td
(capture)Tj
3.2145 0 Td
(the)Tj
1.474 0 Td
(heterogeneity)Tj
-30.6197 -1.2983 Td
(observed)Tj
3.7928 0 Td
(in)Tj
1.0374 0 Td
(biological)Tj
4.0876 0 Td
(neurons.)Tj
3.736 0 Td
(The)Tj
1.7801 0 Td
(parameter)Tj
/F10 1 Tf
4.3144 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 391.4078 653.159 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 395.6031 654.8031 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(is)Tj
0.8503 0 Td
(a)Tj
0.652 0 Td
(vector)Tj
2.6986 0 Td
(representing)Tj
5.2384 0 Td
(the)Tj
1.4683 0 Td
(neuron's)Tj
3.7247 0 Td
(pre-)Tj
-34.1912 -1.2982 Td
(ferred)Tj
2.6249 0 Td
(stimulus.)Tj
3.8551 0 Td
(Specifically,)Tj
4.9096 0 Td
(the)Tj
1.4683 0 Td
(dot)Tj
1.5534 0 Td
(product)Tj
3.3959 0 Td
(between)Tj
/F10 1 Tf
3.5261 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 417.2598 640.1196 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 421.4551 641.8204 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(and)Tj
1.7347 0 Td
(the)Tj
1.4684 0 Td
(input)Tj
/F10 1 Tf
2.3754 0 Td
[()]TJ
/F5 1 Tf
0.686 0 Td
(\(i.e.,)Tj
1.9275 0 Td
(their)Tj
2.109 0 Td
(similarity\))Tj
-32.4451 -1.3039 Td
(drives)Tj
2.6249 0 Td
(a)Tj
0.652 0 Td
(particular)Tj
4.1102 0 Td
(cell,)Tj
1.7858 0 Td
(so)Tj
/F10 1 Tf
1.0885 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 306.5385 627.137 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 310.7338 628.7811 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(defines)Tj
3.0613 0 Td
(which)Tj
2.6476 0 Td
(types)Tj
2.279 0 Td
(of)Tj
1.0148 0 Td
(inputs)Tj
2.7383 0 Td
(will)Tj
1.6611 0 Td
(cause)Tj
2.3924 0 Td
(the)Tj
1.4683 0 Td
(neuron)Tj
3.1465 0 Td
(to)Tj
1.0261 0 Td
(respond)Tj
-32.5074 -1.2982 Td
(most)Tj
2.2053 0 Td
(strongly.)Tj
-1.0091 -1.2983 Td
(Decoding)Tj
4.1216 0 Td
(is)Tj
0.8503 0 Td
(accomplished)Tj
5.6975 0 Td
(via)Tj
1.372 0 Td
(a)Tj
0.6576 0 Td
(linear)Tj
2.5059 0 Td
(least)Tj
1.9842 0 Td
(squares)Tj
3.2088 0 Td
(procedure)Tj
4.3086 0 Td
([)Tj
0.83 0.64 0.02 0 k
(31)Tj
0 g
(],)Tj
2.0806 0 Td
(which)Tj
2.6476 0 Td
(is)Tj
0.8504 0 Td
(used)Tj
2.0579 0 Td
(to)Tj
1.0261 0 Td
(find)Tj
1.8539 0 Td
(a)Tj
-36.4192 -1.2982 Td
(linear)Tj
2.5115 0 Td
(weighting)Tj
4.1669 0 Td
(over)Tj
1.9729 0 Td
(the)Tj
1.474 0 Td
(neural)Tj
2.7665 0 Td
(response)Tj
3.736 0 Td
(functions)Tj
3.9912 0 Td
(\()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 414.4251 588.1322 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 416.4094 589.833 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(\))Tj
0.5612 0 Td
(that)Tj
1.7802 0 Td
(best)Tj
1.8084 0 Td
(approximates)Tj
5.6353 0 Td
(a)Tj
0.6576 0 Td
(target)Tj
2.5058 0 Td
(func-)Tj
-34.588 -1.3039 Td
(tion.)Tj
2.0636 0 Td
(This)Tj
1.9899 0 Td
(allows)Tj
2.6986 0 Td
(for)Tj
1.3833 0 Td
(very)Tj
1.9275 0 Td
(efficient,)Tj
3.6737 0 Td
(analytic)Tj
3.3222 0 Td
(computation)Tj
5.3687 0 Td
(of)Tj
1.0148 0 Td
(connection)Tj
4.7168 0 Td
(weights)Tj
3.2485 0 Td
(for)Tj
1.3833 0 Td
(fixed)Tj
2.194 0 Td
(trans-)Tj
-34.9849 -1.2982 Td
(formations,)Tj
4.8472 0 Td
(such)Tj
2.058 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(steps)Tj
2.1827 0 Td
(involved)Tj
3.651 0 Td
(in)Tj
1.0318 0 Td
(computing)Tj
4.5977 0 Td
(a)Tj
0.652 0 Td
(temporal)Tj
3.838 0 Td
(difference)Tj
4.2066 0 Td
(error.)Tj
-28.3519 -1.2983 Td
(However,)Tj
4.0819 0 Td
(in)Tj
1.0318 0 Td
(some)Tj
2.33 0 Td
(cases)Tj
2.2281 0 Td
(the)Tj
1.4683 0 Td
(required)Tj
3.6397 0 Td
(transformation)Tj
6.2701 0 Td
(is)Tj
0.8503 0 Td
(not)Tj
1.5761 0 Td
(known)Tj
3.0047 0 Td
(ahead)Tj
2.5625 0 Td
(of)Tj
1.0148 0 Td
(time;)Tj
2.2564 0 Td
(for)Tj
1.3833 0 Td
(exam-)Tj
-34.8942 -1.3039 Td
(ple,)Tj
1.6327 0 Td
(in)Tj
1.0319 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5262 0 Td
(the)Tj
/F10 1 Tf
1.4684 0 Td
[()]TJ
/F5 1 Tf
0.9241 0 Td
(function)Tj
3.6283 0 Td
(is)Tj
0.8504 0 Td
(not)Tj
1.5704 0 Td
(known)Tj
3.0046 0 Td
(until)Tj
2.1089 0 Td
(the)Tj
1.474 0 Td
(agent)Tj
2.3868 0 Td
(actually)Tj
3.2825 0 Td
(starts)Tj
-32.8249 -1.2983 Td
(exploring)Tj
4.0308 0 Td
(its)Tj
1.1452 0 Td
(environment.)Tj
5.6579 0 Td
(In)Tj
1.1056 0 Td
(those)Tj
2.3414 0 Td
(cases)Tj
2.2223 0 Td
(the)Tj
1.4683 0 Td
(weights)Tj
3.2485 0 Td
(need)Tj
2.1429 0 Td
(to)Tj
1.0261 0 Td
(be)Tj
1.1509 0 Td
(learned)Tj
3.1918 0 Td
(online.)Tj
2.9877 0 Td
(In)Tj
1.1055 0 Td
(our)Tj
1.6271 0 Td
(work)Tj
-34.452 -1.2982 Td
(we)Tj
1.3266 0 Td
(use)Tj
1.5307 0 Td
(the)Tj
1.4683 0 Td
(Prescribed)Tj
4.4561 0 Td
(Error)Tj
2.4094 0 Td
(Sensitivity)Tj
4.337 0 Td
(rule)Tj
1.7915 0 Td
(\(PES;)Tj
2.3867 0 Td
([)Tj
0.83 0.64 0.02 0 k
(32)Tj
0 g
(]\).)Tj
2.4207 0 Td
(This)Tj
1.9842 0 Td
(is)Tj
0.8504 0 Td
(described)Tj
4.0479 0 Td
(by)Tj
1.1849 0 Td
(the)Tj
1.4683 0 Td
(formula:)Tj
/F16 1 Tf
0 Tc
-17.2174 -1.8539 Td
(D)Tj
/F13 1 Tf
(o)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 358.3559 490.7905 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[(\r)12()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 364.7622 493.285 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F13 1 Tf
1.0488 0 Td
(ka)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 385.7952 490.7905 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 387.8362 493.285 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 391.8047 490.7905 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 393.9023 493.285 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()-1()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 404.2771 490.7905 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 406.3748 493.285 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
0.8787 0 Td
()Tj
14.8308 -0.0737 Td
()Tj
/F5 1 Tf
(5)Tj
/F11 1 Tf
()Tj
/F5 1 Tf
-0.004 Tc
-36.3455 -2.2223 Td
(where)Tj
/F15 1 Tf
2.6475 0 Td
()Tj
/F5 1 Tf
0.7597 0 Td
(is)Tj
0.8504 0 Td
(a)Tj
0.6519 0 Td
(learning)Tj
3.5263 0 Td
(rate,)Tj
/F15 1 Tf
1.9729 0 Td
()Tj
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 309.5433 468.6236 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 313.6818 470.3244 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(and)Tj
/F10 1 Tf
1.7291 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 334.885 468.6236 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 339.0236 470.3244 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(are)Tj
1.4456 0 Td
(parameters)Tj
4.6771 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(postsynaptic)Tj
5.21 0 Td
(neuron)Tj
3.1465 0 Td
(\(described)Tj
-30.8634 -1.3039 Td
(above\),)Tj
/F10 1 Tf
3.1125 0 Td
[()]TJ
/F5 1 Tf
0.7653 0 Td
(is)Tj
0.8504 0 Td
(an)Tj
1.2019 0 Td
(error)Tj
2.262 0 Td
(signal,)Tj
2.7666 0 Td
(and)Tj
/F10 1 Tf
1.7348 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
7.4999 0 0 6.6659 331.7669 455.6409 cm
BT
/F10 1 Tf
1 TL
-0.0053 Tc
0 0 Td
[(\r)]TJ
ET
Q
q
1 j
1 J
0 w
10 0 0 10 333.8078 457.285 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(\))Tj
1.3606 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4683 0 Td
(activity)Tj
3.1181 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(presynaptic)Tj
4.8359 0 Td
(neuron.)Tj
3.3732 0 Td
(This)Tj
1.9899 0 Td
(is)Tj
0.8447 0 Td
(an)Tj
1.2019 0 Td
(error)Tj
-34.9055 -1.2982 Td
(modulated)Tj
4.5297 0 Td
(local)Tj
2.075 0 Td
(learning)Tj
3.5263 0 Td
(rule,)Tj
2.0126 0 Td
(which)Tj
2.6531 0 Td
(we)Tj
1.3266 0 Td
(can)Tj
1.627 0 Td
(think)Tj
2.3585 0 Td
(of)Tj
1.0148 0 Td
(as)Tj
1.0147 0 Td
(performing)Tj
4.7962 0 Td
(gradient)Tj
3.5603 0 Td
(descent)Tj
3.2258 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
-34.9962 -1.2983 Td
(output)Tj
2.897 0 Td
(weights)Tj
3.2428 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(presynaptic)Tj
4.8358 0 Td
(neuron)Tj
3.1464 0 Td
(based)Tj
2.4718 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
1.4683 0 Td
(error)Tj
2.2677 0 Td
(signal.)Tj
2.7666 0 Td
(In)Tj
1.1055 0 Td
(other)Tj
2.3528 0 Td
(words,)Tj
2.8913 0 Td
(this)Tj
1.6781 0 Td
(learn-)Tj
-34.8885 -1.2982 Td
(ing)Tj
1.5024 0 Td
(rule)Tj
1.7858 0 Td
(will)Tj
1.6667 0 Td
(cause)Tj
2.3868 0 Td
(the)Tj
1.4683 0 Td
(transformation)Tj
6.2758 0 Td
(calculated)Tj
4.2066 0 Td
(by)Tj
1.1792 0 Td
(the)Tj
1.4683 0 Td
(connection)Tj
4.7168 0 Td
(weights)Tj
3.2485 0 Td
(to)Tj
1.0262 0 Td
(be)Tj
1.1452 0 Td
(adjusted)Tj
3.5773 0 Td
(in)Tj
-35.6539 -1.304 Td
(the)Tj
1.4683 0 Td
(direction)Tj
3.8551 0 Td
(of)Tj
/F10 1 Tf
1.0148 0 Td
[()]TJ
/F5 1 Tf
(.)Tj
0.9922 0 Td
(For)Tj
1.627 0 Td
(example,)Tj
3.7758 0 Td
(if)Tj
0.7765 0 Td
(the)Tj
1.4684 0 Td
(output)Tj
2.897 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
/F10 1 Tf
1.4683 0 Td
[()]TJ
/F5 1 Tf
0.703 0 Td
(population)Tj
4.5921 0 Td
(represents)Tj
/F10 1 Tf
4.32 0 Td
[()]TJ
/F5 1 Tf
0.924 0 Td
(values,)Tj
2.8914 0 Td
(and)Tj
/F10 1 Tf
1.7348 0 Td
[()]TJ
/F5 1 Tf
0.7653 0 Td
(is)Tj
-36.2888 -1.2982 Td
(the)Tj
1.4683 0 Td
(TD)Tj
1.5648 0 Td
(error,)Tj
2.4944 0 Td
(this)Tj
1.6725 0 Td
(will)Tj
1.6611 0 Td
(cause)Tj
2.3924 0 Td
(the)Tj
/F10 1 Tf
1.4683 0 Td
[()]TJ
/F5 1 Tf
0.924 0 Td
(values)Tj
2.6702 0 Td
(to)Tj
1.0262 0 Td
(be)Tj
1.1508 0 Td
(adjusted)Tj
3.5716 0 Td
(as)Tj
1.0148 0 Td
(in)Tj
0.83 0.64 0.02 0 k
1.0375 0 Td
(Eq)Tj
1.2983 0 Td
(\(2\))Tj
0 g
(.)Tj
-24.219 -1.2983 Td
(The)Tj
1.7858 0 Td
(actual)Tj
2.5852 0 Td
(construction)Tj
5.3008 0 Td
(and)Tj
1.7291 0 Td
(simulation)Tj
4.4956 0 Td
(of)Tj
1.0148 0 Td
(NEF)Tj
2.0523 0 Td
(models)Tj
3.1067 0 Td
(is)Tj
0.8504 0 Td
(carried)Tj
3.0387 0 Td
(out)Tj
1.5534 0 Td
(by)Tj
1.1792 0 Td
(a)Tj
0.652 0 Td
(software)Tj
3.5886 0 Td
(suite)Tj
-34.1288 -1.3039 Td
(called)Tj
2.5172 0 Td
(Nengo)Tj
2.9026 0 Td
([)Tj
0.83 0.64 0.02 0 k
(33)Tj
0 g
(,)Tj
0.83 0.64 0.02 0 k
1.7348 0 Td
(34)Tj
0 g
(].)Tj
1.7405 0 Td
(Nengo)Tj
2.897 0 Td
(provides)Tj
3.6509 0 Td
(a)Tj
0.6519 0 Td
(high-level)Tj
4.1556 0 Td
(functional)Tj
4.3086 0 Td
(perspective)Tj
4.6998 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
1.4683 0 Td
(modeller,)Tj
4.0138 0 Td
(and)Tj
-35.7672 -1.2983 Td
(implements)Tj
4.9436 0 Td
(the)Tj
1.4683 0 Td
(NEF)Tj
2.0523 0 Td
(mathematics)Tj
5.329 0 Td
(behind)Tj
3.0217 0 Td
(the)Tj
1.4684 0 Td
(scenes.)Tj
2.9877 0 Td
(Nengo)Tj
2.9026 0 Td
(is)Tj
0.8447 0 Td
(an)Tj
1.2019 0 Td
(open-source)Tj
5.1817 0 Td
(project)Tj
-31.4019 -1.2982 Td
(\()Tj
0.83 0.64 0.02 0 k
(http://www.nengo.ca)Tj
0 g
(\),)Tj
9.5017 0 Td
(and)Tj
1.7348 0 Td
(all)Tj
1.1452 0 Td
(the)Tj
1.4682 0 Td
(code)Tj
2.1033 0 Td
(used)Tj
2.0579 0 Td
(to)Tj
1.0262 0 Td
(construct)Tj
3.9798 0 Td
(the)Tj
1.4683 0 Td
(model)Tj
2.7439 0 Td
(we)Tj
1.3266 0 Td
(present)Tj
3.1635 0 Td
(here)Tj
1.9672 0 Td
(is)Tj
0.8504 0 Td
(avail-)Tj
-34.537 -1.304 Td
(able)Tj
1.8312 0 Td
(at)Tj
0.83 0.64 0.02 0 k
0.9524 0 Td
(https://github.com/drasmuss/n)Tj
12.4269 0 Td
(hrlmodel)Tj
0 g
(.)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 280.6299 cm
BT
/F2 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(3)Tj
0.7653 0 Td
(Previous)Tj
4.4079 0 Td
(neural)Tj
3.1985 0 Td
(modelling)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 263.6787 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(We)Tj
1.61 0 Td
(begin)Tj
2.4265 0 Td
(by)Tj
1.1848 0 Td
(discussing)Tj
4.3313 0 Td
(standard)Tj
3.7191 0 Td
(\(non-hierarchical\))Tj
7.5741 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5206 0 Td
(models,)Tj
3.3335 0 Td
(as)Tj
1.0148 0 Td
(several)Tj
-34.6504 -1.3039 Td
(new)Tj
1.8765 0 Td
(developments)Tj
5.7713 0 Td
(incorporated)Tj
5.4141 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(NHRL)Tj
2.8857 0 Td
(model)Tj
2.7439 0 Td
(also)Tj
1.7688 0 Td
(address)Tj
3.2258 0 Td
(open)Tj
2.2167 0 Td
(issues)Tj
2.5171 0 Td
(there.)Tj
2.4888 0 Td
(We)Tj
1.6101 0 Td
(then)Tj
-35.0246 -1.2982 Td
(discuss)Tj
3.05 0 Td
(the)Tj
1.4683 0 Td
(much)Tj
2.5172 0 Td
(more)Tj
2.3414 0 Td
(sparsely)Tj
3.3902 0 Td
(populated)Tj
4.2179 0 Td
(domain)Tj
3.3222 0 Td
(of)Tj
1.0148 0 Td
(neural)Tj
2.7723 0 Td
(HRL)Tj
2.1486 0 Td
(models.)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 190.9984 cm
BT
/F0 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(3.1)Tj
1.5921 0 Td
(Neural)Tj
3.1276 0 Td
(models)Tj
3.4016 0 Td
(of)Tj
1.0488 0 Td
(reinforcement)Tj
6.2646 0 Td
(learning)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 173.9905 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(As)Tj
1.2642 0 Td
(the)Tj
1.4683 0 Td
(move)Tj
2.4265 0 Td
(is)Tj
0.8447 0 Td
(made)Tj
2.4264 0 Td
(to)Tj
1.0262 0 Td
(more)Tj
2.3414 0 Td
(biologically)Tj
4.7905 0 Td
(plausible)Tj
3.753 0 Td
(models,)Tj
3.3279 0 Td
(often)Tj
2.2847 0 Td
(there)Tj
2.2677 0 Td
(is)Tj
0.8447 0 Td
(a)Tj
0.652 0 Td
(trade-off)Tj
3.7134 0 Td
(between)Tj
-33.4316 -1.2982 Td
(biological)Tj
4.0875 0 Td
(detail)Tj
2.4208 0 Td
(and)Tj
1.7291 0 Td
(functionality.)Tj
5.5502 0 Td
(Purely)Tj
2.8063 0 Td
(computational)Tj
6.0491 0 Td
(systems)Tj
3.2995 0 Td
(have)Tj
2.0579 0 Td
(the)Tj
1.4683 0 Td
(option)Tj
2.8687 0 Td
(to)Tj
1.0205 0 Td
(ignore)Tj
-33.3579 -1.2983 Td
(some)Tj
2.33 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(challenges)Tj
4.32 0 Td
(faced)Tj
2.3187 0 Td
(by)Tj
1.1793 0 Td
(real)Tj
1.6951 0 Td
(physical)Tj
3.4525 0 Td
(systems,)Tj
3.5263 0 Td
(such)Tj
2.058 0 Td
(as)Tj
1.0147 0 Td
(limited)Tj
3.0671 0 Td
(precision,)Tj
4.1329 0 Td
(capacity,)Tj
3.6964 0 Td
(and)Tj
-35.2741 -1.3039 Td
(local)Tj
2.0749 0 Td
(information)Tj
5.0797 0 Td
(transfer.)Tj
3.5376 0 Td
(Thus)Tj
2.2507 0 Td
(when)Tj
2.4037 0 Td
(biologically)Tj
4.7905 0 Td
(based)Tj
2.4718 0 Td
(models)Tj
3.1068 0 Td
(add)Tj
1.7064 0 Td
(these)Tj
2.2564 0 Td
(extra)Tj
2.211 0 Td
(constraints,)Tj
4.8642 0 Td
(it)Tj
-36.7537 -1.2983 Td
(is)Tj
0.8447 0 Td
(often)Tj
2.2847 0 Td
(necessary)Tj
4.0252 0 Td
(to)Tj
1.0261 0 Td
(simplify)Tj
3.4412 0 Td
(the)Tj
1.4684 0 Td
(computations)Tj
5.7259 0 Td
(they)Tj
1.9276 0 Td
(are)Tj
1.4456 0 Td
(performing.)Tj
-20.9932 -1.2982 Td
(One)Tj
1.9388 0 Td
(simplification)Tj
5.7317 0 Td
(common)Tj
3.8494 0 Td
(to)Tj
1.0261 0 Td
(these)Tj
2.2507 0 Td
(models)Tj
3.1068 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7801 0 Td
(they)Tj
1.9276 0 Td
(restrict)Tj
3.033 0 Td
(themselves)Tj
4.5581 0 Td
(to)Tj
1.0262 0 Td
(associative)Tj
-32.2694 -1.3039 Td
(reinforcement)Tj
5.93 0 Td
(learning.)Tj
4.1442 0 Td
(In)Tj
1.1112 0 Td
(associative)Tj
4.4504 0 Td
(RL)Tj
1.3776 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3924 0 Td
(does)Tj
2.041 0 Td
(not)Tj
1.5704 0 Td
(consider)Tj
3.651 0 Td
(the)Tj
1.4683 0 Td
(future)Tj
2.6589 0 Td
(impact)Tj
2.9763 0 Td
(of)Tj
1.0148 0 Td
(its)Tj
-36.2548 -1.2983 Td
(actions)Tj
3.0557 0 Td
(\(i.e.,)Tj
1.9275 0 Td
(the)Tj
1.4684 0 Td
(value)Tj
2.3073 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4741 0 Td
(subsequent)Tj
4.6998 0 Td
(state\),)Tj
2.6022 0 Td
(it)Tj
0.788 0 Td
(just)Tj
1.6611 0 Td
(tries)Tj
1.9445 0 Td
(to)Tj
1.0205 0 Td
(pick)Tj
1.9275 0 Td
(whichever)Tj
4.32 0 Td
(action)Tj
2.6986 0 Td
(will)Tj
1.6667 0 Td
(result)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(7)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
133 0 obj
<>stream
endstream
endobj
134 0 obj
<>stream
endstream
endobj
135 0 obj
<>stream
endstream
endobj
136 0 obj
<>stream
endstream
endobj
137 0 obj
<>stream
endstream
endobj
138 0 obj
<>stream
endstream
endobj
139 0 obj
<>stream
endstream
endobj
140 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
141 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 144 0 R/Contents 145 0 R/TrimBox[0 0 612 792]>>
endobj
144 0 obj
[146 0 R 147 0 R 148 0 R 149 0 R 150 0 R 151 0 R 152 0 R 153 0 R 154 0 R]
endobj
146 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.e002)>>
endobj
147 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref003)>>
endobj
148 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref008)>>
endobj
149 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref015)>>
endobj
150 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref035)>>
endobj
151 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref037)>>
endobj
152 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref038)>>
endobj
153 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref039)>>
endobj
154 0 obj
<>/Border[0 0 0]/A 155 0 R>>
endobj
155 0 obj
<>
endobj
145 0 obj
[156 0 R 157 0 R 158 0 R 159 0 R 160 0 R 161 0 R 162 0 R 163 0 R 164 0 R 165 0 R 166 0 R]
endobj
156 0 obj
<>stream
q
0.83 0.64 0.02 0 k
239.8677 601.9087 m
264.3591 601.9087 l
h
f*
239.5276 588.926 m
244.2898 588.926 l
h
f*
249.4488 588.926 m
254.211 588.926 l
h
f*
370.5449 445.9465 m
380.0693 445.9465 l
h
f*
384.548 445.9465 m
394.0157 445.9465 l
h
f*
399.2315 445.9465 m
408.6992 445.9465 l
h
f*
234.3685 224.9008 m
243.8929 224.9008 l
h
f*
270.1984 224.9008 m
279.6661 224.9008 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 200.0125 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.474 0 Td
(largest)Tj
2.8176 0 Td
(immediate)Tj
4.5014 0 Td
(reward.)Tj
3.2541 0 Td
(That)Tj
2.092 0 Td
(is,)Tj
1.0771 0 Td
(instead)Tj
3.0784 0 Td
(of)Tj
1.0148 0 Td
(representing)Tj
5.2384 0 Td
(the)Tj
1.4684 0 Td
(state-action)Tj
4.8642 0 Td
(value)Tj
2.3074 0 Td
(as)Tj
/F10 1 Tf
0 Tc
-24.2701 -2.0522 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.8652 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.1508 0 Td
()Tj
/F10 1 Tf
1.0488 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.7518 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.0942 0 Td
()Tj
/F13 1 Tf
0.9921 0 Td
(g)Tj
/F14 1 Tf
0.4366 0.9467 Td
(X)Tj
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 388.1763 676.233 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5 0 0 5 390.1606 677.9905 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 397.2472 686.8913 cm
BT
/F10 1 Tf
1 TL
0 0 Td
[()]TJ
/F11 1 Tf
0.5725 0 Td
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.1566 0 Td
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
0.9297 0 Td
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 427.1811 690.9732 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 429.2787 686.8913 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F10 1 Tf
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 447.4204 690.9732 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 449.5748 686.8913 cm
BT
/F12 1 Tf
1 TL
0 0 Td
(;)Tj
/F13 1 Tf
0.4365 0 Td
(p)Tj
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
ET
Q
q
1 j
1 J
0 w
5.9998 0 0 5.9998 466.6393 690.9732 cm
BT
/F11 1 Tf
1 TL
0 0 Td
(0)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 468.7937 686.8913 cm
BT
/F11 1 Tf
1 TL
0 0 Td
()Tj
/F5 1 Tf
-0.004 Tc
-26.8779 -3.0784 Td
(it)Tj
0.7824 0 Td
(is)Tj
0.8504 0 Td
(simply)Tj
/F10 1 Tf
0 Tc
13.1923 -1.8538 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.8652 0 Td
[()]TJ
/F11 1 Tf
()Tj
1.1509 0 Td
()Tj
/F10 1 Tf
1.0488 0 Td
[()]TJ
/F11 1 Tf
()Tj
/F10 1 Tf
[()]TJ
/F12 1 Tf
(;)Tj
/F10 1 Tf
1.7518 0 Td
[()]TJ
/F11 1 Tf
()Tj
15.7037 0 Td
()Tj
/F5 1 Tf
(6)Tj
/F11 1 Tf
()Tj
/F5 1 Tf
-0.004 Tc
-36.3455 -2.1543 Td
(This)Tj
1.9842 0 Td
(means)Tj
2.8063 0 Td
(that)Tj
1.7802 0 Td
(the)Tj
1.4683 0 Td
(associative)Tj
4.4504 0 Td
(RL)Tj
1.3833 0 Td
(update)Tj
2.948 0 Td
(is)Tj
/F20 1 Tf
0.8447 0 Td
( )Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
2.4151 0 Td
[()]TJ
/F5 1 Tf
(\))Tj
1.0432 0 Td
(=)Tj
/F15 1 Tf
0.7993 0 Td
()Tj
/F5 1 Tf
([)Tj
/F10 1 Tf
[( )]TJ
/F21 1 Tf
1.4684 0 Td
[()]TJ
/F10 1 Tf
0.7937 0 Td
[()]TJ
/F5 1 Tf
(\()Tj
/F10 1 Tf
[()]TJ
/F5 1 Tf
(,)Tj
/F10 1 Tf
1.8198 0 Td
[()]TJ
/F5 1 Tf
(\)])Tj
1.3832 0 Td
(\(compare)Tj
4.0592 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(TD)Tj
-33.9417 -1.3039 Td
(update)Tj
2.948 0 Td
(in)Tj
0.83 0.64 0.02 0 k
1.0375 0 Td
(Eq)Tj
1.2926 0 Td
(\(2\))Tj
0 g
(\).)Tj
1.9445 0 Td
(The)Tj
1.7802 0 Td
(majority)Tj
3.6283 0 Td
(of)Tj
1.0148 0 Td
(work)Tj
2.279 0 Td
(in)Tj
1.0375 0 Td
(biological)Tj
4.0876 0 Td
(RL)Tj
1.3833 0 Td
(modelling)Tj
4.2689 0 Td
(has)Tj
1.5419 0 Td
(been)Tj
2.1203 0 Td
(on)Tj
1.2756 0 Td
(this)Tj
1.6781 0 Td
(type)Tj
1.9106 0 Td
(of)Tj
-35.2287 -1.2983 Td
(problem)Tj
3.6113 0 Td
([)Tj
0.83 0.64 0.02 0 k
(3)Tj
0 g
()Tj
0.83 0.64 0.02 0 k
(8)Tj
0 g
(].)Tj
2.5909 0 Td
(In)Tj
1.1111 0 Td
(cases)Tj
2.2224 0 Td
(where)Tj
2.6532 0 Td
(the)Tj
1.4683 0 Td
(agent's)Tj
2.9651 0 Td
(actions)Tj
3.0557 0 Td
(do)Tj
1.2586 0 Td
(not)Tj
1.5704 0 Td
(have)Tj
2.0636 0 Td
(any)Tj
1.6554 0 Td
(long)Tj
1.9898 0 Td
(term)Tj
2.1316 0 Td
(consequences)Tj
-30.3474 -1.2983 Td
(beyond)Tj
3.1805 0 Td
(the)Tj
1.4683 0 Td
(immediate)Tj
4.4957 0 Td
(reward)Tj
3.0274 0 Td
(\(such)Tj
2.3981 0 Td
(as)Tj
1.0091 0 Td
(a)Tj
0.6463 0 Td
(bandit)Tj
2.8006 0 Td
(task\),)Tj
2.3698 0 Td
(associative)Tj
4.4447 0 Td
(RL)Tj
1.3775 0 Td
(is)Tj
0.8447 0 Td
(all)Tj
1.1395 0 Td
(that)Tj
1.7745 0 Td
(is)Tj
0.8447 0 Td
(needed)Tj
3.0898 0 Td
(to)Tj
1.0205 0 Td
(per-)Tj
-35.9317 -1.3039 Td
(form)Tj
2.2053 0 Td
(well)Tj
1.8199 0 Td
(on)Tj
1.2756 0 Td
(the)Tj
1.4683 0 Td
(task.)Tj
2.0353 0 Td
(However,)Tj
4.0762 0 Td
(imagine)Tj
3.4469 0 Td
(a)Tj
0.6519 0 Td
(task)Tj
1.8085 0 Td
(where)Tj
2.6532 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3924 0 Td
(can)Tj
1.627 0 Td
(choose)Tj
2.965 0 Td
(between)Tj
3.5319 0 Td
(two)Tj
-33.4258 -1.2983 Td
(actions.)Tj
3.2825 0 Td
(Action)Tj
/F10 1 Tf
2.948 0 Td
[()]TJ
/F5 1 Tf
0.8731 0 Td
(gives)Tj
2.194 0 Td
(a)Tj
0.652 0 Td
(small)Tj
2.33 0 Td
(negative)Tj
3.532 0 Td
(reward,)Tj
3.2598 0 Td
(but)Tj
1.5534 0 Td
(allows)Tj
2.6985 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3866 0 Td
(to)Tj
1.0262 0 Td
(select)Tj
2.3981 0 Td
(an)Tj
1.2018 0 Td
(action)Tj
2.6986 0 Td
(on)Tj
1.2756 0 Td
(its)Tj
-35.7786 -1.2982 Td
(next)Tj
1.9559 0 Td
(step)Tj
1.8198 0 Td
(leading)Tj
3.1295 0 Td
(to)Tj
1.0261 0 Td
(a)Tj
0.652 0 Td
(large)Tj
2.1656 0 Td
(positive)Tj
3.3109 0 Td
(reward.)Tj
3.2598 0 Td
(Action)Tj
/F10 1 Tf
2.948 0 Td
[()]TJ
/F5 1 Tf
0.788 0 Td
(gives)Tj
2.194 0 Td
(a)Tj
0.652 0 Td
(small)Tj
2.3301 0 Td
(positive)Tj
3.3164 0 Td
(reward)Tj
3.0274 0 Td
(and)Tj
1.7347 0 Td
(nothing)Tj
-34.3102 -1.3039 Td
(else.)Tj
1.8992 0 Td
(An)Tj
1.4513 0 Td
(associative)Tj
4.4504 0 Td
(RL)Tj
1.3833 0 Td
(agent)Tj
2.3868 0 Td
(will)Tj
1.6667 0 Td
(always)Tj
2.829 0 Td
(learn)Tj
2.245 0 Td
(to)Tj
1.0261 0 Td
(choose)Tj
2.9707 0 Td
(action)Tj
/F10 1 Tf
2.6986 0 Td
[()]TJ
/F5 1 Tf
0.788 0 Td
(over)Tj
1.9785 0 Td
(action)Tj
/F10 1 Tf
2.6986 0 Td
[()]TJ
/F5 1 Tf
(,)Tj
1.0941 0 Td
(even)Tj
2.0693 0 Td
(though)Tj
3.0784 0 Td
(it)Tj
-36.714 -1.2983 Td
(could)Tj
2.4491 0 Td
(achieve)Tj
3.1748 0 Td
(more)Tj
2.3414 0 Td
(reward)Tj
3.0331 0 Td
(in)Tj
1.0374 0 Td
(the)Tj
1.4684 0 Td
(long)Tj
1.9842 0 Td
(run)Tj
1.6724 0 Td
(by)Tj
1.1793 0 Td
(selecting)Tj
/F10 1 Tf
3.6793 0 Td
[()]TJ
/F5 1 Tf
(.)Tj
1.0942 0 Td
(Sacrificing)Tj
4.4502 0 Td
(short)Tj
2.2847 0 Td
(term)Tj
2.1373 0 Td
(losses)Tj
2.4775 0 Td
(for)Tj
-34.4633 -1.2983 Td
(long)Tj
1.9842 0 Td
(term)Tj
2.1317 0 Td
(gains)Tj
2.296 0 Td
(is)Tj
0.8504 0 Td
(one)Tj
1.7008 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(fundamental)Tj
5.3178 0 Td
(challenges)Tj
4.32 0 Td
(of)Tj
1.0148 0 Td
(decision)Tj
3.5433 0 Td
(making)Tj
3.2483 0 Td
(\(and)Tj
2.075 0 Td
(motivations)Tj
4.9776 0 Td
(for)Tj
-35.943 -1.3039 Td
(advances)Tj
3.8268 0 Td
(such)Tj
2.0579 0 Td
(as)Tj
1.0148 0 Td
(TD)Tj
1.5704 0 Td
(learning\),)Tj
4.0875 0 Td
(and)Tj
1.7291 0 Td
(if)Tj
0.7767 0 Td
(we)Tj
1.3266 0 Td
(want)Tj
2.1884 0 Td
(to)Tj
1.0204 0 Td
(be)Tj
1.1509 0 Td
(able)Tj
1.8312 0 Td
(to)Tj
1.0261 0 Td
(model)Tj
2.7439 0 Td
(that)Tj
1.7801 0 Td
(behaviour)Tj
4.2406 0 Td
(we)Tj
1.3266 0 Td
(need)Tj
2.1429 0 Td
(to)Tj
-35.8409 -1.2983 Td
(go)Tj
1.1906 0 Td
(beyond)Tj
3.1861 0 Td
(associative)Tj
4.456 0 Td
(RL.)Tj
-7.6365 -1.2982 Td
(As)Tj
1.2699 0 Td
(a)Tj
0.652 0 Td
(sidenote,)Tj
3.8041 0 Td
(there)Tj
2.262 0 Td
(is)Tj
0.8504 0 Td
(another)Tj
3.3278 0 Td
(class)Tj
2.0466 0 Td
(of)Tj
1.0148 0 Td
(models)Tj
3.1068 0 Td
(that)Tj
1.7745 0 Td
(compute)Tj
3.736 0 Td
(the)Tj
1.474 0 Td
(TD)Tj
1.5646 0 Td
(error)Tj
2.2677 0 Td
(outside)Tj
3.1295 0 Td
(the)Tj
1.4683 0 Td
(model)Tj
-34.9452 -1.2983 Td
(and)Tj
1.7291 0 Td
(then)Tj
2.0183 0 Td
(feed)Tj
1.8822 0 Td
(it)Tj
0.788 0 Td
(in)Tj
1.0318 0 Td
(as)Tj
1.0148 0 Td
(an)Tj
1.2019 0 Td
(input)Tj
2.3811 0 Td
(signal)Tj
2.5398 0 Td
(\(e.g.,)Tj
2.126 0 Td
([)Tj
0.83 0.64 0.02 0 k
(15)Tj
0 g
(,)Tj
0.83 0.64 0.02 0 k
1.7404 0 Td
(35)Tj
0 g
()Tj
0.83 0.64 0.02 0 k
(37)Tj
0 g
(]\).)Tj
3.5433 0 Td
(From)Tj
2.4435 0 Td
(an)Tj
1.2019 0 Td
(implementation)Tj
6.6386 0 Td
(perspective,)Tj
-32.2807 -1.3039 Td
(learning)Tj
3.5206 0 Td
(can)Tj
1.6271 0 Td
(then)Tj
2.0182 0 Td
(be)Tj
1.1509 0 Td
(accomplished)Tj
5.6976 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(same)Tj
2.2564 0 Td
(way)Tj
1.7971 0 Td
(as)Tj
1.0148 0 Td
(associative)Tj
4.4504 0 Td
(RL,)Tj
1.6043 0 Td
(because)Tj
3.3222 0 Td
(the)Tj
1.4683 0 Td
(model)Tj
2.7439 0 Td
(only)Tj
-35.1776 -1.2983 Td
(needs)Tj
2.5058 0 Td
(to)Tj
1.0205 0 Td
(pick)Tj
1.9275 0 Td
(the)Tj
1.4684 0 Td
(action)Tj
2.6985 0 Td
(in)Tj
1.0375 0 Td
(each)Tj
2.0296 0 Td
(state)Tj
2.0296 0 Td
(that)Tj
1.7801 0 Td
(will)Tj
1.6668 0 Td
(result)Tj
2.4435 0 Td
(in)Tj
1.0374 0 Td
(the)Tj
1.4684 0 Td
(highest)Tj
3.0954 0 Td
(immediate)Tj
4.4956 0 Td
(rewardthe)Tj
-30.7046 -1.2982 Td
(difference)Tj
4.2009 0 Td
(is)Tj
0.8504 0 Td
(that)Tj
1.7802 0 Td
(the)Tj
1.4683 0 Td
(reward)Tj
3.8154 0 Td
(in)Tj
1.0375 0 Td
(this)Tj
1.6724 0 Td
(case)Tj
1.8652 0 Td
(is)Tj
0.8447 0 Td
(the)Tj
1.4684 0 Td
(externally)Tj
4.1328 0 Td
(computed)Tj
4.2632 0 Td
(signal,)Tj
2.7723 0 Td
(rather)Tj
2.6419 0 Td
(than)Tj
2.0296 0 Td
(the)Tj
-34.8432 -1.3039 Td
(environmental)Tj
6.1115 0 Td
(reward.)Tj
3.2541 0 Td
(From)Tj
2.4491 0 Td
(a)Tj
0.652 0 Td
(behavioural)Tj
4.9209 0 Td
(perspective)Tj
4.6998 0 Td
(this)Tj
1.6725 0 Td
(does)Tj
2.0409 0 Td
(capture)Tj
3.2144 0 Td
(the)Tj
1.474 0 Td
(behaviour)Tj
4.2349 0 Td
(of)Tj
-34.7241 -1.2983 Td
(interest,)Tj
3.4356 0 Td
(and)Tj
1.7291 0 Td
(these)Tj
2.2564 0 Td
(models)Tj
3.1067 0 Td
(can)Tj
1.6214 0 Td
(address)Tj
3.2258 0 Td
(many)Tj
2.4775 0 Td
(important)Tj
4.286 0 Td
(questions.)Tj
4.2462 0 Td
(However,)Tj
4.0761 0 Td
(if)Tj
0.771 0 Td
(we)Tj
1.3323 0 Td
(are)Tj
1.4457 0 Td
(inter-)Tj
-34.0098 -1.2983 Td
(ested)Tj
2.2507 0 Td
(specifically)Tj
4.5751 0 Td
(in)Tj
1.0375 0 Td
(neural)Tj
2.7666 0 Td
(mechanisms)Tj
5.21 0 Td
(for)Tj
1.3833 0 Td
(temporally)Tj
4.5411 0 Td
(extended)Tj
3.8551 0 Td
(learning,)Tj
3.7529 0 Td
(then)Tj
2.0183 0 Td
(such)Tj
2.0579 0 Td
(a)Tj
0.652 0 Td
(system)Tj
-34.1005 -1.3039 Td
(is)Tj
0.8447 0 Td
(solving)Tj
3.0728 0 Td
(an)Tj
1.2018 0 Td
(associative)Tj
4.4504 0 Td
(RL)Tj
1.3776 0 Td
(problem.)Tj
-9.7511 -1.2983 Td
(One)Tj
1.9389 0 Td
(approach)Tj
3.9571 0 Td
(to)Tj
1.0205 0 Td
(moving)Tj
3.2825 0 Td
(beyond)Tj
3.1918 0 Td
(associative)Tj
4.4504 0 Td
(RL)Tj
1.3776 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4683 0 Td
(use)Tj
1.5307 0 Td
(of)Tj
1.0148 0 Td
(eligibility)Tj
3.9287 0 Td
(traces.)Tj
2.7609 0 Td
(The)Tj
1.7802 0 Td
(basic)Tj
-33.749 -1.2982 Td
(idea)Tj
1.8709 0 Td
(of)Tj
1.0148 0 Td
(an)Tj
1.2018 0 Td
(eligibility)Tj
3.9288 0 Td
(trace)Tj
2.1714 0 Td
(is)Tj
0.8503 0 Td
(to)Tj
1.0262 0 Td
(add)Tj
1.7064 0 Td
(a)Tj
0.652 0 Td
(slowly)Tj
2.7212 0 Td
(decaying)Tj
3.7644 0 Td
(representation)Tj
6.015 0 Td
(of)Tj
1.0148 0 Td
(some)Tj
2.3301 0 Td
(signal)Tj
2.5398 0 Td
(of)Tj
1.0148 0 Td
(interest)Tj
-33.8227 -1.2983 Td
(\(such)Tj
2.3981 0 Td
(as)Tj
1.0148 0 Td
(recently)Tj
3.4129 0 Td
(visited)Tj
2.8176 0 Td
(states\).)Tj
2.9594 0 Td
(Then,)Tj
2.5568 0 Td
(rather)Tj
2.6475 0 Td
(than)Tj
2.0296 0 Td
(just)Tj
1.6611 0 Td
(updating)Tj
3.8098 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0352 0 Td
(immediately)Tj
5.1987 0 Td
(preced-)Tj
-34.0098 -1.3039 Td
(ing)Tj
1.5024 0 Td
(the)Tj
1.4683 0 Td
(prediction)Tj
4.371 0 Td
(error,)Tj
2.4888 0 Td
(we)Tj
1.3266 0 Td
(update)Tj
2.948 0 Td
(all)Tj
1.1509 0 Td
(the)Tj
1.4683 0 Td
(states)Tj
2.3924 0 Td
(leading)Tj
3.1351 0 Td
(up)Tj
1.2643 0 Td
(to)Tj
1.0261 0 Td
(that)Tj
1.7802 0 Td
(prediction)Tj
4.3709 0 Td
(error,)Tj
2.4888 0 Td
(weighted)Tj
-33.1821 -1.2983 Td
(by)Tj
1.1792 0 Td
(the)Tj
1.4684 0 Td
(decaying)Tj
3.7643 0 Td
(eligibility)Tj
3.9345 0 Td
(trace.)Tj
2.3924 0 Td
(Thus)Tj
2.2507 0 Td
(a)Tj
0.652 0 Td
(model)Tj
2.7439 0 Td
(can)Tj
1.6271 0 Td
(use)Tj
1.5307 0 Td
(essentially)Tj
4.2859 0 Td
(the)Tj
1.4683 0 Td
(same)Tj
2.2563 0 Td
(associative)Tj
4.4504 0 Td
(RL)Tj
-34.0041 -1.2982 Td
(framework,)Tj
4.8416 0 Td
(but)Tj
1.5533 0 Td
(with)Tj
1.9956 0 Td
(the)Tj
1.4683 0 Td
(benefit)Tj
2.9764 0 Td
(of)Tj
1.0148 0 Td
(eligibility)Tj
3.9288 0 Td
(traces)Tj
2.5342 0 Td
(the)Tj
1.4683 0 Td
(model)Tj
2.7439 0 Td
(can)Tj
1.6271 0 Td
(learn)Tj
2.2449 0 Td
(a)Tj
0.652 0 Td
(value)Tj
2.3074 0 Td
(for)Tj
1.3833 0 Td
(the)Tj
1.4683 0 Td
(states)Tj
-34.2082 -1.304 Td
(leading)Tj
3.1294 0 Td
(up)Tj
1.2643 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4684 0 Td
(reward,)Tj
3.2598 0 Td
(rather)Tj
2.6475 0 Td
(than)Tj
2.0296 0 Td
(just)Tj
1.6611 0 Td
(the)Tj
1.4683 0 Td
(state)Tj
2.0353 0 Td
(with)Tj
2.0013 0 Td
(immediate)Tj
4.5013 0 Td
(reward.)Tj
3.2541 0 Td
(Returning)Tj
4.2916 0 Td
(to)Tj
1.0262 0 Td
(the)Tj
-35.0643 -1.2982 Td
(above)Tj
2.5455 0 Td
(example,)Tj
3.7757 0 Td
(when)Tj
2.4038 0 Td
(the)Tj
1.4683 0 Td
(agent)Tj
2.3868 0 Td
(receives)Tj
3.3732 0 Td
(a)Tj
0.652 0 Td
(large)Tj
2.1599 0 Td
(positive)Tj
3.3166 0 Td
(reward)Tj
3.033 0 Td
(for)Tj
1.3833 0 Td
(the)Tj
1.4682 0 Td
(later)Tj
1.9956 0 Td
(action,)Tj
2.9254 0 Td
(the)Tj
1.4683 0 Td
(eligibil-)Tj
-34.3556 -1.2983 Td
(ity)Tj
1.2416 0 Td
(trace)Tj
2.1656 0 Td
(for)Tj
/F10 1 Tf
1.389 0 Td
[()]TJ
/F5 1 Tf
0.8731 0 Td
(will)Tj
1.6611 0 Td
(still)Tj
1.6384 0 Td
(be)Tj
1.1508 0 Td
(elevated,)Tj
3.6794 0 Td
(and)Tj
1.7291 0 Td
(some)Tj
2.3301 0 Td
(of)Tj
1.0148 0 Td
(that)Tj
1.7801 0 Td
(reward)Tj
3.0331 0 Td
(information)Tj
5.0738 0 Td
(will)Tj
1.6668 0 Td
(be)Tj
1.1509 0 Td
(associated)Tj
-31.5777 -1.3039 Td
(with)Tj
/F10 1 Tf
1.9956 0 Td
[()]TJ
/F5 1 Tf
(.)Tj
1.0998 0 Td
([)Tj
0.83 0.64 0.02 0 k
(38)Tj
0 g
(])Tj
1.8482 0 Td
(and)Tj
1.7348 0 Td
([)Tj
0.83 0.64 0.02 0 k
(39)Tj
0 g
(])Tj
1.8482 0 Td
(are)Tj
1.4513 0 Td
(examples)Tj
3.9061 0 Td
(of)Tj
1.0148 0 Td
(this)Tj
1.6781 0 Td
(approach,)Tj
4.1783 0 Td
(combining)Tj
4.5694 0 Td
(associative)Tj
4.4502 0 Td
(RL)Tj
1.3833 0 Td
(with)Tj
1.9956 0 Td
(eligibility)Tj
-33.1537 -1.2983 Td
(traces.)Tj
1.1962 -1.2982 Td
(However,)Tj
4.0819 0 Td
(there)Tj
2.262 0 Td
(are)Tj
1.4513 0 Td
(important)Tj
4.2803 0 Td
(limitations)Tj
4.5354 0 Td
(to)Tj
1.0205 0 Td
(such)Tj
2.0636 0 Td
(an)Tj
1.2019 0 Td
(approach.)Tj
4.1782 0 Td
(The)Tj
1.7801 0 Td
(main)Tj
2.2904 0 Td
(one)Tj
1.7007 0 Td
(is)Tj
0.8448 0 Td
(that)Tj
1.7801 0 Td
(at)Tj
-34.6674 -1.3039 Td
(some)Tj
2.3244 0 Td
(point)Tj
2.3471 0 Td
(the)Tj
1.4626 0 Td
(eligibility)Tj
3.9232 0 Td
(trace)Tj
2.16 0 Td
(will)Tj
1.6554 0 Td
(have)Tj
2.0522 0 Td
(decayed)Tj
3.4243 0 Td
(to)Tj
1.0204 0 Td
(a)Tj
0.6463 0 Td
(point)Tj
2.3471 0 Td
(where)Tj
2.6419 0 Td
(the)Tj
1.4626 0 Td
(model)Tj
2.7382 0 Td
(cannot)Tj
2.9707 0 Td
(distinguish)Tj
-33.1764 -1.2983 Td
(it)Tj
0.7824 0 Td
(from)Tj
2.2053 0 Td
(zero,)Tj
2.16 0 Td
(which)Tj
2.6532 0 Td
(will)Tj
1.6611 0 Td
(mark)Tj
2.3414 0 Td
(the)Tj
1.474 0 Td
(limit)Tj
2.1146 0 Td
(of)Tj
1.0148 0 Td
(how)Tj
1.9389 0 Td
(far)Tj
1.3153 0 Td
(away)Tj
2.228 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
1.4684 0 Td
(goal)Tj
1.8707 0 Td
(the)Tj
1.4684 0 Td
(agent)Tj
2.3924 0 Td
(can)Tj
1.6271 0 Td
(make)Tj
2.3924 0 Td
(effec-)Tj
-35.3137 -1.2983 Td
(tive)Tj
1.6611 0 Td
(decisions.)Tj
4.1329 0 Td
(Note)Tj
2.1827 0 Td
(that)Tj
1.7801 0 Td
(in)Tj
1.0375 0 Td
(purely)Tj
2.7609 0 Td
(computational)Tj
6.0491 0 Td
(systems)Tj
3.2995 0 Td
(\(with)Tj
2.3357 0 Td
(perfect)Tj
2.965 0 Td
(precision)Tj
3.9061 0 Td
(in)Tj
1.0374 0 Td
(the)Tj
1.4684 0 Td
(repre-)Tj
-34.6164 -1.2982 Td
(sented)Tj
2.8006 0 Td
(values\))Tj
3.0104 0 Td
(there)Tj
2.262 0 Td
(is)Tj
0.8504 0 Td
(no)Tj
1.2756 0 Td
(such)Tj
2.0579 0 Td
(limit,)Tj
2.3414 0 Td
(since)Tj
2.2451 0 Td
(the)Tj
1.4683 0 Td
(eligibility)Tj
3.9288 0 Td
(trace)Tj
2.1713 0 Td
(can)Tj
1.6271 0 Td
(be)Tj
1.1508 0 Td
(tracked)Tj
3.1974 0 Td
(indefinitely.)Tj
-30.3871 -1.304 Td
(However,)Tj
4.0762 0 Td
(in)Tj
1.0375 0 Td
(a)Tj
0.6519 0 Td
(realistic,)Tj
3.5377 0 Td
(noisy,)Tj
2.5851 0 Td
(imprecise)Tj
4.0932 0 Td
(neural)Tj
2.7723 0 Td
(model)Tj
2.7439 0 Td
(there)Tj
2.2621 0 Td
(will)Tj
1.6667 0 Td
(be)Tj
1.1452 0 Td
(a)Tj
0.6575 0 Td
(fixed)Tj
2.1884 0 Td
(limit)Tj
2.1203 0 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4684 0 Td
(effective)Tj
-34.0325 -1.2982 Td
(range)Tj
2.4548 0 Td
(of)Tj
1.0148 0 Td
(any)Tj
1.6497 0 Td
(eligibility)Tj
3.9288 0 Td
(trace.)Tj
2.3925 0 Td
(Another)Tj
3.5773 0 Td
(limitation)Tj
4.1669 0 Td
(is)Tj
0.8447 0 Td
(that)Tj
1.7745 0 Td
(eligibility)Tj
3.9231 0 Td
(traces)Tj
2.5284 0 Td
(perform)Tj
3.5149 0 Td
(indiscriminate)Tj
-31.7704 -1.2983 Td
(credit)Tj
2.5285 0 Td
(assignment,)Tj
4.9719 0 Td
(meaning)Tj
3.7304 0 Td
(that)Tj
1.7745 0 Td
(they)Tj
1.9275 0 Td
(will)Tj
1.6611 0 Td
(associate)Tj
3.7304 0 Td
(a)Tj
0.652 0 Td
(reward)Tj
3.033 0 Td
(with)Tj
2.0013 0 Td
(any)Tj
1.6553 0 Td
(temporally)Tj
4.5354 0 Td
(preceding)Tj
-32.2013 -1.3039 Td
(actions.)Tj
3.2825 0 Td
(This)Tj
1.9899 0 Td
(is)Tj
0.8447 0 Td
(generally)Tj
3.8325 0 Td
(not)Tj
1.576 0 Td
(a)Tj
0.652 0 Td
(problem)Tj
3.6113 0 Td
(for)Tj
1.389 0 Td
(standard)Tj
3.7247 0 Td
(RL,)Tj
1.6044 0 Td
(but)Tj
1.5533 0 Td
(if)Tj
0.771 0 Td
(we)Tj
1.3323 0 Td
(introduce)Tj
4.1101 0 Td
(structure)Tj
3.8268 0 Td
(into)Tj
1.8368 0 Td
(the)Tj
-35.9373 -1.2983 Td
(action)Tj
2.6986 0 Td
(space)Tj
2.3754 0 Td
(\(e.g.,)Tj
2.126 0 Td
(through)Tj
3.4469 0 Td
(the)Tj
1.4683 0 Td
(addition)Tj
3.6 0 Td
(of)Tj
1.0148 0 Td
(hierarchical)Tj
4.9379 0 Td
(actions\),)Tj
3.6227 0 Td
(wherein)Tj
3.4638 0 Td
(temporally)Tj
4.541 0 Td
(adjacent)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(8)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
157 0 obj
<>stream
endstream
endobj
158 0 obj
<>stream
endstream
endobj
159 0 obj
<>stream
endstream
endobj
160 0 obj
<>stream
endstream
endobj
161 0 obj
<>stream
endstream
endobj
162 0 obj
<>stream
endstream
endobj
163 0 obj
<>stream
endstream
endobj
164 0 obj
<>stream
endstream
endobj
165 0 obj
<>stream
endstream
endobj
166 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
167 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 168 0 R/Contents 169 0 R/TrimBox[0 0 612 792]>>
endobj
168 0 obj
[170 0 R 171 0 R 172 0 R 173 0 R 174 0 R 175 0 R 176 0 R 177 0 R 178 0 R 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R]
endobj
170 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref012)>>
endobj
171 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref012)>>
endobj
172 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref012)>>
endobj
173 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref025)>>
endobj
174 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref040)>>
endobj
175 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref044)>>
endobj
176 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref040)>>
endobj
177 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref040)>>
endobj
178 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref043)>>
endobj
179 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref018)>>
endobj
180 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref010)>>
endobj
181 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.e003)>>
endobj
182 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.ref018)>>
endobj
183 0 obj
<>/Border[0 0 0]/Dest(Rpone.0180234.g001)>>
endobj
184 0 obj
<>/Border[0 0 0]/A 185 0 R>>
endobj
185 0 obj
<>
endobj
169 0 obj
[186 0 R 187 0 R 188 0 R 189 0 R 190 0 R 191 0 R 192 0 R 193 0 R 194 0 R 195 0 R 196 0 R 197 0 R]
endobj
186 0 obj
<>stream
q
0.83 0.64 0.02 0 k
266.2866 667.3323 m
275.811 667.3323 l
h
f*
504.1701 472.3087 m
513.6945 472.3087 l
h
f*
502.0157 446.3433 m
511.5402 446.3433 l
h
f*
261.7512 374.6268 m
271.2756 374.6268 l
h
f*
275.6976 374.6268 m
285.222 374.6268 l
h
f*
290.3811 374.6268 m
299.9055 374.6268 l
h
f*
266.1732 335.622 m
275.6976 335.622 l
h
f*
396.1134 309.6 m
405.6378 309.6 l
h
f*
481.1528 296.6173 m
490.6772 296.6173 l
h
f*
226.4882 257.6126 m
236.0126 257.6126 l
h
f*
463.4646 257.6126 m
472.989 257.6126 l
h
f*
200.0126 192.6425 m
224.5039 192.6425 l
h
f*
299.1118 179.6031 m
308.6362 179.6031 l
h
f*
471.6283 94.9039 m
491.1874 94.9039 l
h
f*
0 g
1 j
1 J
0 w
10 0 0 10 200.0125 707.4141 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(actions)Tj
3.0557 0 Td
(may)Tj
1.9219 0 Td
(need)Tj
2.1373 0 Td
(to)Tj
1.0204 0 Td
(be)Tj
1.1452 0 Td
(treated)Tj
2.9934 0 Td
(qualitatively)Tj
5.074 0 Td
(differently,)Tj
4.5807 0 Td
(then)Tj
2.0126 0 Td
(eligibility)Tj
3.9231 0 Td
(traces)Tj
2.5285 0 Td
(do)Tj
1.2529 0 Td
(not)Tj
1.5647 0 Td
(lend)Tj
1.9616 0 Td
(them-)Tj
-35.172 -1.3039 Td
(selves)Tj
2.4888 0 Td
(to)Tj
1.0261 0 Td
(that)Tj
1.7801 0 Td
(type)Tj
1.9106 0 Td
(of)Tj
1.0148 0 Td
(processing.)Tj
-7.0242 -1.2982 Td
(One)Tj
1.9388 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4741 0 Td
(most)Tj
2.2053 0 Td
(advanced)Tj
3.9911 0 Td
(neural)Tj
2.7723 0 Td
(reinforcement)Tj
5.9357 0 Td
(learning)Tj
3.5206 0 Td
(models)Tj
3.1068 0 Td
(is)Tj
0.8504 0 Td
(the)Tj
1.4683 0 Td
(work)Tj
2.2791 0 Td
(described)Tj
4.0535 0 Td
(by)Tj
-35.807 -1.2983 Td
(Potjans)Tj
3.1804 0 Td
(et)Tj
0.9468 0 Td
(al.)Tj
1.1225 0 Td
(in)Tj
1.0375 0 Td
([)Tj
0.83 0.64 0.02 0 k
(12)Tj
0 g
(].)Tj
2.0749 0 Td
(Their)Tj
2.4208 0 Td
(model)Tj
2.7439 0 Td
(also)Tj
1.7688 0 Td
(makes)Tj
2.7553 0 Td
(use)Tj
1.5307 0 Td
(of)Tj
1.0148 0 Td
(eligibility)Tj
3.9287 0 Td
(traces,)Tj
2.761 0 Td
(but)Tj
1.5534 0 Td
(not)Tj
1.5703 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4683 0 Td
(same)Tj
2.2564 0 Td
(way)Tj
-35.172 -1.2983 Td
(as)Tj
1.0147 0 Td
(above.)Tj
2.7723 0 Td
(Rather)Tj
2.897 0 Td
(than)Tj
2.0239 0 Td
(using)Tj
2.3925 0 Td
(eligibility)Tj
3.9287 0 Td
(traces)Tj
2.5342 0 Td
(to)Tj
1.0205 0 Td
(replace)Tj
3.0614 0 Td
(the)Tj
1.4683 0 Td
(TD)Tj
1.5704 0 Td
(error)Tj
2.262 0 Td
(calculation,)Tj
4.8019 0 Td
(this)Tj
1.6724 0 Td
(model)Tj
-33.4202 -1.3039 Td
(uses)Tj
1.8935 0 Td
(eligibility)Tj
3.9288 0 Td
(traces)Tj
2.5285 0 Td
(to)Tj
1.0261 0 Td
(compute)Tj
3.736 0 Td
(the)Tj
1.474 0 Td
(TD)Tj
1.5648 0 Td
(error.)Tj
2.4888 0 Td
(Two)Tj
2.0239 0 Td
(eligibility)Tj
3.9288 0 Td
(traces)Tj
2.5341 0 Td
(with)Tj
1.9956 0 Td
(different)Tj
3.6567 0 Td
(time)Tj
2.0296 0 Td
(con-)Tj
-34.8092 -1.2982 Td
(stants)Tj
2.5171 0 Td
(are)Tj
1.4513 0 Td
(used)Tj
2.058 0 Td
(to)Tj
1.0261 0 Td
(compute)Tj
3.736 0 Td
(the)Tj
1.4684 0 Td
(change)Tj
3.0387 0 Td
(in)Tj
1.0375 0 Td
(state)Tj
2.0352 0 Td
(value,)Tj
2.5285 0 Td
(which)Tj
2.6476 0 Td
(when)Tj
2.4037 0 Td
(combined)Tj
4.2463 0 Td
(with)Tj
1.9956 0 Td
(reward)Tj
3.033 0 Td
(is)Tj
0.8448 0 Td
(suf-)Tj
-36.0678 -1.2983 Td
(ficient)Tj
2.7325 0 Td
(to)Tj
1.0262 0 Td
(compute)Tj
3.736 0 Td
(a)Tj
0.6576 0 Td
(prediction)Tj
4.371 0 Td
(error.)Tj
2.4888 0 Td
(However,)Tj
4.0762 0 Td
(this)Tj
1.6725 0 Td
(still)Tj
1.644 0 Td
(imposes)Tj
3.4753 0 Td
(a)Tj
0.652 0 Td
(fixed)Tj
2.194 0 Td
(time)Tj
2.0239 0 Td
(period)Tj
2.8346 0 Td
(during)Tj
-33.5846 -1.3039 Td
(which)Tj
2.6475 0 Td
(the)Tj
1.4627 0 Td
(TD)Tj
1.5647 0 Td
(error)Tj
2.2563 0 Td
(can)Tj
1.6214 0 Td
(be)Tj
1.1509 0 Td
(computed.)Tj
4.4844 0 Td
(If)Tj
0.8447 0 Td
(the)Tj
1.4627 0 Td
(TD)Tj
1.5647 0 Td
(update)Tj
2.9423 0 Td
(does)Tj
2.0353 0 Td
(not)Tj
1.5704 0 Td
(occur)Tj
2.4718 0 Td
(within)Tj
2.8063 0 Td
(the)Tj
1.4683 0 Td
(time)Tj
2.0239 0 Td
(window)Tj
-34.3783 -1.2983 Td
(dictated)Tj
3.4185 0 Td
(by)Tj
1.1849 0 Td
(the)Tj
1.4683 0 Td
(decay)Tj
2.4832 0 Td
(rate)Tj
1.7461 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(slow)Tj
2.0183 0 Td
(trace,)Tj
2.3981 0 Td
(then)Tj
2.0182 0 Td
(it)Tj
0.7824 0 Td
(will)Tj
1.6667 0 Td
(not)Tj
1.5704 0 Td
(be)Tj
1.1509 0 Td
(possible)Tj
3.4129 0 Td
(to)Tj
1.0261 0 Td
(compute)Tj
3.7361 0 Td
(a)Tj
0.6519 0 Td
(meaning-)Tj
-33.2161 -1.2983 Td
(ful)Tj
1.2812 0 Td
(TD)Tj
1.5647 0 Td
(error.)Tj
-1.6497 -1.3039 Td
(A)Tj
0.907 0 Td
(fixed)Tj
2.1884 0 Td
(window)Tj
3.4412 0 Td
(is)Tj
0.8504 0 Td
(feasible)Tj
3.1691 0 Td
(in)Tj
1.0318 0 Td
(an)Tj
1.2076 0 Td
(MDP)Tj
2.4037 0 Td
(framework)Tj
4.6148 0 Td
(because)Tj
3.3222 0 Td
(rewards)Tj
3.3902 0 Td
(and)Tj
1.7348 0 Td
(state)Tj
2.0353 0 Td
(transitions)Tj
4.4787 0 Td
(all)Tj
-35.9714 -1.2982 Td
(occur)Tj
2.4717 0 Td
(on)Tj
1.2756 0 Td
(a)Tj
0.652 0 Td
(fixed)Tj
2.194 0 Td
(schedule,)Tj
3.9061 0 Td
(which)Tj
2.6476 0 Td
(we)Tj
1.3322 0 Td
(can)Tj
1.6214 0 Td
(assume)Tj
3.1465 0 Td
(falls)Tj
1.7971 0 Td
(within)Tj
2.812 0 Td
(that)Tj
1.7801 0 Td
(window.)Tj
3.6624 0 Td
(But)Tj
1.627 0 Td
(in)Tj
1.0375 0 Td
(an)Tj
1.2019 0 Td
(SMDP)Tj
-33.1651 -1.2983 Td
(environment,)Tj
5.6579 0 Td
(i.e.,)Tj
1.5817 0 Td
(one)Tj
1.7008 0 Td
(where)Tj
2.6532 0 Td
(actions)Tj
3.0557 0 Td
(do)Tj
1.2529 0 Td
(not)Tj
1.5761 0 Td
(terminate)Tj
4.1045 0 Td
(on)Tj
1.2756 0 Td
(a)Tj
0.652 0 Td
(fixed)Tj
2.194 0 Td
(schedule,)Tj
3.9061 0 Td
(the)Tj
1.4683 0 Td
(learning)Tj
3.5263 0 Td
(update)Tj
-34.6051 -1.3039 Td
(may)Tj
1.9275 0 Td
(need)Tj
2.143 0 Td
(to)Tj
1.0261 0 Td
(be)Tj
1.1509 0 Td
(performed)Tj
4.4673 0 Td
(after)Tj
2.0353 0 Td
(100ms)Tj
2.829 0 Td
(or)Tj
1.0998 0 Td
(after)Tj
2.0296 0 Td
(10s;)Tj
1.7631 0 Td
(the)Tj
1.4684 0 Td
(system)Tj
2.9366 0 Td
(does)Tj
2.041 0 Td
(not)Tj
1.5704 0 Td
(know)Tj
2.4604 0 Td
(the)Tj
1.4684 0 Td
(delay)Tj
2.3073 0 Td
(ahead)Tj
-34.7241 -1.2983 Td
(of)Tj
1.0147 0 Td
(time,)Tj
2.2507 0 Td
(so)Tj
1.0885 0 Td
(it)Tj
0.7881 0 Td
(cannot)Tj
2.9763 0 Td
(be)Tj
1.1509 0 Td
(hard)Tj
2.0806 0 Td
(coded)Tj
2.6305 0 Td
(into)Tj
1.8425 0 Td
(the)Tj
1.4684 0 Td
(eligibility)Tj
3.9288 0 Td
(traces.)Tj
2.7609 0 Td
(This)Tj
1.9842 0 Td
(is)Tj
0.8504 0 Td
(an)Tj
1.2019 0 Td
(even)Tj
2.0693 0 Td
(greater)Tj
3.0047 0 Td
(problem)Tj
3.6113 0 Td
(in)Tj
-36.7027 -1.2982 Td
(the)Tj
1.4683 0 Td
(case)Tj
1.8595 0 Td
(of)Tj
1.0148 0 Td
(hierarchical)Tj
4.9379 0 Td
(RL,)Tj
1.6044 0 Td
(as)Tj
1.0148 0 Td
(the)Tj
1.474 0 Td
(state)Tj
2.0353 0 Td
(may)Tj
1.9275 0 Td
(be)Tj
1.1509 0 Td
(changing)Tj
3.9004 0 Td
(during)Tj
2.9254 0 Td
(the)Tj
1.4683 0 Td
(delay)Tj
2.3074 0 Td
(period;)Tj
3.0614 0 Td
(in)Tj
1.0318 0 Td
(that)Tj
1.7801 0 Td
(case)Tj
-34.9622 -1.2983 Td
(the)Tj
1.4683 0 Td
(value)Tj
2.3074 0 Td
(trace)Tj
2.1713 0 Td
(from)Tj
2.2053 0 Td
(the)Tj
1.4684 0 Td
(beginning)Tj
4.2633 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(delay)Tj
2.3074 0 Td
(period)Tj
2.8289 0 Td
(will)Tj
1.6668 0 Td
(have)Tj
2.0579 0 Td
(long)Tj
1.9899 0 Td
(since)Tj
2.2394 0 Td
(been)Tj
2.126 0 Td
(replaced)Tj
3.5886 0 Td
(by)Tj
-35.172 -1.3039 Td
(intermediate)Tj
5.3234 0 Td
(values)Tj
2.6645 0 Td
(by)Tj
1.1849 0 Td
(the)Tj
1.4683 0 Td
(end)Tj
1.7178 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(delay)Tj
2.3073 0 Td
(period.)Tj
3.0614 0 Td
(Thus)Tj
2.2451 0 Td
(while)Tj
2.3697 0 Td
(the)Tj
1.4684 0 Td
(model)Tj
2.7439 0 Td
(in)Tj
1.0375 0 Td
([)Tj
0.83 0.64 0.02 0 k
(12)Tj
0 g
(])Tj
1.8538 0 Td
(is)Tj
0.8447 0 Td
(a)Tj
0.652 0 Td
(solution)Tj
-33.4259 -1.2983 Td
(to)Tj
1.0261 0 Td
(the)Tj
1.4683 0 Td
(basic)Tj
2.211 0 Td
(TD)Tj
1.5704 0 Td
(RL)Tj
1.3776 0 Td
(problem,)Tj
3.8381 0 Td
(we)Tj
1.3266 0 Td
(will)Tj
1.6611 0 Td
(not)Tj
1.5761 0 Td
(be)Tj
1.1508 0 Td
(able)Tj
1.8312 0 Td
(to)Tj
1.0261 0 Td
(use)Tj
1.5307 0 Td
(this)Tj
1.6725 0 Td
(method)Tj
3.3278 0 Td
(to)Tj
1.0205 0 Td
(implement)Tj
4.5864 0 Td
(an)Tj
1.2019 0 Td
(SMDP)Tj
2.8857 0 Td
(RL)Tj
-36.2889 -1.2983 Td
(algorithm)Tj
4.1555 0 Td
(\(such)Tj
2.3981 0 Td
(as)Tj
1.0148 0 Td
(HRL\).)Tj
2.7156 0 Td
(We)Tj
1.61 0 Td
(compare)Tj
3.7134 0 Td
(the)Tj
1.4683 0 Td
(performance)Tj
5.3518 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(model)Tj
2.7439 0 Td
(from)Tj
2.2053 0 Td
([)Tj
0.83 0.64 0.02 0 k
(12)Tj
0 g
(])Tj
1.8539 0 Td
(to)Tj
1.0204 0 Td
(the)Tj
1.474 0 Td
(NHRL)Tj
-34.2082 -1.3039 Td
(model)Tj
2.7439 0 Td
(in)Tj
1.0318 0 Td
(Section)Tj
3.1691 0 Td
(5.1.)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 405.6944 cm
BT
/F0 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(3.2)Tj
1.5921 0 Td
(Hierarchical)Tj
5.4284 0 Td
(reinforcement)Tj
6.2646 0 Td
(learning)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 388.6866 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(In)Tj
1.1055 0 Td
(recent)Tj
2.7099 0 Td
(years)Tj
2.2677 0 Td
(there)Tj
2.262 0 Td
(has)Tj
1.542 0 Td
(been)Tj
2.1203 0 Td
(significant)Tj
4.3937 0 Td
(interest)Tj
3.2088 0 Td
(in)Tj
1.0375 0 Td
(neural)Tj
2.7722 0 Td
(correlates)Tj
4.0762 0 Td
(of)Tj
1.0148 0 Td
(hierarchical)Tj
4.938 0 Td
(reinforce-)Tj
-33.4486 -1.2982 Td
(ment)Tj
2.3073 0 Td
(learning)Tj
3.5263 0 Td
([)Tj
0.83 0.64 0.02 0 k
(25)Tj
0 g
(,)Tj
0.83 0.64 0.02 0 k
1.7348 0 Td
(40)Tj
0 g
()Tj
0.83 0.64 0.02 0 k
(44)Tj
0 g
(].)Tj
3.2088 0 Td
(However,)Tj
4.0762 0 Td
(in)Tj
1.0375 0 Td
(contrast)Tj
3.4582 0 Td
(to)Tj
1.0262 0 Td
(standard)Tj
3.7247 0 Td
(RL,)Tj
1.6044 0 Td
(there)Tj
2.262 0 Td
(has)Tj
1.542 0 Td
(been)Tj
2.126 0 Td
(almost)Tj
2.8856 0 Td
(no)Tj
1.2756 0 Td
(pre-)Tj
-35.7956 -1.2983 Td
(vious)Tj
2.3357 0 Td
(work)Tj
2.279 0 Td
(on)Tj
1.2756 0 Td
(recreating)Tj
4.2463 0 Td
(the)Tj
1.4683 0 Td
(computational)Tj
6.0491 0 Td
(theory)Tj
2.8063 0 Td
(of)Tj
1.0148 0 Td
(hierarchical)Tj
4.9322 0 Td
(reinforcement)Tj
5.9358 0 Td
(learning)Tj
3.5262 0 Td
(in)Tj
1.0318 0 Td
(a)Tj
-36.9011 -1.3039 Td
(neural)Tj
2.7722 0 Td
(model.)Tj
-1.576 -1.2983 Td
(The)Tj
1.7858 0 Td
(work)Tj
2.279 0 Td
(of)Tj
1.0148 0 Td
([)Tj
0.83 0.64 0.02 0 k
(40)Tj
0 g
(])Tj
1.8482 0 Td
(develops)Tj
3.685 0 Td
(a)Tj
0.652 0 Td
(proposal)Tj
3.6793 0 Td
(as)Tj
1.0148 0 Td
(to)Tj
1.0262 0 Td
(how)Tj
1.9388 0 Td
(the)Tj
1.4684 0 Td
(actor)Tj
2.2563 0 Td
(critic)Tj
2.2677 0 Td
(architecture)Tj
5.0117 0 Td
(could)Tj
2.4548 0 Td
(be)Tj
1.1508 0 Td
(modi-)Tj
-34.7298 -1.2982 Td
(fied)Tj
1.7234 0 Td
(in)Tj
1.0375 0 Td
(order)Tj
2.4207 0 Td
(to)Tj
1.0262 0 Td
(implement)Tj
4.5807 0 Td
(the)Tj
1.474 0 Td
(options)Tj
3.2259 0 Td
(framework)Tj
4.6147 0 Td
(of)Tj
1.0148 0 Td
(HRL.)Tj
2.3698 0 Td
(The)Tj
1.7858 0 Td
(implementation)Tj
6.6387 0 Td
(itself)Tj
2.1033 0 Td
(is)Tj
0.8447 0 Td
(purely)Tj
-34.8602 -1.3039 Td
(computational,)Tj
6.2701 0 Td
(with)Tj
2.0013 0 Td
(no)Tj
1.2756 0 Td
(neural)Tj
2.7722 0 Td
(components,)Tj
5.3972 0 Td
(but)Tj
1.5533 0 Td
([)Tj
0.83 0.64 0.02 0 k
(40)Tj
0 g
(])Tj
1.8539 0 Td
(includes)Tj
3.5433 0 Td
(a)Tj
0.6519 0 Td
(detailed)Tj
3.3676 0 Td
(discussion)Tj
4.371 0 Td
(of)Tj
1.0148 0 Td
(how)Tj
1.9389 0 Td
(that)Tj
-36.0111 -1.2983 Td
(proposal)Tj
3.6793 0 Td
(could)Tj
2.4548 0 Td
(map)Tj
1.9899 0 Td
(onto)Tj
2.0806 0 Td
(neural)Tj
2.7723 0 Td
(components)Tj
5.1703 0 Td
(in)Tj
1.0375 0 Td
(theory.)Tj
3.0274 0 Td
(The)Tj
1.7858 0 Td
(model)Tj
2.7439 0 Td
(in)Tj
1.0318 0 Td
([)Tj
0.83 0.64 0.02 0 k
(43)Tj
0 g
(])Tj
1.8539 0 Td
(is)Tj
0.8447 0 Td
(similar,)Tj
3.2145 0 Td
(in)Tj
1.0374 0 Td
(that)Tj
-34.7241 -1.2983 Td
(the)Tj
1.4683 0 Td
(implementation)Tj
6.6387 0 Td
(itself)Tj
2.1033 0 Td
(is)Tj
0.8504 0 Td
(non-neural,)Tj
4.9492 0 Td
(but)Tj
1.5534 0 Td
(the)Tj
1.4683 0 Td
(model)Tj
2.744 0 Td
(is)Tj
0.8503 0 Td
(used)Tj
2.058 0 Td
(to)Tj
1.0261 0 Td
(gain)Tj
1.9332 0 Td
(interesting)Tj
4.4958 0 Td
(insights)Tj
3.3165 0 Td
(into)Tj
-35.4555 -1.3039 Td
(neural)Tj
2.7722 0 Td
(data)Tj
1.9106 0 Td
(on)Tj
1.2755 0 Td
(hierarchical)Tj
4.938 0 Td
(processing)Tj
4.4673 0 Td
(in)Tj
1.0318 0 Td
(the)Tj
1.474 0 Td
(brain.)Tj
-16.6732 -1.2983 Td
(In)Tj
1.1111 0 Td
([)Tj
0.83 0.64 0.02 0 k
(18)Tj
0 g
(])Tj
1.8539 0 Td
(the)Tj
1.4683 0 Td
(authors)Tj
3.2485 0 Td
(extend)Tj
2.9027 0 Td
(a)Tj
0.6576 0 Td
(previous)Tj
3.6453 0 Td
(working)Tj
3.5603 0 Td
(memory)Tj
3.617 0 Td
(model)Tj
2.7439 0 Td
([)Tj
0.83 0.64 0.02 0 k
(10)Tj
0 g
(])Tj
1.8539 0 Td
(to)Tj
1.0261 0 Td
(a)Tj
0.652 0 Td
(hierarchical)Tj
4.9379 0 Td
(archi-)Tj
-34.4747 -1.2982 Td
(tecture.)Tj
3.2087 0 Td
(However,)Tj
4.0762 0 Td
(this)Tj
1.6781 0 Td
(model)Tj
2.744 0 Td
(is)Tj
0.8447 0 Td
(not)Tj
1.576 0 Td
(intended)Tj
3.7871 0 Td
(to)Tj
1.0205 0 Td
(be)Tj
1.1508 0 Td
(an)Tj
1.2076 0 Td
(implementation)Tj
6.6387 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(computational)Tj
-30.4155 -1.2983 Td
(theory)Tj
2.8006 0 Td
(of)Tj
1.0148 0 Td
(HRL.)Tj
2.3754 0 Td
(It)Tj
0.856 0 Td
(is)Tj
0.8504 0 Td
(designed)Tj
3.7644 0 Td
(specifically)Tj
4.5694 0 Td
(for)Tj
1.389 0 Td
(tasks)Tj
2.1657 0 Td
(with)Tj
2.0012 0 Td
(hierarchical)Tj
4.9379 0 Td
(spatial)Tj
2.778 0 Td
(structure,)Tj
4.0478 0 Td
(not)Tj
1.5704 0 Td
(hier-)Tj
-35.121 -1.3039 Td
(archical)Tj
3.3448 0 Td
(temporal)Tj
3.8381 0 Td
(structure.)Tj
4.0478 0 Td
(Although)Tj
4.0025 0 Td
(this)Tj
1.6781 0 Td
(is)Tj
0.8448 0 Td
(a)Tj
0.6576 0 Td
(component)Tj
4.8132 0 Td
(of)Tj
1.0148 0 Td
(HRL,)Tj
2.3697 0 Td
(it)Tj
0.7881 0 Td
(does)Tj
2.0352 0 Td
(not)Tj
1.5761 0 Td
(address)Tj
3.2258 0 Td
(the)Tj
-34.2366 -1.2983 Td
(problem)Tj
3.6113 0 Td
(of)Tj
1.0148 0 Td
(temporally)Tj
4.5354 0 Td
(extended)Tj
3.8607 0 Td
(abstract)Tj
3.3449 0 Td
(actions)Tj
3.0614 0 Td
(that)Tj
1.7801 0 Td
(is)Tj
0.8448 0 Td
(at)Tj
0.9524 0 Td
(the)Tj
1.4683 0 Td
(core)Tj
1.9503 0 Td
(of)Tj
1.0147 0 Td
(HRL)Tj
2.143 0 Td
(processing)Tj
4.4674 0 Td
(\(e.g.,)Tj
0.83 0.64 0.02 0 k
-34.0495 -1.2982 Td
(Eq)Tj
1.2925 0 Td
(\(3\))Tj
0 g
(\);)Tj
1.9446 0 Td
(we)Tj
1.3266 0 Td
(can)Tj
1.6271 0 Td
(think)Tj
2.3584 0 Td
(of)Tj
1.0148 0 Td
(this)Tj
1.6724 0 Td
(model)Tj
2.7439 0 Td
(as)Tj
1.0148 0 Td
(performing)Tj
4.8019 0 Td
(associative)Tj
4.8415 0 Td
(HRL.)Tj
2.7666 0 Td
(This)Tj
1.9843 0 Td
(is)Tj
0.8504 0 Td
(not)Tj
1.5703 0 Td
(intended)Tj
3.7871 0 Td
(as)Tj
1.0148 0 Td
(a)Tj
-36.612 -1.3039 Td
(critique)Tj
3.3051 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4684 0 Td
(model)Tj
2.7439 0 Td
(in)Tj
1.0375 0 Td
([)Tj
0.83 0.64 0.02 0 k
(18)Tj
0 g
(],)Tj
2.0749 0 Td
(but)Tj
1.5534 0 Td
(rather)Tj
2.6475 0 Td
(to)Tj
1.0262 0 Td
(show)Tj
2.296 0 Td
(that)Tj
1.7801 0 Td
(the)Tj
1.4684 0 Td
(problem)Tj
3.6113 0 Td
(it)Tj
0.788 0 Td
(addresses)Tj
4.0082 0 Td
(is)Tj
0.8504 0 Td
(not)Tj
1.5704 0 Td
(the)Tj
1.474 0 Td
(same)Tj
-34.7185 -1.2983 Td
(as)Tj
1.0147 0 Td
(the)Tj
1.4684 0 Td
(one)Tj
1.7008 0 Td
(we)Tj
1.3266 0 Td
(focus)Tj
2.3244 0 Td
(on)Tj
1.2755 0 Td
(in)Tj
1.0375 0 Td
(the)Tj
1.4684 0 Td
(work)Tj
2.279 0 Td
(presented)Tj
4.1159 0 Td
(here.)Tj
2.194 0 Td
(However,)Tj
4.0762 0 Td
(for)Tj
1.3833 0 Td
(the)Tj
1.4683 0 Td
(sake)Tj
1.9389 0 Td
(of)Tj
1.0148 0 Td
(comparison)Tj
4.9776 0 Td
(we)Tj
-35.0643 -1.2983 Td
(contrast)Tj
3.4582 0 Td
(their)Tj
2.109 0 Td
(results)Tj
2.8119 0 Td
(with)Tj
1.9956 0 Td
(the)Tj
1.4683 0 Td
(NHRL)Tj
2.8857 0 Td
(model)Tj
2.7439 0 Td
(in)Tj
1.0375 0 Td
(Section)Tj
3.1634 0 Td
(5.3.)Tj
ET
Q
q
1 j
1 J
0 w
11.9999 0 0 11.9999 200.0125 125.9716 cm
BT
/F2 1 Tf
1 TL
-0.0033 Tc
0 0 Td
(4)Tj
0.7653 0 Td
(Model)Tj
3.0992 0 Td
(description)Tj
ET
Q
q
1 j
1 J
0 w
10 0 0 10 200.0125 109.0204 cm
BT
/F5 1 Tf
1 TL
-0.004 Tc
0 0 Td
(We)Tj
1.61 0 Td
(have)Tj
2.058 0 Td
(divided)Tj
3.2144 0 Td
(the)Tj
1.4684 0 Td
(structure)Tj
3.8267 0 Td
(of)Tj
1.0148 0 Td
(the)Tj
1.4683 0 Td
(NHRL)Tj
2.8857 0 Td
(model)Tj
2.7439 0 Td
(into)Tj
1.8369 0 Td
(three)Tj
2.2677 0 Td
(main)Tj
2.2903 0 Td
(components,)Tj
5.3972 0 Td
(which)Tj
2.6475 0 Td
(we)Tj
-34.7298 -1.3039 Td
(term)Tj
2.1316 0 Td
(action)Tj
2.6986 0 Td
(values,)Tj
2.8913 0 Td
(action)Tj
2.6985 0 Td
(selection,)Tj
3.9458 0 Td
(and)Tj
1.7292 0 Td
(error)Tj
2.262 0 Td
(calculation)Tj
4.5808 0 Td
(\(shown)Tj
3.1861 0 Td
(in)Tj
0.83 0.64 0.02 0 k
1.0375 0 Td
(Fig)Tj
1.4796 0 Td
(1)Tj
0 g
(\).)Tj
1.2586 0 Td
(We)Tj
1.6101 0 Td
(begin)Tj
2.4321 0 Td
(by)Tj
1.1792 0 Td
(dis-)Tj
-35.121 -1.2982 Td
(cussing)Tj
3.1747 0 Td
(each)Tj
2.024 0 Td
(of)Tj
1.0148 0 Td
(these)Tj
2.2563 0 Td
(components)Tj
5.176 0 Td
(in)Tj
1.0319 0 Td
(turn,)Tj
2.194 0 Td
(and)Tj
1.7291 0 Td
(show)Tj
2.3017 0 Td
(how)Tj
1.9389 0 Td
(they)Tj
1.9219 0 Td
(implement)Tj
4.5864 0 Td
(their)Tj
2.1089 0 Td
(respective)Tj
ET
Q
q
1 j
1 J
0 w
576 737.1 m
36 737.1 l
36 737.6 l
576 737.6 l
f*
36 741.2598 107.1496 23.6976 re
W* n
q
107.0929 0 0 23.6409 36 741.3165 cm
q
/I0 Do
Q
Q
Q
q
0 0 612 792 re
W* n
1 j
1 J
0 w
7.9999 0 0 7.9999 498.1606 745.7952 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(A)Tj
0.8787 0 Td
(neural)Tj
2.941 0 Td
(model)Tj
2.8984 0 Td
(of)Tj
1.0418 0 Td
(HRL)Tj
ET
Q
q
1 j
1 J
0 w
36 48.0002 m
576 48.0002 l
576 47.5002 l
36 47.5002 l
f*
0.83 0.64 0.02 0 k
81.5811 34.9228 m
237.8835 34.9228 l
h
f*
0 g
7.9999 0 0 7.9999 36 36 cm
BT
/F0 1 Tf
1 TL
-0.005 Tc
0 0 Td
(PLOS)Tj
2.8559 0 Td
(ONE)Tj
2.3669 0 Td
(|)Tj
0.83 0.64 0.02 0 k
0.4748 0 Td
(https://doi.or)Tj
5.4142 0 Td
(g/10.137)Tj
3.8198 0 Td
(1/journal.po)Tj
5.1236 0 Td
(ne.01802)Tj
4.089 0 Td
(34)Tj
0 g
2.0906 0 Td
(July)Tj
1.9701 0 Td
(6,)Tj
1.0417 0 Td
(2017)Tj
35.894 0 Td
(9)Tj
0.7654 0 Td
(/)Tj
ET
endstream
endobj
187 0 obj
<>stream
endstream
endobj
188 0 obj
<>stream
endstream
endobj
189 0 obj
<>stream
endstream
endobj
190 0 obj
<>stream
endstream
endobj
191 0 obj
<>stream
endstream
endobj
192 0 obj
<>stream
endstream
endobj
193 0 obj
<>stream
endstream
endobj
194 0 obj
<>stream
endstream
endobj
195 0 obj
<>stream
endstream
endobj
196 0 obj
<>stream
endstream
endobj
197 0 obj
<>stream
BT
66.402 0 Td
(39)Tj
ET
Q
endstream
endobj
198 0 obj
<>/XObject<>>>/CropBox[0 0 612 792]/MediaBox[0 0 612 792]/Parent 10 0 R/Annots 201 0 R/Contents 202 0 R/TrimBox[0 0 612 792]>>
endobj
200 0 obj
<>stream
x\Wk/w%df\C4C&g.&
c
d s-9Yjld[,
V>ttTIէNgjWW9۫MD@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D@D /9sڻwݻnݺaÆ5k֬Xbɒ%-joooKlV@kk:::{zz-[jժumڴi>}9O;w.ۍD@D@D@D@D@D@D Ο?رܹ-rŋ3/nnnnhh'yZZZ̕XuGqr566ssr+/Vgҥ-[uԥK"Q](_.\xꩧnܸq坝H&Lu2D04p{]olLAHpv$|?4r8{,)NBCOc}4s"7T8B*&Uğ Obivڅ! 9g):06L]P{㏓<PD@D@D@D@D@DKCU2~NA2?\[_3s
9ED@D@D@D@D@D@F QO]tEC OYZ ǤQ )J#\-n@ ׯG4ӔɴZ&@W7d%qSSbQ>հT79L :A+"AGJ3O@ jr5K"