object MonteCarlo
- Source: MonteCarlo.scala
Type Members
- final case class ShouldUpdateState(get: Boolean) extends AnyVal with Product with Serializable
- type Tracker[Obs, A, R, T, M[_]] = MonoidAggregator[SARS[Obs, A, R, M], T, Trajectory[Obs, A, R, M]]
- type Trajectory[Obs, A, R, M[_]] = Iterator[(SARS[Obs, A, R, M], ShouldUpdateState)]
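Read loosely, a Trajectory is a stream of transitions, each tagged with a flag saying whether the value function should be updated at that step (which is how first-visit vs. every-visit behavior can be expressed), and a Tracker is the fold that builds one. A simplified sketch of that mental model, not the library's actual encoding (the effect M[_] is dropped and SARS is flattened into a case class):

```scala
// Conceptual stand-ins for the type members above; M[_] is dropped and
// SARS is flattened. None of this is ScalaRL's actual encoding.
final case class Sars[Obs, A, R](obs: Obs, action: A, reward: R, next: Obs)
final case class ShouldUpdateState(get: Boolean) extends AnyVal

// Each transition carries a flag for whether to update at that step,
// letting one Trajectory type express first-visit and every-visit MC.
type Trajectory[Obs, A, R] = Iterator[(Sars[Obs, A, R], ShouldUpdateState)]
```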
Value Members
- def byPolicy[Obs, A, R, M[_]](basePolicy: Policy[Obs, A, R, Cat, M], targetPolicy: Policy[Obs, A, R, Cat, M]): (State[Obs, A, R, M], A, R) ⇒ Weight
- def constant[Obs, A, R, M[_]]: (SARS[Obs, A, R, M]) ⇒ Weight
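Given its signature, byPolicy plausibly computes the standard off-policy importance weight, the ratio of target to behavior action probabilities, while constant weights every transition equally (the on-policy case). A hedged sketch of both weightings, with probability accessors that are hypothetical stand-ins rather than the library's Cat policy API:

```scala
// Stand-in for the library's Weight value class.
final case class Weight(w: Double) extends AnyVal

// Importance-sampling ratio pi(a|s) / b(a|s); both probability functions
// are hypothetical stand-ins for querying categorical policies.
def byPolicySketch[Obs, A](
    targetProb: (Obs, A) => Double,
    behaviorProb: (Obs, A) => Double
): (Obs, A) => Weight =
  (obs, a) => Weight(targetProb(obs, a) / behaviorProb(obs, a))

// The on-policy case: every transition gets weight 1.
def constantSketch[Obs, A]: (Obs, A) => Weight = (_, _) => Weight(1.0)
```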
- def processTrajectory[Obs, A, R, G, M[_]](trajectory: Trajectory[Obs, A, R, M], valueFn: ActionValueFn[Obs, A, G], agg: MonoidAggregator[SARS[Obs, A, R, M], G, Option[G]]): ActionValueFn[Obs, A, G]
So if you have G, your return... okay, this is a version that tracks the weights, but doesn't give you a nice way to push the weights back. What if we make the weight part of G? Try that in the next fn.
This is a full Monte Carlo trajectory tracker that's able to do off-policy control. The behavior policy does NOT change at all; that's something we'll have to solve, since presumably, if you're updating a value function at any point, you could derive a new agent from it.
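As a rough, self-contained sketch of the weighted pass described here (an assumption-laden stand-in, not the library's implementation: a Map plays the role of ActionValueFn, the effect M[_] is dropped, and the update rule follows the classic backward off-policy Monte Carlo pass from Sutton and Barto):

```scala
// Walk the episode backwards, accumulating the discounted return G and
// the running importance weight, and keep a weighted average of returns
// per (obs, action) pair. Each step is (obs, action, reward, rho).
def weightedBackup[Obs, A](
    episode: List[(Obs, A, Double, Double)],
    gamma: Double
): Map[(Obs, A), Double] = {
  val (_, _, sums) =
    episode.reverse.foldLeft((0.0, 1.0, Map.empty[(Obs, A), (Double, Double)])) {
      case ((g, w, m), (obs, a, r, rho)) =>
        val g2         = r + gamma * g // return from this step onward
        val (num, den) = m.getOrElse((obs, a), (0.0, 0.0))
        // update with the weight covering the steps *after* t, then fold
        // in rho, since Q(s, a) already conditions on the action taken
        (g2, w * rho, m.updated((obs, a), (num + w * g2, den + w)))
    }
  sums.collect { case (k, (num, den)) if den > 0.0 => k -> (num / den) }
}
```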
- def processTrajectorySimple[Obs, A, R, G, M[_]](trajectory: Trajectory[Obs, A, R, M], valueFn: ActionValueFn[Obs, A, G], agg: MonoidAggregator[R, G, G]): ActionValueFn[Obs, A, G]
This is a simpler version that doesn't do any weighting. It should be equivalent to the weighted version above, with a constant weight of 1 for everything.
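Setting every rho to 1.0 in the sketch above collapses the weighted average to a plain mean of observed returns, which is exactly the equivalence this comment claims:

```scala
// With all weights at 1.0, weightedBackup reduces to plain every-visit
// averaging of returns per (obs, action) pair.
val episode = List(
  ("s0", "a", 1.0, 1.0),
  ("s1", "b", 0.0, 1.0),
  ("s0", "a", 2.0, 1.0)
)
val q = weightedBackup(episode, gamma = 0.9)
// q(("s0", "a")) is the mean of the two returns observed at (s0, a)
```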
- def sarsa[Obs, A, R, M[_], T](moment: Moment[Obs, A, R, M], tracker: Tracker[Obs, A, R, T, M])(implicit arg0: Monad[M]): M[(Moment[Obs, A, R, M], Trajectory[Obs, A, R, M])]
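From its signature, sarsa appears to step the agent/environment Moment forward while the Tracker aggregates the resulting SARS stream into a Trajectory. A self-contained sketch of that rollout loop, with plain functions standing in for the policy and environment (every name below is illustrative, not ScalaRL's API):

```scala
// Step a policy and an environment until the episode ends (step returns
// None) or a step limit is hit, collecting (obs, action, reward, next)
// tuples; these are the raw material a Tracker would aggregate.
def rollout[Obs, A](
    start: Obs,
    choose: Obs => A,                        // stands in for the policy
    step: (Obs, A) => Option[(Double, Obs)], // None once the episode ends
    limit: Int = 10000
): List[(Obs, A, Double, Obs)] = {
  @annotation.tailrec
  def go(obs: Obs, n: Int, acc: List[(Obs, A, Double, Obs)]): List[(Obs, A, Double, Obs)] =
    if (n >= limit) acc.reverse
    else {
      val a = choose(obs)
      step(obs, a) match {
        case None            => acc.reverse
        case Some((r, next)) => go(next, n + 1, (obs, a, r, next) :: acc)
      }
    }
  go(start, 0, Nil)
}
```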
- def weighted[Obs, A, R, G, M[_]](agg: MonoidAggregator[R, G, G], fn: (SARS[Obs, A, R, M]) ⇒ Weight): MonoidAggregator[SARS[Obs, A, R, M], (G, Weight), Option[(G, Weight)]]
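The shape of weighted suggests it lifts a plain reward aggregator into one that also tracks a running Weight, with the Option middle type available to short-circuit once the weight collapses to zero (an assumption about its semantics, not confirmed by the docs). A rough sketch of that pairing, using a fold in place of a MonoidAggregator:

```scala
// Fold rewards into (G, Weight), short-circuiting to None once the
// cumulative weight hits zero, since later updates would be multiplied
// away. `plus` stands in for the underlying aggregator's reduction.
def foldWeighted[G](
    zero: G,
    plus: (G, Double) => G
)(steps: List[(Double, Double)]): Option[(G, Double)] = // (reward, weight)
  steps.foldLeft(Option((zero, 1.0))) {
    case (Some((g, w)), (r, rho)) =>
      val w2 = w * rho
      if (w2 == 0.0) None else Some((plus(g, r), w2))
    case (None, _) => None
  }
```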
- object ShouldUpdateState extends Serializable
- object Tracker