Commit 5ce3af69 authored by Hans-Peter Deifel's avatar Hans-Peter Deifel 🐢
Browse files

wta: Don't use floats for probability

This switches from floats to a simple arbitrary-precision base-10 representation
(an integer of decimal digits plus the number of decimal places), for which we
can easily generate uniform random numbers.

The problem with floats is that the representable values are not evenly spaced
across the whole range, and thus a random float in [0, 1) is not uniformly
distributed.
parent bb0d6c9b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -289,6 +289,7 @@ executable random-wta
  other-modules:       Types
                     , Generator
                     , Output
                     , Probability
  default-language:    Haskell2010
  default-extensions:  OverloadedStrings
                     , LambdaCase
+3 −4
Original line number Diff line number Diff line
@@ -13,10 +13,11 @@ import Data.Maybe
import           Data.Foldable

import           Types hiding (spec)
import           Probability

data GeneratorConfig m = GeneratorConfig
   { spec :: WTASpec m
   , zeroFreq :: Double
   , zeroFreq :: Probability
   }

type Generator m = ReaderT (GeneratorConfig m) IO
@@ -42,9 +43,7 @@ aritySummand arity = do

decideZero :: Generator m Bool
decideZero = do
  freq <- asks zeroFreq
  randomValue :: Double <- liftIO (randomRIO (0.0, 1.0))
  return $ randomValue < freq
  asks zeroFreq >>= liftIO . decide

-- Generates Nothing, when it decides that a zero value would be in order
genTransition :: Int -> Int -> [State] -> Generator m (Maybe (Transition m))
+4 −3
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import System.IO
import           Types
import           Generator
import           Output
import           Probability

data SomeMonoid = forall m. SomeMonoid (MonoidType m)

@@ -29,7 +30,7 @@ data Opts = Opts
  { optMonoid :: SomeMonoid
  , optStates :: Int
  , optSymbols :: SymbolSpec
  , optZeroFrequency :: Double
  , optZeroFrequency :: Probability
  , optRandomState :: Maybe StdGen
  }

@@ -82,10 +83,10 @@ parseOpts =
               "Comma separated list of symbols per arity. E.g. 2,0,1 means two symbols with arity 0, non with arity 1 and one with arity two"
          )
    <*> Options.option
          Options.auto
          (Options.eitherReader readProbability)
          (  Options.long "zero-frequency"
          <> Options.showDefault
          <> Options.value 0.7
          <> Options.value (Probability 7 1)
          <> Options.metavar "FREQ"
          <> Options.help
               "Frequency of edges with zero weight as number between 0 and 1."
+31 −0
Original line number Diff line number Diff line
{-# LANGUAGE GeneralizedNewtypeDeriving #-}

module Probability (Probability(..), readProbability, decide) where

import           System.Random
import           Text.Printf

-- | A probability stored as a decimal fraction: @Probability digits places@
-- stands for @digits / 10^places@ (this is how 'decide' interprets it), so
-- e.g. @Probability 7 1@ is 0.7 and @Probability 1 0@ is 1.
data Probability = Probability Integer Int

-- | Parse a probability written as a plain decimal number between 0 and 1.
--
-- Accepted inputs are @0@, @1@, @0.@ followed by one or more decimal digits,
-- and @1.@ followed only by zeros. Everything else yields a 'Left' carrying
-- an error message.
--
-- The fractional part is checked to consist solely of the characters
-- @0@-@9@ before it is converted. 'reads' on its own is too lenient here: it
-- also accepts a leading minus sign, leading whitespace and hexadecimal or
-- octal literals, which would yield a negative probability or a digit count
-- that does not match the parsed value.
readProbability :: String -> Either String Probability
readProbability input = case input of
  "0" -> Right (Probability 0 0)
  ('0':'.':rest)
    | all isDecimalDigit rest
    , [(digits, "")] <- reads rest -> Right (Probability digits (length rest))
    | otherwise -> failure
  "1" -> Right (Probability 1 0)
  ('1':'.':rest)
    | all (=='0') rest -> Right (Probability 1 0)
    | otherwise -> failure
  _ -> failure

  where
    failure = Left "Could not parse probability"

    -- Only plain ASCII decimal digits may appear after the decimal point.
    -- An empty fractional part passes this check but is rejected by 'reads'.
    isDecimalDigit c = c >= '0' && c <= '9'

-- | Renders a probability in decimal notation. A value with zero decimal
-- places is shown as the bare integer; otherwise the digits are zero-padded
-- on the left to the number of decimal places and prefixed with @0.@.
instance Show Probability where
  show (Probability mantissa places)
    | places == 0 = show mantissa
    | otherwise = "0." ++ printf "%0*d" places mantissa

-- | Randomly answer 'True' with the given probability.
--
-- Draws an integer from the inclusive range @[0, 10^places - 1]@ with
-- 'randomRIO' and reports whether it is strictly below the stored digits.
decide :: Probability -> IO Bool
decide (Probability threshold places) =
  (< threshold) <$> randomRIO (0, upperBound)
  where
    -- Largest value the draw can take: one less than 10^places.
    upperBound = 10 ^ places - 1