Python 3.x – TensorFlow model for predicting dice game decisions

For my first ML project, I've modeled a dice game called Ten Thousand, or Farkle, depending on who you ask, as an extremely over-engineered solution for a computer player. You can find the complete game (with a perfectly good player built from about 15 lines of logic) here.

As a brief explanation of the game: 1s and 5s are always valid scoring dice. Other numbers only score as part of 1) three or more of a kind, 2) a straight, or 3) three pairs. I would like my model to predict which dice should be kept for a given roll. So far it does a great job of learning that 1s and 5s are keepers, but I haven't been able to get it past that, despite a lot of fiddling. A sketch of the rule it is trying to learn follows.
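
For concreteness, here is a rough sketch of the keep/discard rule described above. This is my paraphrase for illustration, not the actual choose_dice function from the linked code, and it skips some scoring edge cases:

from collections import Counter

def keep_mask(roll):
    """Return a 0/1 flag per die: 1 if that die should be kept.

    Simplified rule: 1s and 5s always score; other faces only score
    as three-or-more of a kind, a straight, or three pairs.
    """
    counts = Counter(roll)
    if sorted(roll) == [1, 2, 3, 4, 5, 6]:    # a straight scores everything
        return [1] * 6
    if sorted(counts.values()) == [2, 2, 2]:  # three pairs score everything
        return [1] * 6
    return [1 if die in (1, 5) or counts[die] >= 3 else 0
            for die in roll]

# keep_mask([1, 3, 3, 3, 5, 2]) -> [1, 1, 1, 1, 1, 0]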

I am looking for advice on how to improve the predictions to cover scoring dice other than 1s and 5s. I have tried increasing the proportion of those situations in the training set, increasing and decreasing the complexity of the model, both structurally and with various regularization methods, and even using convolutional layers.

Specifically, are the RMSProp optimizer and sigmoid cross-entropy loss appropriate here?
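
For context on that question: sigmoid cross-entropy scores each of the six dice as an independent binary keep/discard decision, which is the framing I'm relying on. A minimal sketch of that behaviour, assuming the imports and eager setup from the next section (this uses tf.nn.sigmoid_cross_entropy_with_logits, the op underlying tf.losses.sigmoid_cross_entropy):

# One logit per die; labels are 1. (keep) or 0. (discard).
logits = tf.constant([[2.0, -1.5, 0.3, -0.2, 1.1, -3.0]])
labels = tf.constant([[1.0, 0.0, 1.0, 0.0, 1.0, 0.0]])

# Produces one independent loss term per die position.
per_die = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
print(per_die.shape)  # (1, 6): six independent binary losses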

Setting things up.

import tensorflow as tf
import numpy as np
import pandas as pd

from collections import Counter
from itertools import combinations_with_replacement as combinations
from itertools import permutations as perms

import matplotlib.pyplot as plt
from tensorflow.keras import layers, Model
from tensorflow.data import Dataset


tf.enable_eager_execution()
tfe = tf.contrib.eager

I generate my data by simply constructing it, making sure to include plenty of examples of the special scoring situations.

def make_some_features(numbers, clip):
    features = set()
    all_combinations = (combo for combo in combinations(numbers, 6))
    for i, comb in enumerate(all_combinations):
        if i % clip == 0:  # Keep the feature set a reasonable size
            for perm in perms(comb):
                features.add(perm)
    return features

# Keeping every third combination still leaves a decent proportion of special scoring examples.
features = make_some_features(list(range(1, 7)), 3)

# Make a few rolls of three pairs.
special_features = set()
for _ in range(1000):
    half = [np.random.randint(1, 7) for _ in range(3)]  # upper bound is exclusive, so 7 includes sixes
    half += half
    for perm in perms(half):
        special_features.add(perm)

# There are far fewer straights to make.
for perm in perms([1, 2, 3, 4, 5, 6]):
    special_features.add(perm)

all_features = [np.array(feature) for feature in special_features]
all_features += [np.array(feature) for feature in features]
all_labels = [choose_dice(feature) for feature in special_features]
all_labels += [choose_dice(feature) for feature in features]

I put all of this into a pandas DataFrame to make shuffling and splitting into training, validation, and test sets easier.

def create_dataset(features, labels):
    dice = pd.Series(features)
    labels = pd.Series(labels)
    dataset = pd.DataFrame({'dice': dice,
                            'labels': labels})
    return dataset


all_dice = create_dataset(all_features, all_labels)
all_dice = all_dice.reindex(np.random.permutation(all_dice.index))

train_dice = all_dice.head(10000)
val_dice = train_dice.tail(5000)
test_dice = all_dice.tail(1936)

I one-hot encode the features and reshape the label tensors.

def pre_process_features(dice: pd.DataFrame) -> list:
    rolls = []
    for roll in dice['dice']:
        roll = np.array(roll)
        roll -= 1  # shift faces 1-6 to indices 0-5 for one-hot encoding
        roll = tf.one_hot(roll, depth=6, axis=-1)
        rolls.append(roll)
    return rolls


def pre_process_labels(dice: pd.DataFrame) -> list:
    labels = [tf.reshape(tf.convert_to_tensor([label]), (6, 1)) for label in dice['labels']]
    return labels

Model, optimization, loss and gradient functions.
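
The values of regularization_rate, learning_rate, and num_epochs aren't shown in this excerpt; placeholder values for the sketch below:

# Placeholder hyperparameters; the actual values aren't shown here.
learning_rate = 0.001
regularization_rate = 0.01
num_epochs = 200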

model = tf.keras.Sequential([
    layers.Dense(6, activation=tf.nn.relu, input_shape=(6, 6),
                 kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),
    layers.Dense(64, activation=tf.nn.relu,
                 kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),
    layers.Dense(128, activation=tf.nn.relu,
                 kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),
    # layers.Dense(256, activation=tf.nn.relu,
    #              kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),
    layers.Dense(32, activation=tf.nn.relu,
                 kernel_regularizer=tf.keras.regularizers.l2(regularization_rate)),
    layers.Dense(1)])

optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
global_step = tf.train.get_or_create_global_step()

def loss(model, features, labels):
    logits = model(features)
    if logits.shape == (1, 6, 1):
        logits = tf.squeeze(logits, [0])  # drop the batch dimension to match the (6, 1) labels
    standard_loss = tf.losses.sigmoid_cross_entropy(logits=logits, multi_class_labels=labels)
    return standard_loss


def grad(model, features, labels):
    with tf.GradientTape() as tape:
        loss_value = loss(model, features, labels)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)
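
The decode_label helper used below is defined in the full code; a minimal sketch of what I assume it does, thresholding each per-die value at zero (sigmoid(0) = 0.5), matching the thresholding in the prediction loop at the end:

def decode_label(label_tensor):
    # Assumed helper (not shown in this excerpt): flag each die as
    # keep (1.) or discard (0.) by thresholding its value at zero.
    return [1. if value > 0. else 0.
            for value in np.squeeze(label_tensor.numpy())]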

Training loop.

train_loss = []
train_accuracy = []
val_loss = []
val_accuracy = []

val_features, val_labels = iter(val_features), iter(val_labels)
val_feature, val_label = next(val_features), next(val_labels)
for epoch in range(num_epochs):
    epoch_loss_ave = tfe.metrics.Mean("loss")
    epoch_val_loss_average = tfe.metrics.Mean("loss")
    epoch_accuracy = tfe.metrics.Accuracy('acc')
    epoch_val_accuracy = tfe.metrics.Accuracy('acc')

    for feature, label in zip(train_features, train_labels):
        feature = tf.convert_to_tensor(feature.numpy().reshape(1, 6, 6))

        loss_value, grads = grad(model, feature, label)
        optimizer.apply_gradients(zip(grads, model.variables), global_step)
        epoch_loss_ave(loss_value)

        guessed_label = decode_label(model(feature))
        epoch_accuracy(guessed_label, decode_label(label))

        # Validation is tracked on a single held-out example at every training step.
        val_loss_value = loss(model, val_feature, val_label)
        epoch_val_loss_average(val_loss_value)

        val_guess_label = decode_label(model(val_feature))
        epoch_val_accuracy(val_guess_label, decode_label(val_label))

    train_loss.append(epoch_loss_ave.result())
    train_accuracy.append(epoch_accuracy.result())

    val_loss.append(epoch_val_loss_average.result())
    val_accuracy.append(epoch_val_accuracy.result())

    if epoch % 20 == 0:
        print(f'Epoch {epoch} Loss: {epoch_loss_ave.result()} Accuracy: {epoch_accuracy.result()}')
        print(f'Validation loss: {epoch_val_loss_average.result()} Accuracy: {epoch_val_accuracy.result()}')
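
The histories collected above can be charted with the matplotlib import from the setup; the plotting code isn't shown in this excerpt, so this is just a rough sketch:

# Sketch: plot the per-epoch training and validation loss histories.
plt.plot([float(loss_t) for loss_t in train_loss], label='train loss')
plt.plot([float(loss_t) for loss_t in val_loss], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('sigmoid cross-entropy loss')
plt.legend()
plt.show()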

Testing, and a few predictions for random game inputs.

test_results = []
test_accuracy = tfe.metrics.Accuracy('acc')

for feature, label in zip(test_features, test_labels):
    guessed_label = decode_label(model(feature))
    test_accuracy(guessed_label, decode_label(label))
print(f'Test accuracy: {test_accuracy.result()}')

for _ in range(25):
    roll = np.array([np.random.randint(0, 6) for _ in range(6)])  # indices 0-5; upper bound is exclusive
    turn = tf.one_hot(roll, depth=6, dtype=np.int32)
    roll += 1  # shift back to faces 1-6 for scoring and display
    answer = choose_dice(roll)
    print(f'Roll: {roll}')
    print(f'Dice should be kept: {answer}')
    turn = tf.convert_to_tensor(turn.numpy().reshape((1, 6, 6)), dtype=tf.float32)
    predictions = model.predict(turn)
    tf.nn.softmax(predictions)
    predicted_label = []
    for prediction in predictions[0]:
        if prediction[0] > 0.:
            predicted_label.append(1.)
        else:
            predicted_label.append(0.)
    print(f'Dice should be kept: {predicted_label}')

The complete code can be found here.