Updated generated code.

3 years ago · 93b14bba40
parent 8e132e035a
commit 93b14bba40
2 changed files with 42 additions and 3 deletions
--- a/commenting_code_model/encode_model.py
+++ b/commenting_code_model/encode_model.py
@ -36,9 +36,29 @@ def encode_tree(tree):

 def make_classifier (tree):
  lines = [
-    "char predict(float *r) {",
-    *encode_tree(tree),
-    "}"
+    "#pragma once",
+    "#include <cstdarg>",
+    "namespace PublishingHouse",
+    "{",
+    *map(indent_line, [
+      "namespace RandomForest",
+      "{",
+      *map(indent_line, [
+        "class DecisionTree",
+        "{",
+        "public:",
+        *map(indent_line, [
+          "char* predict(float *r)",
+          "{",
+          *encode_tree(tree),
+          "}",
+        ]),
+        "private:",
+        "};"
+      ]),
+      "}"
+    ]),
+    "}",
  ]

  return('\n'.join(lines))
--- a/commenting_code_model/random_forest_model_altered.py
+++ b/commenting_code_model/random_forest_model_altered.py
@ -9,11 +9,14 @@ from math import sqrt
 import json
 import os.path

+#from birdseye import eye
+
 # Get the directory of the current script to use in importing data
 # and exporting the model.
 basepath = os.path.dirname(os.path.realpath(__file__))

 # Load a CSV file. Definition of the function to read the csv and create dataset here
+#@eye
 def load_csv(filename):
 	dataset = list()
 	with open(filename, 'r') as file:
@ -25,11 +28,13 @@ def load_csv(filename):
 	return dataset
 
 # Convert string column to float - original dataset is in string format
+#@eye
 def str_column_to_float(dataset, column):
 	for row in dataset:
 		row[column] = float(row[column].strip())
 
 # Convert string column to integer / transforms classes 'mine' and 'rock' into 1 and 2
+#@eye
 def str_column_to_int(dataset, column):
 	# extracts values of the classes of the dataset: array of all the mine, rock
 	class_values = [row[column] for row in dataset]
@ -49,6 +54,7 @@ def str_column_to_int(dataset, column):
 	return lookup
 
 # Split a dataset into k folds
+#@eye
 def cross_validation_split(dataset, n_folds):
 	# creates list
 	dataset_split = list()
@ -70,6 +76,7 @@ def cross_validation_split(dataset, n_folds):
 	return dataset_split #return the dataset_split, a list of folds
 
 # Calculate accuracy percentage
+#@eye
 def accuracy_metric(actual, predicted):
 	correct = 0
 	# loops through index list which has length of actual classes
@ -82,6 +89,7 @@ def accuracy_metric(actual, predicted):
 	return correct / float(len(actual)) * 100.0
 
 # Evaluate an algorithm using a cross validation split
+#@eye
 def evaluate_algorithm(dataset, algorithm, n_folds, *args):
 	# split dataset in n folds
 	folds = cross_validation_split(dataset, n_folds)
@ -120,6 +128,7 @@ def evaluate_algorithm(dataset, algorithm, n_folds, *args):

 # Split a dataset based on a feature and a feature value defined in build tree
 # just trying many times, benefitting from speed of computer
+#@eye
 def test_split(index, value, dataset):
 	left, right = list(), list()
 	for row in dataset:
@ -134,6 +143,7 @@ def test_split(index, value, dataset):
 
 # Calculate the Gini index for a split dataset, using the left/right groups of test_split as groups
 # cf. calculating wealth distribution: https://en.wikipedia.org/wiki/Gini_coefficient
+#@eye
 def gini_index(groups, classes):
 	# count all samples at split point (the dataset), converts it in a float in order to do divisions
 	n_instances = float(sum([len(group) for group in groups]))
@ -158,6 +168,7 @@ def gini_index(groups, classes):
 	return gini
 
 # Select the best split point for a dataset
+#@eye
 def get_split(dataset, n_features):
 	# takes last element of each row (class) and returns it as a row, as it is a set, it has only 2 values
 	class_values = list(set(row[-1] for row in dataset))
@ -187,6 +198,7 @@ def get_split(dataset, n_features):
 	return {'index':b_index, 'value':b_value, 'groups':b_groups, 'gini':b_score}
 
 # Create a terminal node value = node at end of the tree = end leaf
+#@eye
 def to_terminal(group):
 	# returns list of classes of group
 	outcomes = [row[-1] for row in group]
@ -195,6 +207,7 @@ def to_terminal(group):
 	return max(set(outcomes), key=outcomes.count)

 # Counts the number of unique values in a 'group' (rows in dataset)
+#@eye
 def count_unique_values (group):
  # Pick classes in the dataset, transform to a set
  # count amount of values
@ -203,6 +216,7 @@ def count_unique_values (group):
 # Create child splits for a node or make terminals/end leafs
 # recursive function, it calls itself
 # node is dictionary returned by get_split (b_index, b_value, b_groups)
+#@eye
 def split(node, max_depth, min_size, n_features, depth):
 	left, right = node['groups']
 	del(node['groups'])
@ -244,6 +258,7 @@ def split(node, max_depth, min_size, n_features, depth):
 	# return no value because functions are working on the same dictionaries
 
 # Build a decision tree
+#@eye
 def build_tree(train, max_depth, min_size, n_features):
 	# root of decision tree is defined by dictionary of index, value, 2 groups (left/right of the split)
 	root = get_split(train, n_features)
@ -255,6 +270,7 @@ def build_tree(train, max_depth, min_size, n_features):
 
 # Make a prediction with a decision tree
 # recursive function as well
+#@eye
 def predict(node, row):
 	# node index = column feature, it looks up value for this feature for this row in dataset
 	# compare feature value of row you're checking with feature value of node
@ -278,6 +294,7 @@ def predict(node, row):
 # Create a random subsample from the dataset with replacement, ratio is called sample_size further on
 # This is called BOOTSTRAPPING: build new datasets from the original data, with the same number of rows
 # with replacement: after selecting the row we put it back into the data, so it can be selected twice or more
+#@eye
 def subsample(dataset, ratio):
 	sample = list()
 	# if the ratio is smaller than 1, not all of the dataset is taken as sample - the original author uses the full dataset
@ -288,6 +305,7 @@ def subsample(dataset, ratio):
 	return sample
 
 # Make a prediction with a list of bagged trees
+#@eye
 def bagging_predict(trees, row):
 	# asks the forest to predict class for every row in the test data, this gives list of votes
 	predictions = [predict(tree, row) for tree in trees]
@ -295,6 +313,7 @@ def bagging_predict(trees, row):
 	return max(set(predictions), key=predictions.count)
 
 # Random Forest Algorithm
+#@eye
 def random_forest(train, test, max_depth, min_size, sample_size, n_trees, n_features, model_path=None):
 	trees = list()
 	for _ in range(n_trees):