Our Services

Get 15% Discount on your First Order

DTLearner import pandas as pd import numpy as np class…

January 31, 2024

DTLearner

import pandas as pd

import numpy as np

class DTLearner(object):

def __init__(self, leaf_size = 1, verbose = False):

self.leaf_size = leaf_size

self.verbose = verbose

self.dataframe = None

self.tree = None

def get_dataframe(self):

return(self.dataframe)

def get_tree(self):

return(self.tree)

def addEvidence(self, Xtrain, Ytrain):

“””Accepts inputs (Xtrain) and outputs (Ytrain) and calls the build_tree function on the data, updates the tree attribute”””

dataframe = pd.DataFrame(Xtrain)

dataframe[‘Y’] = Ytrain

self.data = dataframe

self.tree = self.build_tree(dataframe)

self.query_tree = self.tree.copy()

def highest_correlation(self, df):

“””Returns the highest correlated feature by its index value”””

correlations = np.tril(np.array(df.corr()), k=-1) # takes the lower half of the correlation table without the diagonal.

return(abs(np.nan_to_num(correlations[-1])).argmax())

def split_val(self, df):

“””Acceptes a df and returns (best_feature, value to split on)”””

best_feature = self.highest_correlation(df)

column = df.iloc[:, best_feature]

return best_feature, column.median()

def build_tree(self, data):

“””Recursively build’s a tree by returning arrays in the form [feature, split value, less than index, greater than index]

leaf values are denoted as feature == -1″””

if data.shape[0] <= self.leaf_size or len(pd.unique(data.iloc[:,-1])) == 1:

return(np.array([-1, data.iloc[np.random.choice(range(data.shape[0])), -1], np.nan, np.nan]).reshape(1,4))

else:

best_feature, split_val = self.split_val(data)

# when split_val does not separate a feature, it will iterate forever, bandaid fix with min

if data[best_feature].shape[0] == data[data[best_feature] <= split_val].shape[0]:

split_val = data[best_feature].min()

left_tree = self.build_tree(data[data.iloc[:, best_feature] <= split_val])

right_tree = self.build_tree(data[data.iloc[:, best_feature] > split_val])

root = [best_feature, split_val, 1, left_tree.shape[0] + 1]

temp_tree = np.vstack([root, left_tree, right_tree])

return(temp_tree)

def query_value(self, values):

“””Queries a single list of values, returns the output of the tree”””

current_pos = 0

while True:

tree_pos = self.tree[current_pos]

if current_pos > self.tree.shape[0]:

return(‘Error querying value’)

elif int(tree_pos[0]) == -1:

return(tree_pos[1])

elif values[int(tree_pos[0])] <= tree_pos[1]:

current_pos += 1

else:

current_pos += int(tree_pos[3])

def query(self,Xtest):

“””Given an input (Xtest), returns the associated query output(s), can accept arrays”””

try: # assumes multiple test values

return([self.query_value(i) for i in Xtest])

except:

return([self.query_value(Xtest)])

RTLearner

import pandas as pd

import numpy as np

class RTLearner(object):

def __init__(self, leaf_size = 1, verbose = False):

self.leaf_size = leaf_size

self.verbose = verbose

self.dataframe = None

self.tree = None

def get_dataframe(self):

return(self.dataframe)

def get_tree(self):

return(self.tree)

def addEvidence(self, Xtrain, Ytrain):

“””Accepts inputs (Xtrain) and outputs (Ytrain) and calls the build_tree function on the data, updates the tree attribute”””

dataframe = pd.DataFrame(Xtrain)

dataframe[‘Y’] = Ytrain

self.data = dataframe

self.tree = self.build_tree(dataframe)

self.query_tree = self.tree.copy()

def build_tree(self, data):

“””Recursively build’s a tree by returning arrays in the form [feature, split value, less than index, greater than index]

leaf values are denoted as feature == -1″””

if data.shape[0] <= self.leaf_size or len(pd.unique(data.iloc[:,-1])) == 1:

# randomly selects a leaf value if it is larger than 1

return(np.array([-1, data.iloc[np.random.choice(range(data.shape[0])), -1], np.nan, np.nan]).reshape(1,4))

else:

feature = np.random.choice(data.columns[:-1])

split1, split2 = np.random.choice(data.iloc[:,feature], size=2)

split_val = (split1 + split2)/2.0

# checks if the split_val will only generate a left tree, rerandomizes split_val to allow two tree’s

while data[data.iloc[:, feature] <= split_val].shape[0] == data.shape[0]:

feature = np.random.choice(data.columns[:-1])

split1, split2 = np.random.choice(data.iloc[:,feature], size=2)

split_val = (split1 + split2)/2.0

left_tree = self.build_tree(data[data.iloc[:, feature] <= split_val])

right_tree = self.build_tree(data[data.iloc[:, feature] > split_val])

root = [feature, split_val, 1, left_tree.shape[0] + 1]

temp_tree = np.vstack([root, left_tree, right_tree])

return(temp_tree)

def query_value(self, values):

“””Queries a single list of values, returns the output of the tree”””

current_pos = 0

while True:

tree_pos = self.tree[current_pos]

if current_pos > self.tree.shape[0]:

return(‘Error querying value’)

elif int(tree_pos[0]) == -1:

return(tree_pos[1])

elif values[int(tree_pos[0])] <= tree_pos[1]:

current_pos += 1

else:

current_pos += int(tree_pos[3])

def query(self,Xtest):

“””Given an input (Xtest), returns the associated query output(s), can accept arrays”””

try: # assumes multiple test values

return([self.query_value(i) for i in Xtest])

except:

return([self.query_value(Xtest)])

Insane Learner

import BagLearner as bl

import LinRegLearner as lrl

class InsaneLearner(object):

def __init__(self, verbose=False):

self.learners = [bl.BagLearner(learner=lrl.LinRegLearner, bags=20) for _ in range(20)]

self.verbose = verbose

def add_evidence(self, Xtrain, Ytrain):

for learner in self.learners:

learner.add_evidence(Xtrain, Ytrain)

def query(self, Xtest):

predictions = [learner.query(Xtest) for learner in self.learners]

return sum(predictions) / len(predictions)

BagLearner

import numpy as np

import pandas as pd

import DTLearner as dt

import RTLearner as rt

class BagLearner(object):

def __init__(self, learner, kwargs = {}, bags=20, boost=False, verbose=False):

learners = []

for i in range(bags):

learners.append(learner(**kwargs))

self.learners = learners

self.kwargs = kwargs

self.bags = bags

#self.boost = boost

self.verbose = verbose

self.trees = []

def addEvidence(self, Xtrain, Ytrain):

df = pd.DataFrame(Xtrain)

df[‘output’] = Ytrain

for method in self.learners:

learning_df = pd.DataFrame([df.sample().values[0] for i in range(df.shape[0])])

X = learning_df.iloc[:, :-1]

Y = learning_df.iloc[:,-1]

method.addEvidence(X,Y)

self.trees.append(method.tree)

def query_value(self, values, tree):

“””Queries a single list of values for a given tree, returns the output of the tree”””

current_pos = 0

while True:

tree_pos = tree[current_pos]

if current_pos > tree.shape[0]:

return(‘Error querying value’)

elif int(tree_pos[0]) == -1:

return(tree_pos[1])

elif values[int(tree_pos[0])] <= tree_pos[1]:

current_pos += 1

else:

current_pos += int(tree_pos[3])

def query_trees(self,Xtest, tree):

“””Given an input (Xtest), returns the associated query output(s), can accept arrays”””

try: # assumes multiple test values

return([self.query_value(i, tree) for i in Xtest])

except:

return([self.query_value(Xtest, tree)])

def query(self,Xtest):

queries = [self.query_trees(Xtest, i) for i in self.trees]

return(np.average(queries,axis=0))

TestLearner

import math

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from DTLearner import DTLearner

from RTLearner import RTLearner

from BagLearner import BagLearner

# Specify the file path to your CSV

file_path = “C:/Users/Anthony/OneDrive/Desktop/CS-7646 Machine Learning for Trading/Project 3-Assess Learners/assess_learners/Data/Istanbul.csv”

# Load the dataset without headers and skip the first column

data = pd.read_csv(file_path, header=None, skiprows=1).iloc[:, 1:].reset_index(drop=True)

# Compute how much of the data is training and testing

train_rows = int(0.6 * data.shape[0])

test_rows = data.shape[0] – train_rows

# Separate out training and testing data

train_x = data.iloc[:train_rows, :-1].values

train_y = data.iloc[:train_rows, -1].values

test_x = data.iloc[train_rows:, :-1].values

test_y = data.iloc[train_rows:, -1].values

# Experiment 1: Overfitting with DTLearner

leaf_sizes = [1, 5, 10, 20, 50]

rmse_values = []

for leaf_size in leaf_sizes:

learner = DTLearner(leaf_size=leaf_size)

learner.addEvidence(train_x, train_y)

pred_y = learner.query(test_x)

rmse = math.sqrt(((test_y – pred_y) ** 2).sum() / test_y.shape[0])

rmse_values.append(rmse)

# Plot RMSE vs. Leaf Size

plt.figure(figsize=(8, 6))

plt.plot(leaf_sizes, rmse_values, marker=’o’)

plt.title(“RMSE vs. Leaf Size for DTLearner”)

plt.xlabel(“Leaf Size”)

plt.ylabel(“RMSE”)

plt.grid()

plt.show()

# Experiment 2: Bagging to Reduce Overfitting with DTLearner

num_bags = 20

leaf_size = 10

learner = BagLearner(learner=DTLearner, bags=num_bags)

learner.addEvidence(train_x, train_y)

# Query each bag and average the predictions

pred_y = np.zeros(test_y.shape)

for i in range(num_bags):

pred_y += learner.query(test_x)

pred_y /= num_bags

rmse = math.sqrt(((test_y – pred_y) ** 2).sum() / test_y.shape[0])

print(f”RMSE with BagLearner: {rmse}”)

# Experiment 3: Comparison between DTLearner and RTLearner

dt_learner = DTLearner()

rt_learner = RTLearner()

dt_learner.addEvidence(train_x, train_y)

rt_learner.addEvidence(train_x, train_y)

dt_pred_y = dt_learner.query(test_x)

rt_pred_y = rt_learner.query(test_x)

# Calculate the Mean Absolute Error (MAE) for each learner

dt_mae = np.mean(np.abs(test_y – dt_pred_y))

rt_mae = np.mean(np.abs(test_y – rt_pred_y))

# Print the results

print(f”DTLearner MAE: {dt_mae}”)

print(f”RTLearner MAE: {rt_mae}”)

I am getting a graph that looks like this…

But I need this to print graphs that look like the ones below. Please help fix this code to do so and help with the correct answers to the experiment questions. Thank you in advance.

class, DTLearner, import, numpy, pandas, Python Programming

Share This Post

Order a Similar Paper and get 15% Discount on your First Order

Related Questions

Python –Gaussian Naive Bayes classifier How can I resolve this…

Python –Gaussian Naive Bayes classifier How can I resolve this Error “TypeError: np.matrix is not supported. Please convert to a numpy array with np.asarray. For more information “ CODE: from sklearn.datasets import load_svmlight_filefrom sklearn.model_selection import train_test_splitfrom sklearn.pipeline import make_pipelinefrom sklearn.preprocessing import StandardScalerfrom sklearn.naive_bayes import GaussianNB # Load the data

Program should run in python 3 API :…

Program should run in python 3 API : https://api.chucknorris.io/ Outline : Welcome: Print a Welcome message for the user. : Make a GET request from the library to API: Chuck Norris Jokes. Choose science for as the category Only generate jokes of this categor parse the JSON

ogram should run in python 3 API :…

ogram should run in python 3 API : https://api.chucknorris.io/ Outline : Welcome: Print a Welcome message for the user. : Make a GET request from the library to API: Chuck Norris Jokes. Choose science for as the category Only generate jokes of this categor parse the JSON

DTLearner import pandas as pd import numpy as np class…

DTLearner import pandas as pd import numpy as np class DTLearner(object): def __init__(self, leaf_size = 1, verbose = False): self.leaf_size = leaf_size self.verbose = verbose self.dataframe = None self.tree = None

Two sum in Python: Based on array of different integers (ints) and…

Two sum in Python: Based on array of different integers (ints) and a single integer (target), write function to print count/amount of combinations (non-repeating) of pairs of integers in (ints) such that the two integers sum to (target). Starting with: def twosum (ints, target): I’m not sure how to make

import pandas as pd import numpy as np from sklearn.model_selection…

import pandas as pdimport numpy as npfrom sklearn.model_selection import train_test_splitfrom sklearn import linear_modelfrom sklearn.metrics import r2_score import seaborn as snsimport matplotlib.pylab as plt%matplotlib inline reg = linear_model.LinearRegression()X = iris[[‘petal_length’]]y = iris[‘petal_width’]reg.fit(X, y)print(“y = x *”, reg.coef_, “+”, reg.intercept_) predicted = reg.predict(X)mse = ((np.array(y)-predicted)**2).sum()/len(y)r2 = r2_score(y, predicted)print(“MSE:”, mse)print(“R Squared:”,

how to calculate average maths score for student of each year…

how to calculate average maths score for student of each year level(9,10,11,12) at each school, using pandas. how to make a pandas series for each year, and group each series by school, and then combine the series in a dataframe the results like below Year 9 Year 10 Year

def add(num1, num2): return num1 + num2 def sub(num1, num2):…

def add(num1, num2): return num1 + num2 def sub(num1, num2): return num1 – num2 def mult(num1, num2): return num1 * num2 def div(num1, num2): if num2 == 0: raise ZeroDivisionError(“Cannot divide by zero”) else: return num1 / num2 def isInRange(lr, hr, n): if n >= lr

The Beauty of Data Visualization Here is an easy and short…

The Beauty of Data Visualization Here is an easy and short discussion post: watch this Ted Talk on data visualization. https://www.youtube.com/watch?v=5Zg-C8AAIGg Watch the video, then answer the following questions: Summarize the video in a few sentences (2-4) Why is it important to visualize data in specific ways? Give

PLEASEEE CREATE ILLUSTRATION OF EXACTLY HOW THE HIERARCHY CHART…

PLEASEEE CREATE ILLUSTRATION OF EXACTLY HOW THE HIERARCHY CHART WILL LOOK USING THE CODE BELOW. THE CODE MUST BE HORIZONTAL. USE THIS PHOTO AS AN EXAMPLE OF WHAT IT IS SUPPOSED TO LOOK LIKE !!! class VendingMachine: def __init__(self): self.products = {

Since I used isdigit() method to verify the input, any floating…

Since I used isdigit() method to verify the input, any floating number input won’t be processed because of the “.” in the number. I suppose to use type conversion with “try, except” method instead. # calculate pay(hours worked and hourly rate ) by declaring functiondef CalPay(hrs, rate): if hrs <=

How to remove stopwords from. CSV to text classification or…

How to remove stopwords from. CSV to text classification or sentiment. my project is cyberbully detection using machine learning, and at the pre-processing stage, I need to remove some of the stopwords this error keeps showing up, and I can’t solve it ” TypeError: list indices must be integers or

I am creating a text based adventure game. I have written the code…

I am creating a text based adventure game. I have written the code below which does allow me to move from room to room but for some reason I can’t get it to show the inventory after I have collected an item from a room…please help. The project is being

A Python function definition is initiated by using this keyword in…

A Python function definition is initiated by using this keyword in the header: function def import It is necessary to include this punctuation at the end of a function header statement: ; semi-colon : colon {opening curly brace All statements within the

Write code in Python using Python compiler online GDB. The program…

Write code in Python using Python compiler online GDB. The program accepts a test score from keyboard, then determine the letterGrade, such as A, B, C … based on the score entered. Your IF statement should start from lowest score 60, and then follow with higher score.

While using this python program on cengage mindtap for programming…

While using this python program on cengage mindtap for programming exercise 4.12 it is not taking the hours worked and multiplying it by the Total Pay of the person that is listed in a txt file. Enter the file name: test.txt Name Hours Total Pay Lennon 12 3.33 McCartney 57

how to correct this code? correct the output into A1Z26 cipher:…

how to correct this code? correct the output into A1Z26 cipher: VIVAN LOS PATOS DE LA PISCINA.

1 in contrast to a terminal-based program, a GUI-based program…

1 in contrast to a terminal-based program, a GUI-based program completely controls the order in which the user enters inputs can allow the user to enter inputs in any order 2 The attribute used to attach an event-handling method to a button is named pressevent onclick command

Our Services

DTLearner import pandas as pd import numpy as np class…

Share This Post

Related Questions

Python –Gaussian Naive Bayes classifier How can I resolve this…

Program should run in python 3 API :…

ogram should run in python 3 API :…

DTLearner import pandas as pd import numpy as np class…

Two sum in Python: Based on array of different integers (ints) and…

import pandas as pd import numpy as np from sklearn.model_selection…

how to calculate average maths score for student of each year…

def add(num1, num2): return num1 + num2 def sub(num1, num2):…

The Beauty of Data Visualization Here is an easy and short…

PLEASEEE CREATE ILLUSTRATION OF EXACTLY HOW THE HIERARCHY CHART…

Since I used isdigit() method to verify the input, any floating…

How to remove stopwords from. CSV to text classification or…

I am creating a text based adventure game. I have written the code…

A Python function definition is initiated by using this keyword in…

Write code in Python using Python compiler online GDB. The program…

While using this python program on cengage mindtap for programming…

how to correct this code? correct the output into A1Z26 cipher:…

1 in contrast to a terminal-based program, a GUI-based program…

Terms & Conditions

Privacy Policy

Use Our 6 Free Tools

Follow us:

We accept:

© 2023. Homework Study Help. All Rights Reserved.