ISYE 6740 Predictive Modeling

.

School

Georgia Institute of Technology *

*We aren’t endorsed by this school

Course

6740

Subject

Economics

Date

Apr 28, 2024

Type

Pages

2

Uploaded by CommodoreThunderAnteater4 on coursehero.com

# Predictive Modeling Script
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import classification_report, accuracy_score


# Function to load data
def load_data(filepath):
    """Read the CSV file at *filepath* and return it as a DataFrame."""
    return pd.read_csv(filepath)


# Function to split the dataset into features and target variable
def split_data(df, target_col):
    """Separate *df* into a feature frame and the target column.

    Args:
        df: Full dataset, including the target column.
        target_col: Name of the column to predict.

    Returns:
        ``(X, y)`` where ``X`` is *df* without *target_col* and ``y`` is
        that column.

    Raises:
        KeyError: If *target_col* is not a column of *df*.
    """
    X = df.drop(columns=target_col)
    y = df[target_col]
    return X, y


# Function to divide data into training and testing sets
def train_test_split_data(X, y, test_size=0.2, random_state=42):
    """Split features and target into training and testing sets.

    Args:
        X: Feature frame.
        y: Target values aligned with *X*.
        test_size: Forwarded to ``sklearn.model_selection.train_test_split``.
        random_state: Seed forwarded to ``train_test_split`` so the split
            is reproducible.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.
    """
    return train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )


# Function to build and train the RandomForest model
def train_model(X_train, y_train):
    """Fit a 100-tree ``RandomForestClassifier`` (seed 42) and return it."""
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model


# Function to perform hyperparameter tuning
def hyperparameter_tuning(X_train, y_train):
    """Grid-search random-forest hyperparameters with 3-fold CV.

    Args:
        X_train: Training features.
        y_train: Training target.

    Returns:
        The ``best_estimator_`` found by ``GridSearchCV``.
    """
    param_grid = {
        'n_estimators': [100, 200, 300],
        # 'auto' is intentionally absent: for RandomForestClassifier it was
        # an alias of 'sqrt', deprecated in scikit-learn 1.1 and removed in
        # 1.3, where it makes every fit that uses it fail.
        'max_features': ['sqrt', 'log2'],
        'max_depth': [None, 10, 20, 30, 40],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    }
    grid_search = GridSearchCV(
        estimator=RandomForestClassifier(random_state=42),
        param_grid=param_grid,
        cv=3,
        n_jobs=-1,
        verbose=2,
    )
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_


# Function to evaluate the model
def evaluate_model(model, X_test, y_test):
    """Print the classification report and accuracy of *model* on the test set."""
    y_pred = model.predict(X_test)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Accuracy Score:")
    print(accuracy_score(y_test, y_pred))


# Combining all model functionalities
def perform_modeling(filepath, target_col):
    """Run the full pipeline: load, split, tune, evaluate.

    Args:
        filepath: Path of the CSV file holding the dataset.
        target_col: Name of the column to predict.

    Returns:
        The tuned model returned by ``hyperparameter_tuning``.
    """
    df = load_data(filepath)
    X, y = split_data(df, target_col)
    X_train, X_test, y_train, y_test = train_test_split_data(X, y)
    # No separate baseline fit here: its result was overwritten by the tuned
    # model before being used, so it only cost training time.
    model = hyperparameter_tuning(X_train, y_train)
    evaluate_model(model, X_test, y_test)
    return model
Your preview ends here
Eager to read the complete document? Join Bartleby Learn and gain access to the full version
  • Access to all documents
  • Unlimited textbook solutions
  • 24/7 expert homework help