Usage Guide
This guide provides examples of how to use the core functionality of pysmatch.
Basic Example
import warnings
warnings.filterwarnings('ignore')
from pysmatch.Matcher import Matcher
import pandas as pd
import numpy as np
# Load the loan data and split it into the two groups handed to Matcher.
path = "misc/loan.csv"
data = pd.read_csv(path)
# .copy() gives each group its own DataFrame, so the label assignments below
# do not write into a selection of `data` (the pattern that triggers pandas'
# SettingWithCopyWarning).
test = data[data.loan_status == "Default"].copy()
control = data[data.loan_status == "Fully Paid"].copy()
# Recode the outcome column as a binary label: 1 for the test group, 0 for control.
test['loan_status'] = 1
control['loan_status'] = 0
m = Matcher(test, control, yvar="loan_status", exclude=[])
# Seed NumPy's global random generator so repeated runs of the example use the same seed.
np.random.seed(20240919)
# ============ (1) Normal training (without Optuna) =============
# Alternative model types, left commented out:
# m.fit_scores(balance=True, nmodels=10, n_jobs=3, model_type='knn')
# m.fit_scores(balance=True, nmodels=10, n_jobs=3, model_type='tree', max_iter=100)
m.fit_scores(balance=True, nmodels=10, n_jobs=3, model_type='linear', max_iter=200)
# ============ (2) Using Optuna (trains only the single best model) =============
# m.fit_scores(
#     balance=True,
#     model_type='tree',
#     max_iter=200,
#     use_optuna=True,
#     n_trials=15
# )
# Predict the scores from the fitted model(s) and plot them.
m.predict_scores()
m.plot_scores()
# Tune the matching threshold, then run the matching itself
# (method "min", nmatches=1, threshold=1, no replacement).
m.tune_threshold(method='random')
m.match(method="min", nmatches=1, threshold=1, replacement=False)
m.plot_matched_scores()
# Post-matching bookkeeping; freq_df is not used further in this example.
freq_df = m.record_frequency()
m.assign_weight_vector()
# Show the first six matched rows, ordered by match_id.
print("top 6 matched data")
print(m.matched_data.sort_values("match_id").head(6))
# Compare the categorical and continuous variables; each call is asked to
# return a table and to plot the result.
categorical_results = m.compare_categorical(return_table=True, plot_result=True)
print(categorical_results)
cc = m.compare_continuous(return_table=True, plot_result=True)
print(cc)