A question about function encapsulation

Source: 6-7 Stochastic Gradient Descent in scikit-learn

weixin_慕村8280326

2021-09-09

def fit_SGD(self, X_train, y_train, n_iters=50, t0=5, t1=50):
    # Assumes numpy has been imported as np at module level.
    assert X_train.shape[0] == y_train.shape[0], \
        "the size of X_train must be equal to the size of y_train"

    def dJ_SGD(theta, X_b_i, y_i):
        # Gradient of the squared loss at a single sample.
        return 2. * X_b_i.T.dot(X_b_i.dot(theta) - y_i)

    def SGD(initial_theta, X_b, y, n_iters, t0, t1):
        theta = initial_theta

        def learning_rate(t):
            # Decaying step size: eta = t0 / (t + t1).
            return t0 / (t + t1)

        for i_iters in range(n_iters):
            # Each iteration looks at exactly one randomly chosen sample.
            rand_i = np.random.randint(0, len(X_b))
            gradient = dJ_SGD(theta, X_b[rand_i], y[rand_i])
            theta = theta - learning_rate(i_iters) * gradient
        return theta

    # Prepend a column of ones so theta[0] is the intercept term.
    X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
    initial_theta = np.zeros(X_b.shape[1])

    self._theta = SGD(initial_theta, X_b, y_train, n_iters, t0, t1)
    self.coef_ = self._theta[1:]
    self.interception_ = self._theta[0]
    return self
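
For reference, a minimal sketch of how this method could be wired into a class and called; the LinearRegression class name, the synthetic data, and the seed below are illustrative assumptions, not taken from the question:

import numpy as np

# Hypothetical container class so the fit_SGD function above
# (defined at module level) can run as a bound method.
class LinearRegression:
    def __init__(self):
        self.coef_ = None
        self.interception_ = None
        self._theta = None

LinearRegression.fit_SGD = fit_SGD  # attach the function defined above

# Assumed example data: y = 4x + 3 plus Gaussian noise.
np.random.seed(666)
x = 2. * np.random.random(size=10000)
X = x.reshape(-1, 1)
y = 4. * x + 3. + np.random.normal(0., 1., size=10000)

reg = LinearRegression()
reg.fit_SGD(X, y, n_iters=100000)    # one random sample per iteration
print(reg.coef_, reg.interception_)  # should land near [4.] and 3.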
  
Teacher, the first block above is the stochastic gradient descent I encapsulated in PyCharm, before any improvements; the block below is the version where I define and call the functions directly in a Jupyter notebook. Comparing the two, the encapsulated version gives better results than the one defined directly in the notebook. Why is that? (There is a lot of code and quite a few screenshots; thank you very, very much for your patience, teacher!)
def dJ_SGD(theta, X_b_i, y_i):
    return 2. * X_b_i.T.dot(X_b_i.dot(theta) - y_i)

def SGD(initial_theta, X_b, y, n_iters):
    theta = initial_theta
    t0 = 5
    t1 = 50

    def descent_η(t):
        # Same decaying step size as above: eta = t0 / (t + t1).
        eta = t0 / (t + t1)
        return eta

    for i_iters in range(n_iters):
        rand_i = np.random.randint(0, len(X_b))
        gradient = dJ_SGD(theta, X_b[rand_i], y[rand_i])
        theta = theta - descent_η(i_iters) * gradient

    return theta
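
One detail worth noting when comparing the two versions: both pick training samples at random, so two runs of identical code can produce different theta values. Below is a minimal sketch of a like-for-like comparison, assuming the synthetic data and seed values shown (illustrative, not from the question); re-seeding immediately before each run makes both versions visit the same random samples:

import numpy as np

# Hypothetical shared test data: y = 4x + 3 plus noise.
np.random.seed(666)
x = 2. * np.random.random(size=10000)
X_b = np.hstack([np.ones((len(x), 1)), x.reshape(-1, 1)])
y = 4. * x + 3. + np.random.normal(0., 1., size=10000)

# Fix the seed right before running, so a second implementation
# re-seeded the same way draws the same random sample indices.
np.random.seed(42)
theta = SGD(np.zeros(X_b.shape[1]), X_b, y, n_iters=100000)
print(theta)  # [intercept, slope], expected near [3., 4.]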

[Screenshots of the results were attached here.]


1 Answer

liuyubobobo

2021-09-09

Sorry, I don't quite understand your question. Are you saying the two pieces of code are different? Where exactly do they differ? Or are you saying the code is the same but the results differ, and that is what puzzles you?

weixin_慕村8280326 replied to liuyubobobo:
OK, thank you very much for your patient answer, teacher :)
2021-09-09
(9 replies in total)

Python3入门机器学习 经典算法与应用  

Python3 + sklearn, covering principles, low-level algorithm implementation, and framework usage.
