Load the tobacco-leaf spectral data
import mxnet as mx
from mxnet import autograd, nd
from IPython.display import set_matplotlib_formats
from matplotlib import pyplot as plt
import numpy as np
import random
features_s = np.loadtxt('/home/wt/Documents/ML/Data/XF.txt', dtype=np.float32)
labels_s = np.loadtxt('/home/wt/Documents/ML/Data/XFlabel.csv', dtype=np.float32)
# CPU version
# features = nd.array(features_s)
# labels = nd.array(labels_s)
# GPU version
features = nd.array(features_s, ctx=mx.gpu())
labels = nd.array(labels_s, ctx=mx.gpu())
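MXNet runs each operator on the device that holds its inputs, so it is worth confirming where the arrays ended up; a minimal check (a sketch, features_cpu is just an illustrative name):

print(features.context, labels.context)          # expect gpu(0) gpu(0)
features_cpu = features.as_in_context(mx.cpu())  # copies back to host memory if ever needed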
Check that the graphics card's GPU (a GTX 1050 Ti) is in use
!nvidia-smi
Tue Jul 17 09:25:00 2018
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.130                Driver Version: 384.130                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 105...  Off  | 00000000:01:00.0  On |                  N/A |
| 31%   35C    P0    ERR! / 75W |    480MiB /  4037MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|    0      1225      G   /usr/lib/xorg/Xorg                           135MiB |
|    0      1981      G   cinnamon                                      45MiB |
|    0      2955      G   ...-token=297718BD516CE4C3FD446B6738037890    62MiB |
|    0      3312      C   /usr/bin/python                              225MiB |
+-----------------------------------------------------------------------------+
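Apart from the ERR! in the power readout, the card is visible and /usr/bin/python is holding 225 MiB on it. Instead of shelling out to nvidia-smi, the context can also be chosen programmatically; a sketch using the usual try/except idiom (not part of the original notebook):

def try_gpu():
    # Return mx.gpu() if a GPU is usable, otherwise fall back to mx.cpu().
    try:
        ctx = mx.gpu()
        nd.zeros((1,), ctx=ctx)  # raises MXNetError when no GPU is present
    except mx.base.MXNetError:
        ctx = mx.cpu()
    return ctx

ctx = try_gpu()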
Inspect the shapes of the features and labels
print(features.shape)
print(labels.shape)
(68L, 1024L)
(68L,)
Split off training and test samples. By default the training/test features and labels are stored in the same context (here the GPU) as the original data.
features_train = nd.concat(features[0:21, :], features[25:45, :], features[49:64, :], dim=0)
features_test = nd.concat(features[21:25, :], features[45:49, :], features[64:68, :], dim=0)
labels_train = nd.concat(labels[0:21], labels[25:45], labels[49:64], dim=0)
labels_test = nd.concat(labels[21:25], labels[45:49], labels[64:68], dim=0)
features_test, labels_train
(
[[28.959 28.879 29.046 ... 22.99 24.579 26.117]
[41.629 39.224 38.589 ... 57.235 57.239 57.045]
[39.367 35.776 36.1 ... 57.235 56.902 56.701]
...
[23.303 22.198 22.614 ... 35.048 35.017 34.192]
[22.851 21.983 21.162 ... 37.138 37.542 37.285]
[23.529 22.989 22.96 ... 37.942 38.047 37.285]]
<NDArray 12x1024 @gpu(0)>,
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2.
2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3.
3. 3. 3. 3. 3. 3. 3. 3.]
<NDArray 56 @gpu(0)>)
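This manual split holds out 4 samples per class (rows 21-24, 45-48, 64-67). For comparison, a randomized split could be drawn instead; a sketch (random_split and num_test are hypothetical names, and unlike the manual slices this version does not stratify by class):

def random_split(features, labels, num_test):
    # Shuffle all row indices, then take the first num_test rows as the test set.
    idx = list(range(len(features)))
    random.shuffle(idx)
    train = nd.array(idx[num_test:], ctx=features.context)
    test = nd.array(idx[:num_test], ctx=features.context)
    return (features.take(train), labels.take(train),
            features.take(test), labels.take(test))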
Number of training examples and number of features per example
num_inputs = 1024
num_examples = 56
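Hard-coding these means they must stay in sync with the split above (21 + 20 + 15 = 56 training rows); deriving them from the data avoids that, e.g.:

num_examples, num_inputs = features_train.shape  # (56, 1024)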
Read the data in minibatches
batch_size = 10
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # The examples are read in random order.
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = nd.array(indices[i: min(i + batch_size, num_examples)], ctx=mx.gpu())
        # take() returns the rows at the given indices.
        yield features.take(j), labels.take(j)

for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break
(
[[39.367 35.776 36.1 ... 57.235 56.902 56.701]
[27.451 26.724 25.726 ... 30.386 29.798 29.21 ]
[30.017 29.703 28.482 ... 39.291 40.614 42.032]
...
[26.244 24.138 23.79 ... 37.621 36.869 35.911]
[25.339 24.138 23.237 ... 29.743 30.303 29.553]
[27.451 27.586 26.556 ... 26.367 26.431 26.632]]
<NDArray 10x1024 @cpu(0)>,
[1. 1. 2. 2. 1. 2. 3. 3. 3. 1.]
<NDArray 10 @cpu(0)>)
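Gluon ships an equivalent minibatch iterator, so the hand-rolled data_iter can be swapped for a DataLoader; a sketch of that alternative (not what this notebook uses):

from mxnet.gluon import data as gdata

dataset = gdata.ArrayDataset(features_train, labels_train)
gluon_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)
for X, y in gluon_iter:
    print(X.shape, y.shape)  # (10, 1024) (10,) for a full minibatch
    break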
# CPU version
# w = nd.random.normal(scale=0.01, shape=(num_inputs, 1))
# b = nd.zeros(shape=(1,))
# params = [w, b]
# GPU version: create the parameters directly on the GPU
w = nd.random.normal(scale=0.01, shape=(num_inputs, 1), ctx=mx.gpu())
b = nd.zeros(shape=(1,), ctx=mx.gpu())
params = [w, b]
params
[
[[ 0.03057447]
[ 0.01027301]
[ 0.0069176 ]
...
[-0.00026318]
[ 0.02125593]
[-0.00302696]]
<NDArray 1024x1 @gpu(0)>,
[0.0109025]
<NDArray 1 @gpu(0)>]
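MXNet refuses to mix devices within a single operation, so it is worth confirming both parameters really live on the GPU before training; a one-line check:

[param.context for param in params]  # expect [gpu(0), gpu(0)]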
Attach gradient buffers
for param in params:
    param.attach_grad()
The model, the loss function, and stochastic gradient descent
def linreg(X, w, b):
    # Linear regression model.
    return nd.dot(X, w) + b

def squared_loss(y_hat, y):
    # Halved squared loss; reshape y so it broadcasts against y_hat.
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

def sgd(params, lr, batch_size):
    # Minibatch stochastic gradient descent: update each parameter in place.
    for param in params:
        param[:] = param - lr * param.grad / batch_size
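For reference, squared_loss and sgd implement the halved squared error and the minibatch update

$$\ell\big(\hat{y}^{(i)}, y^{(i)}\big) = \frac{1}{2}\big(\hat{y}^{(i)} - y^{(i)}\big)^2, \qquad (\mathbf{w}, b) \leftarrow (\mathbf{w}, b) - \frac{\eta}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} \nabla_{(\mathbf{w}, b)}\, \ell\big(\hat{y}^{(i)}, y^{(i)}\big)$$

with $\hat{y}^{(i)} = \mathbf{x}^{(i)\top}\mathbf{w} + b$, learning rate $\eta$ (lr), and minibatch $\mathcal{B}$. The division by batch_size in sgd pairs with l.backward(), which sums the gradients over the minibatch.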
Train the model and print the training loss
lr = 0.0000003  # 3e-07, as echoed after training below
num_epochs = 1000000
net = linreg
loss = squared_loss
# Training takes num_epochs full passes (epochs) over the training set.
for epoch in range(1, num_epochs + 1):
    # Each epoch uses every sample in the training set once (assuming the
    # number of samples is divisible by the batch size). X and y are the
    # features and labels of one minibatch.
    for X, y in data_iter(batch_size, features_train, labels_train):
        with autograd.record():
            # l is the loss on the minibatch X, y.
            l = loss(net(X, w, b), y)
        # Differentiate the minibatch loss w.r.t. the model parameters.
        l.backward()
        # Update the parameters with minibatch SGD.
        sgd([w, b], lr, batch_size)
    print('epoch %d, loss %f'
          % (epoch, loss(net(features_train, w, b), labels_train).mean().asnumpy()))
epoch 1, loss 0.085310
epoch 2, loss 0.085306
epoch 3, loss 0.085320
epoch 4, loss 0.085339
epoch 5, loss 0.085321 ...
epoch 998218, loss 0.056923
epoch 998219, loss 0.056943
epoch 998220, loss 0.056927
epoch 998221, loss 0.056943
...
epoch 999885, loss 0.056893
epoch 999886, loss 0.056939
epoch 999887, loss 0.056962
epoch 999888, loss 0.056938
epoch 999889, loss 0.056914
epoch 999890, loss 0.056926
epoch 999891, loss 0.056899
epoch 999892, loss 0.056894
epoch 999893, loss 0.056893
...
epoch 999995, loss 0.056904
epoch 999996, loss 0.056917
epoch 999997, loss 0.056938
epoch 999998, loss 0.056891
epoch 999999, loss 0.056903
epoch 1000000, loss 0.056891
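Printing every epoch for a million epochs floods the notebook, which is why the log above is so heavily elided. A variant that logs only periodically (a sketch; log_interval is a hypothetical name):

log_interval = 100000
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter(batch_size, features_train, labels_train):
        with autograd.record():
            l = loss(net(X, w, b), y)
        l.backward()
        sgd([w, b], lr, batch_size)
    if epoch % log_interval == 0:
        train_l = loss(net(features_train, w, b), labels_train).mean().asnumpy()
        print('epoch %d, loss %f' % (epoch, train_l))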
lr
3e-07
Predict on the test samples and check the results
a = nd.dot(features_test, w) + b
a
[[-0.9413086]
[ 1.4630066]
[ 1.4438759]
[ 1.3059595]
[ 2.7054663]
[ 2.5185423]
[ 2.207837 ]
[ 2.5860143]
[ 3.2223454]
[ 3.063551 ]
[ 3.2315679]
[ 3.8970466]]
<NDArray 12x1 @gpu(0)>
labels_test
[1. 1. 1. 1. 2. 2. 2. 2. 3. 3. 3. 3.]
<NDArray 12 @cpu(0)>
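The labels are the integer class indices 1-3, so one simple way to score these regression outputs (a sketch; rounding to the nearest class is an assumption, not something the notebook does) is to round, clip to the valid range, and compare. On the values shown above this yields 9 of 12 correct:

pred = nd.clip(nd.round(a.reshape((-1,))), 1, 3)
correct = (pred == labels_test.as_in_context(pred.context))
print('test accuracy: %.2f' % correct.mean().asscalar())  # 0.75 on the values above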
Save and reload the model parameters
# nd.save('LinWB20180716', [w, b])
w2, b2 = nd.load('LinWB20180716')
params = [w2, b2]
b2 == b.as_in_context(b2.context)  # sanity check: the reloaded bias matches the one in memory
[1.]
<NDArray 1 @cpu(0)>
nd.save('LinWB20180717', [w, b])
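nd.load returns the arrays as stored; if training is to continue on the GPU, it is safest to move the reloaded parameters there explicitly (a sketch, reading back the file just written):

w, b = nd.load('LinWB20180717')
w, b = w.as_in_context(mx.gpu()), b.as_in_context(mx.gpu())
params = [w, b]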