sklearn pandas
- 系统环境
- 1.安装pandas
- 测试
- 出现问题
- 2. 安装sklearn
- 3. 配置离线fashion_mnist数据集
- 能连上google的时候是这样
- 不能科学上网的,我们就下载数据集
- 4. 安装matplotlib
- 5. 测试
系统环境
1 2 | CentOS Linux release 7.7.1908 (Core) Linux version 3.10.0-1062.1.1.el7.x86_64 ([email protected]) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC) ) #1 SMP Fri Sep 13 22:55:44 UTC 2019 |
1.安装pandas
在已经安装的环境
1 2 3 4 | # 激活该环境 conda activate tensorflow2_env # 安装pandas conda install pandas |
测试
1 2 3 | >>> import pandas >>> pandas.__version__ '1.0.3' |
出现问题
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | # 1 ImportError: libSM.so.6: cannot open shared object file: No such file or directory yum whatprovides libSM.so.6 yum install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false # 2 ImportError: libXrender.so.1: cannot open shared object file: No such file or directory yum whatprovides libXrender.so.1 yum install libXrender # 3 ImportError: libXext.so.6: cannot open shared object file: No such file or directory >>> exit() yum install libXext |
2. 安装sklearn
1 2 3 | conda install scikit-learn # conda 里没有名为 sklearn 的包,正确的包名是 scikit-learn;用 pip 安装也一样 pip install scikit-learn |
3. 配置离线fashion_mnist数据集
能连上google的时候是这样
1 2 3 | from tensorflow import keras fashion_mnist = keras.datasets.fashion_mnist (x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data() |
不能科学上网的,我们就下载数据集
1 2 3 4 | http://www.obs.sixmillions.cn/fashion_mnist/t10k-images-idx3-ubyte.gz http://www.obs.sixmillions.cn/fashion_mnist/t10k-labels-idx1-ubyte.gz http://www.obs.sixmillions.cn/fashion_mnist/train-images-idx3-ubyte.gz http://www.obs.sixmillions.cn/fashion_mnist/train-labels-idx1-ubyte.gz |
我下载到了 /opt/packages/keras_datasets/fashion_mnist/ 目录下
编辑当前python路径里面的 tensorflow 包中的 keras/datasets/fashion_mnist.py 文件(具体位置在 site-packages 下,不同 tensorflow 版本的目录略有差异)
因为我是用anaconda安装的环境,所以我去anaconda环境中找这个文件
编辑这个文件
本来是这样的
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | from __future__ import print_function import gzip import os import numpy as np from tensorflow.python.keras.utils.data_utils import get_file from tensorflow.python.util.tf_export import keras_export @keras_export('keras.datasets.fashion_mnist.load_data') def load_data(): """Loads the Fashion-MNIST dataset. Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. License: The copyright for Fashion-MNIST is held by Zalando SE. Fashion-MNIST is licensed under the [MIT license]( https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE). """ dirname = os.path.join('datasets', 'fashion-mnist') base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' files = [ 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' ] paths = [] for fname in files: paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname)) with gzip.open(paths[0], 'rb') as lbpath: y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[1], 'rb') as imgpath: x_train = np.frombuffer( imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) with gzip.open(paths[2], 'rb') as lbpath: y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[3], 'rb') as imgpath: x_test = np.frombuffer( imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) return (x_train, y_train), (x_test, y_test) |
将数据源那个地方(即 base 的地址,以及循环里调用 get_file 下载的那一行)换成我们本地的路径
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | from __future__ import print_function import gzip import os import numpy as np from tensorflow.python.keras.utils.data_utils import get_file from tensorflow.python.util.tf_export import keras_export @keras_export('keras.datasets.fashion_mnist.load_data') def load_data(): """Loads the Fashion-MNIST dataset. Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. License: The copyright for Fashion-MNIST is held by Zalando SE. Fashion-MNIST is licensed under the [MIT license]( https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE). """ dirname = os.path.join('datasets', 'fashion-mnist') base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' files = [ 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' ] paths = [] for fname in files: paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname)) with gzip.open(paths[0], 'rb') as lbpath: y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[1], 'rb') as imgpath: x_train = np.frombuffer( imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) with gzip.open(paths[2], 'rb') as lbpath: y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[3], 'rb') as imgpath: x_test = np.frombuffer( imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) return (x_train, y_train), (x_test, y_test) |
编辑后
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | import gzip import os import numpy as np from tensorflow.python.keras.utils.data_utils import get_file from tensorflow.python.util.tf_export import keras_export @keras_export('keras.datasets.fashion_mnist.load_data') def load_data(): """Loads the Fashion-MNIST dataset. Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. License: The copyright for Fashion-MNIST is held by Zalando SE. Fashion-MNIST is licensed under the [MIT license]( https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE). """ dirname = os.path.join('datasets', 'fashion-mnist') #base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' base = '/opt/packages/keras_datasets/fashion_mnist/' files = [ 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' ] paths = [] for fname in files: #paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname)) paths.append(base + fname) with gzip.open(paths[0], 'rb') as lbpath: y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[1], 'rb') as imgpath: x_train = np.frombuffer( imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) with gzip.open(paths[2], 'rb') as lbpath: y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) with gzip.open(paths[3], 'rb') as imgpath: x_test = np.frombuffer( imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) return (x_train, y_train), (x_test, y_test) |
注意 base 路径后面要有个斜杠,因为文件路径是用 base + fname 直接拼接出来的,少了斜杠就找不到文件
4. 安装matplotlib
1 | conda install matplotlib |
5. 测试
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | import matplotlib as mpl import matplotlib.pyplot as plt %matplotlib inline import numpy as np import sklearn import pandas as pd import os import sys import time import tensorflow as tf #import cv2 from tensorflow import keras print(tf.__version__) #print(cv2.__version__) print(sys.version_info) for moudule in mpl, np, pd, sklearn, tf, keras: print(moudule.__name__, moudule.__version__) |
1 2 3 4 5 6 7 8 9 10 | fashion_mnist = keras.datasets.fashion_mnist # 训练集测试集分开 (x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data() # 训练集拆分为验证集(前5000)和训练(后) x_valid, x_train = x_train_all[:5000], x_train_all[5000:] y_valid, y_train = y_train_all[:5000], y_train_all[5000:] print(x_valid.shape, y_valid.shape) print(x_train.shape, y_train.shape) print(x_test.shape, y_test.shape) |
1 2 3 4 5 | def show_single_image(img_arr): plt.imshow(img_arr, cmap='binary') plt.show() show_single_image(x_train[0]) |