数据科学与python语言实验——NumPy数值计算基础

NumPy数值计算基础

实验数据：
链接：https://pan.baidu.com/s/1-E2ShVTdI0X5lwDtMLFFsQ
提取码：0929

代码实现：

之前不会的地方：

1．读取文件
使用numpy内置的loadtxt()函数，以及这个函数的参数：fname：要读取的文件；dtype：读取后的数据类型；delimiter：读取文件中数据的分隔符

#参数列表：fname要读取的文件，dtype读取后的数据类型，delimiter读取文件中数据的分隔符
self.data=np.loadtxt(fname=path,dtype=str,delimiter=',')

2．数据的切分索引

self.colindex=self.data[0,:]#得到标签行（必须在去掉第一行之前取出，否则取到的是第一行数据）
self.data=self.data[1:,:]#去掉第一行（标签行）

3．将数据中年份和季度两列带小数的浮点数（如 2000.0）转化为不带小数部分的整数形式（如 2000），使用了numpy中的np.char.replace()函数

 #需要将数据中的年份和季度中的小数部分去掉
self.data[:,:2]=np.char.replace(self.data[:,:2],'.0','')
#data[:,:2]是数据中的前两列

4.查找满足条件的行索引

 index = np.where((self.data[:, 0] == year) & (self.data[:, 1] == quarter))  # 使用where方法返回符合给定年份和季度的行索引

5.实现变量的展平

import numpy as np

fp = './macrodata.csv'  # input CSV file to read
op = './test.csv'  # output CSV file for the flattened table


class processdata:
    """Query and reshape a quarterly macro-economic table loaded from a CSV file.

    The file is read as strings. The first row is kept as the column labels
    (``self.colindex``) and the remaining rows as the data (``self.data``).
    """

    # Maps a column name to its column index in ``self.data``.
    colmap = {'year': 0, 'quarter': 1, 'gdp': 2, 'realcons': 3, 'realinv': 4,
              'realgovt': 5, 'realdpi': 6, 'cpi': 7, 'm1': 8, 'tbilrate': 9,
              'unemp': 10, 'pop': 11, 'infl': 12, 'realint': 13}

    def __init__(self, path):
        """Load the comma-separated file at ``path``.

        Args:
            path: Path of the CSV file; its first row must be the header.
        """
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single row, so the row/column slicing below is always valid.
        raw = np.loadtxt(fname=path, dtype=str, delimiter=',', ndmin=2)
        # Grab the header BEFORE dropping it; reading row 0 afterwards would
        # return the first data row instead of the labels.
        self.colindex = raw[0, :]
        self.data = raw[1:, :]
        # Year and quarter are stored like '2000.0'; strip the '.0' so they
        # compare equal to str(int(...)) of the caller's arguments.
        self.data[:, :2] = np.char.replace(self.data[:, :2], '.0', '')

    def _find_row(self, year, quarter):
        """Locate the first row matching ``year`` and ``quarter`` (both str).

        Returns:
            ``(row_index, None)`` when a row matches, otherwise
            ``(None, error_message)`` describing whether the year or the
            quarter was not found.
        """
        year_hits = self.data[:, 0] == year
        rows = np.where(year_hits & (self.data[:, 1] == quarter))[0]
        if rows.size:
            return int(rows[0]), None
        if not year_hits.any():
            return None, 'The given year ' + year + ' is out of range...'
        return None, ('The given quarter ' + quarter
                      + ' is not found for the given year ' + year + '...')

    def lookupdata(self, year, quarter, col):
        """Print and return the value of column ``col`` at a point in time.

        Args:
            year: Year to look up (anything accepted by ``int``).
            quarter: Quarter to look up (anything accepted by ``int``).
            col: Column name, a key of ``colmap`` (e.g. 'gdp' or 'pop').

        Returns:
            The stored value (a string) when the row exists, otherwise the
            error message that was printed.

        Raises:
            KeyError: If the row exists but ``col`` is not a key of ``colmap``.
        """
        year = str(int(year))
        quarter = str(int(quarter))
        row, error = self._find_row(year, quarter)
        if row is None:
            print(error)
            return error
        value = self.data[row][processdata.colmap[col]]
        print('The ' + col + ' in quarter ' + quarter + ', year ' + year
              + ' is ' + str(value) + '...')
        return value

    def calPerCapitaGDP(self, year, quarter):
        """Print and return the gdp column divided by the pop column for a row.

        Args:
            year: Year to look up (anything accepted by ``int``).
            quarter: Quarter to look up (anything accepted by ``int``).

        Returns:
            The quotient as a float when the row exists, otherwise the error
            message that was printed.
        """
        year = str(int(year))
        quarter = str(int(quarter))
        row, error = self._find_row(year, quarter)
        if row is None:
            print(error)
            return error
        record = self.data[row]
        per_capita = (float(record[processdata.colmap['gdp']])
                      / float(record[processdata.colmap['pop']]))
        print('The Per Capita GDP in quarter ' + quarter + ', year ' + year
              + ' is ' + str(per_capita) + '...')
        return per_capita

    def flattendata(self, col=('gdp', 'pop')):
        """Reshape the selected columns into a long (one value per row) table.

        Args:
            col: Sequence of column names (keys of ``colmap``) to flatten.

        Returns:
            A 2-D string array whose first row is the header
            ``['year', 'quarter', 'values', 'columns number']`` and whose
            remaining rows hold, for every data row and every selected column,
            the year, the quarter, the value and the source column index.
        """
        collist = [processdata.colmap[name] for name in col]
        n_rows = self.data.shape[0]
        # Repeat each (year, quarter) pair once per selected column so it
        # lines up with the row-major flattening of the selected values.
        indexdata = self.data[:, [0, 1]].repeat(len(col), axis=0)
        newdata = self.data[:, collist].flatten().reshape(-1, 1)
        # Column numbers cycle in the same order as the flattened values.
        # Cast to str explicitly rather than relying on implicit
        # string/integer promotion when stacking.
        newcol = np.array(collist * n_rows).astype(str).reshape(-1, 1)
        body = np.hstack((indexdata, newdata, newcol))
        header = np.array(['year', 'quarter', 'values', 'columns number'])
        return np.vstack((header, body))

    def printdata(self):
        """Print the shape of the gdp column stacked on top of the pop column."""
        data1 = np.vstack((self.data[:, 2].reshape(-1, 1),
                           self.data[:, 11].reshape(-1, 1)))
        print(data1.shape)


if __name__ == '__main__':
    prdata = processdata(fp)  # one object provides all the operations below
    print('(a):')
    prdata.lookupdata(2000, 1, 'gdp')
    print('(b):')
    prdata.lookupdata(2000, 1, 'pop')
    print('(c):')
    prdata.lookupdata(2020, 1, 'gdp')
    print('(d):')
    prdata.lookupdata(2000, 6, 'pop')
    print('(e):')
    prdata.calPerCapitaGDP(2000, 1)
    print('(f):')
    flatdata = prdata.flattendata()
    np.savetxt(op, flatdata, delimiter=',', fmt='%s')  # save the flattened result
    print('End!')
    input('按回车键结束')