Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

""" 

Numpy utilities. 

 

Import this as ``np`` instead of importing the ``numpy`` module. It exports the same symbols, with 

the addition of strongly-typed phantom classes for tracking the exact dimensions and type of each 

variable using ``mypy``. It also provides some additional utilities (I/O). 

""" 

 

from numpy import * # pylint: disable=redefined-builtin,wildcard-import,unused-wildcard-import 

from typing import Any 

from typing import Collection 

from typing import Optional 

from typing import Tuple 

from typing import Type 

from typing import TypeVar 

from typing import Union 

 

import ctypes 

import multiprocessing 

import os 

 

# pylint: disable=redefined-outer-name 

 

 

#: Type variable for arrays.
A = TypeVar('A', bound='BaseArray')  # pylint: disable=invalid-name

#: The ``ctypes`` element type backing a shared memory buffer, by the data type name.
#:
#: Every numeric and boolean phantom type has an entry, so ``shared_memory_zeros`` works for all
#: of them. Strings (data type ``O``) can't be placed in shared memory and have no entry.
_C_TYPES = dict(
    # A Numpy boolean occupies a single byte; ``frombuffer`` reinterprets the raw bytes.
    bool=ctypes.c_uint8,
    int8=ctypes.c_int8,
    int16=ctypes.c_int16,
    int32=ctypes.c_int32,
    int64=ctypes.c_int64,
    float32=ctypes.c_float,
    float64=ctypes.c_double,
)


class BaseArray(ndarray):
    """
    Base class for all Numpy array and matrix phantom types.

    A derived class declares the expected ``dimensions`` and ``dtype``. The ``am`` and ``be``
    methods verify (or convert) the data and return it typed as the derived class, which allows
    ``mypy`` to track the exact dimensions and data type of each variable.
    """

    #: The expected dimensions of an array of the (derived) class.
    dimensions: int

    #: The expected data type of an array of the (derived) class.
    dtype: str

    @staticmethod
    def exists(path: str) -> bool:
        """
        Whether there exists a disk file with the specified path to load an array from.

        The path must be given without a suffix. This checks for either a ``.txt`` or a ``.npy``
        suffix to allow for loading either an array of strings or an array or matrix of numeric
        values.
        """
        assert not path.endswith('.npy')
        assert not path.endswith('.txt')

        return os.path.exists(path + '.npy') or os.path.exists(path + '.txt')

    @classmethod
    def read(cls: Type[A], path: str, mmap_mode: Optional[str] = None) -> A:
        """
        Read a Numpy array of the concrete type from the disk.

        The path must be given without a suffix. If a disk file with a ``.txt`` suffix exists, this
        will read an array of strings (one per line). Otherwise, a file with a ``.npy`` suffix must
        exist, and this will load the array or matrix of values contained in it, passing the
        ``mmap_mode`` (if any) to Numpy to memory map the data instead of reading it.

        Raises a ``ValueError`` if the loaded data does not match the dimensions or the data type
        of the concrete class.
        """
        return cls.am(BaseArray._read(path, mmap_mode))

    @staticmethod
    def _read(path: str, mmap_mode: Optional[str] = None) -> ndarray:
        """
        Load the raw data stored under the path (given without a suffix), without verifying it.

        A ``.txt`` file takes precedence over a ``.npy`` file.
        """
        assert not path.endswith('.npy')
        assert not path.endswith('.txt')

        text_path = path + '.txt'
        if not os.path.exists(text_path):
            return load(path + '.npy', mmap_mode)

        with open(text_path, 'r') as file:
            strings = file.read().split('\n')

        # Each written string is terminated by a line break, so the final fragment is normally
        # empty. Drop it only when it is, so that a last line which lacks the terminator (e.g., in
        # a hand-edited file) is not silently lost.
        if strings[-1] == '':
            strings.pop()

        return array(strings, dtype='O')

    @staticmethod
    def read_array(path: str, mmap_mode: Optional[str] = None) -> ndarray:
        """
        Read a 1D array of any type from the disk.

        Raises a ``ValueError`` if the loaded data is not one-dimensional.
        """
        values = BaseArray._read(path, mmap_mode)
        BaseArray._am_shape(values, 1)
        return values

    @staticmethod
    def read_matrix(path: str, mmap_mode: Optional[str] = None) -> ndarray:
        """
        Read a 2D array of any type from the disk.

        Raises a ``ValueError`` if the loaded data is not two-dimensional.
        """
        values = BaseArray._read(path, mmap_mode)
        BaseArray._am_shape(values, 2)
        return values

    @classmethod
    def write(cls, data: ndarray, path: str) -> None:
        """
        Write a Numpy array of the concrete type to the disk.

        The path must be given without a suffix. If writing an array of strings, this will create
        a file with a ``.txt`` suffix containing one string value per line. Otherwise, the data may
        be an array or a matrix of numeric values, which will be written to a file with a ``.npy``
        format allowing for memory mapped access.

        Raises a ``ValueError`` if the data does not match the dimensions or the data type of the
        concrete class.
        """
        cls.am(data)
        BaseArray._write(data, path)

    @staticmethod
    def _write(data: ndarray, path: str) -> None:
        """
        Store the data under the path (given without a suffix), without verifying its type.

        The strings are written one per line, so they must not contain line breaks themselves.
        """
        assert not path.endswith('.npy')
        assert not path.endswith('.txt')

        if data.dtype == 'O':
            BaseArray._am_shape(data, 1)
            with open(path + '.txt', 'w') as file:
                # Terminate (rather than separate) the strings by line breaks, so an empty array
                # creates an empty file instead of a file holding a single empty line (which
                # would be read back as an array of one empty string).
                file.writelines(string + '\n' for string in data)
        else:
            save(path + '.npy', data)

    @classmethod
    def am(cls: Type[A], data: ndarray) -> A:  # pylint: disable=invalid-name
        """
        Declare an array as being of this type.

        This returns the same data object, after verifying it has the expected dimensions and data
        type (matched against either the name or the kind of the actual data type). Raises a
        ``ValueError`` otherwise.
        """
        BaseArray._am_shape(data, cls.dimensions)
        if cls.dtype not in [data.dtype.name, data.dtype.kind]:
            raise ValueError('unexpected data type: %s instead of: %s'
                             % (data.dtype, cls.dtype))
        return data  # type: ignore

    @classmethod
    def be(cls: Type[A], data: Collection) -> A:  # pylint: disable=invalid-name
        """
        Convert an array to this type.

        Data which is not a Numpy array is used to create a new array of the expected data type.
        A Numpy array of a different data type is cast (copied) to the expected one. Raises a
        ``ValueError`` if the dimensions are not the expected ones.
        """
        if not isinstance(data, ndarray):
            data = array(data, dtype=cls.dtype)

        BaseArray._am_shape(data, cls.dimensions)
        if cls.dtype not in [data.dtype.name, data.dtype.kind]:
            data = data.astype(cls.dtype)

        return data  # type: ignore

    @staticmethod
    def _am_shape(data: ndarray, expected_dimensions: int) -> None:
        """
        Verify the data is a Numpy array with the expected number of dimensions.

        Raises a ``ValueError`` otherwise.
        """
        if not isinstance(data, ndarray):
            raise ValueError('unexpected type: %s.%s instead of: %s.%s'
                             % (data.__class__.__module__, data.__class__.__qualname__,
                                ndarray.__module__, ndarray.__qualname__))

        if len(data.shape) != expected_dimensions:
            raise ValueError('unexpected dimensions: %s instead of: %s'
                             % (len(data.shape), expected_dimensions))

    @classmethod
    def zeros(cls: Type[A], shape: Union[int, Tuple[int, ...]]) -> A:
        """
        Return an array full of zeros.

        The shape is either the size of an array or the (two) sizes of a matrix.
        """
        if isinstance(shape, tuple):
            assert len(shape) == 2
        return cls.am(zeros(shape, dtype=cls.dtype))

    @classmethod
    def empty(cls: Type[A], shape: Union[int, Tuple[int, ...]]) -> A:
        """
        Return an uninitialized array.

        The shape is either the size of an array or the (two) sizes of a matrix.
        """
        if isinstance(shape, tuple):
            assert len(shape) == 2
        return cls.am(empty(shape, dtype=cls.dtype))

    @classmethod
    def filled(cls: Type[A], value: Any, shape: Union[int, Tuple[int, ...]]) -> A:
        """
        Return an array full of some value.

        The shape is either the size of an array or the (two) sizes of a matrix.
        """
        values = cls.empty(shape)
        values.fill(value)
        return values

    @classmethod
    def shared_memory_zeros(cls: Type[A], shape: Union[int, Tuple[int, ...]]) -> A:
        """
        Create a shared memory array, initialized to zeros.

        The shape is either the size of an array or the (two) sizes of a matrix; a matrix is laid
        out in Fortran (column-major) order. The data is a view of a ``multiprocessing.Array``
        buffer.

        Raises a ``KeyError`` if the data type of the concrete class has no ``_C_TYPES`` entry
        (that is, for strings).
        """
        if isinstance(shape, int):
            size = shape
            shape = (size,)
        else:
            assert len(shape) == 2
            size = int(shape[0] * shape[1])

        c_type = _C_TYPES[cls.dtype]

        shared_buffer = multiprocessing.Array(c_type, size)  # type: ignore
        # View the raw shared buffer as a flat Numpy array (no copy), then give it its shape.
        shared_array = frombuffer(shared_buffer.get_obj(), dtype=cls.dtype)
        return cls.am(reshape(shared_array, shape, order='F'))

 

 

class ArrayStr(BaseArray):
    """
    Phantom type for a one-dimensional array of Unicode strings, held as Python objects (data
    type ``O``).
    """

    dtype = 'O'
    dimensions = 1

 

 

class ArrayBool(BaseArray):
    """
    Phantom type for a one-dimensional array of boolean values.
    """

    dtype = 'bool'
    dimensions = 1

 

 

class MatrixBool(BaseArray):
    """
    Phantom type for a two-dimensional matrix of boolean values.
    """

    dtype = 'bool'
    dimensions = 2

 

 

class ArrayInt8(BaseArray):
    """
    Phantom type for a one-dimensional array of 8-bit integers.
    """

    dtype = 'int8'
    dimensions = 1

 

 

class MatrixInt8(BaseArray):
    """
    Phantom type for a two-dimensional matrix of 8-bit integers.
    """

    dtype = 'int8'
    dimensions = 2

 

 

class ArrayInt16(BaseArray):
    """
    Phantom type for a one-dimensional array of 16-bit integers.
    """

    dtype = 'int16'
    dimensions = 1

 

 

class MatrixInt16(BaseArray):
    """
    Phantom type for a two-dimensional matrix of 16-bit integers.
    """

    dtype = 'int16'
    dimensions = 2

 

 

class ArrayInt32(BaseArray):
    """
    Phantom type for a one-dimensional array of 32-bit integers.
    """

    dtype = 'int32'
    dimensions = 1

 

 

class MatrixInt32(BaseArray):
    """
    Phantom type for a two-dimensional matrix of 32-bit integers.
    """

    dtype = 'int32'
    dimensions = 2

 

 

class ArrayInt64(BaseArray):
    """
    Phantom type for a one-dimensional array of 64-bit integers.
    """

    dtype = 'int64'
    dimensions = 1

 

 

class MatrixInt64(BaseArray):
    """
    Phantom type for a two-dimensional matrix of 64-bit integers.
    """

    dtype = 'int64'
    dimensions = 2

 

 

class ArrayFloat32(BaseArray):
    """
    Phantom type for a one-dimensional array of 32-bit floating point numbers.
    """

    dtype = 'float32'
    dimensions = 1

 

 

class MatrixFloat32(BaseArray):
    """
    Phantom type for a two-dimensional matrix of 32-bit floating point numbers.
    """

    dtype = 'float32'
    dimensions = 2

 

 

class ArrayFloat64(BaseArray):
    """
    Phantom type for a one-dimensional array of 64-bit floating point numbers.
    """

    dtype = 'float64'
    dimensions = 1

 

 

class MatrixFloat64(BaseArray):
    """
    Phantom type for a two-dimensional matrix of 64-bit floating point numbers.
    """

    dtype = 'float64'
    dimensions = 2

 

 

#: Lookup table from a data type name to the phantom type of a (1D) array of that data type.
ARRAY_OF_DTYPE = {
    'str': ArrayStr,
    'bool': ArrayBool,
    'int8': ArrayInt8,
    'int16': ArrayInt16,
    'int32': ArrayInt32,
    'int64': ArrayInt64,
    'float32': ArrayFloat32,
    'float64': ArrayFloat64,
}

 

#: Lookup table from a data type name to the phantom type of a (2D) matrix of that data type.
MATRIX_OF_DTYPE = {
    'bool': MatrixBool,
    'int8': MatrixInt8,
    'int16': MatrixInt16,
    'int32': MatrixInt32,
    'int64': MatrixInt64,
    'float32': MatrixFloat32,
    'float64': MatrixFloat64,
}