Знакомство с Python

Библиотеки

In [1]:
import time
import math
from itertools import combinations

Стандартные типы

In [2]:
# Make sure 'somefile' exists for the file-object example below.
# Pure Python instead of the `!touch` shell escape, so the cell also works
# where no Unix shell is available. Append mode creates the file when it is
# missing and leaves existing content untouched.
with open('somefile', 'a'):
    pass
In [3]:
# One example value for each of the common built-in types.
a = 1                  # int
b = 1.1                # float
c = 'abc'              # str
d = True               # bool
e = True and False     # boolean expression, evaluates to False
# File object. The context manager closes the handle as soon as the block
# ends instead of leaking it for the lifetime of the kernel; `f` still names
# the (now closed) file object, so it can be displayed below.
with open('somefile') as f:
    pass
g = c + c              # "+" concatenates strings
h = c * 3              # "*" repeats a string
a += 1                 # augmented assignment: a is now 2

# The last expression of a cell is displayed as its output.
a, b, c, d, e, f, g, h
Out[3]:
(2,
 1.1,
 'abc',
 True,
 False,
 <_io.TextIOWrapper name='somefile' mode='r' encoding='UTF-8'>,
 'abcabc',
 'abcabcabc')

Циклы и ветвления

In [4]:
# Loop: range(10) yields the integers 0 through 9, printed one per line.
for i in range(10):
    print(i)
    
# Branch: the condition is the literal True, so only the first branch runs
# and the else branch is never reached.
if True:
    print('this code runs')
else:
    print('This not')
0
1
2
3
4
5
6
7
8
9
this code runs

Функции

In [5]:
def power_3(x):
    """Return ``x`` raised to the third power."""
    cubed = pow(x, 3)
    return cubed
print(power_3(10))
1000

Коллекции

In [6]:
# A list is an ordered, mutable sequence; duplicate values are allowed.
somelist = [5,5,9,2,3,5]
# Elements may be of mixed types, including another list.
someotherlist = ['d','g',5,True,False, [5,3]]
In [7]:
# Tuples are written with parentheses; unlike lists they cannot be modified
# after creation. They can hold mixed types as well.
sometuple = (5,6,7)
someothertuple = ('a','b',6)
In [8]:
# A dict maps keys to values; the keys here are two strings and one integer.
somedict = {'a':5,
            'b':9,
            9:'g'
           }
somedict
Out[8]:
{'a': 5, 'b': 9, 9: 'g'}
In [9]:
# append() modifies the list in place: id() prints the same value before and
# after the call, so it is still the same object.
print(id(somelist))
somelist.append(5)
print(id(somelist))
140382861682952
140382861682952
In [10]:
# "+" on two lists concatenates them into a new list.
[1,2,3] + [4,5,6]
Out[10]:
[1, 2, 3, 4, 5, 6]
In [11]:
# List comprehension: square every element of the source list.
[x**2 for x in [7,6,5,3]]
Out[11]:
[49, 36, 25, 9]
In [12]:
# Build a list, append one element in place, and display the result.
a = [4,5,6]
a.append(7)
a
Out[12]:
[4, 5, 6, 7]
In [13]:
# Squares of the even numbers 0, 2, ..., 20 (the range stop of 21 is exclusive).
a = [n * n for n in range(0, 21, 2)]
a
Out[13]:
[0, 4, 16, 36, 64, 100, 144, 196, 256, 324, 400]
In [14]:
# Zero-based indexing: index 2 is the third element.
a[2]
Out[14]:
16
In [15]:
# Slice [3:6] returns the elements at indices 3, 4 and 5; the stop index is excluded.
a[3:6]
Out[15]:
[36, 64, 100]
In [16]:
# A step of 2 with no start or stop takes every second element of the whole list.
a[::2]
Out[16]:
[0, 16, 64, 144, 256, 400]
In [17]:
# Tuple unpacking: bind the three elements of `sometuple` to a, b and c,
# then print the tuple followed by each unpacked value on its own line.
a, b, c = sometuple
print(sometuple, a, b, c, sep='\n')
(5, 6, 7)
5
6
7

NumPy

In [18]:
# Import NumPy under its full name and display the installed version string.
import numpy
numpy.__version__
Out[18]:
'1.15.0'
In [19]:
import numpy as np
In [20]:
# Build a 1-D array from a list of integers.
np.array([1, 4, 2, 5, 3])
Out[20]:
array([1, 4, 2, 5, 3])
In [21]:
# Mixing one float with integers yields a float array: every element is stored as a float.
np.array([3.14, 4, 2, 3])
Out[21]:
array([3.14, 4.  , 2.  , 3.  ])
In [22]:
# Ten zeros with an integer dtype.
np.zeros(10, dtype=int)
Out[22]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [23]:
# 3x5 array of ones with a float dtype; the shape is passed as a tuple.
np.ones((3, 5), dtype=float)
Out[23]:
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])
In [24]:
# 101 evenly spaced points from 0 to 10 inclusive, i.e. a step of 0.1.
np.linspace(0, 10, 101)
Out[24]:
array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,
        5.5,  5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,
        6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,
        7.7,  7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,
        8.8,  8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,
        9.9, 10. ])
In [25]:
# 5x5 array of random floats drawn uniformly from [0, 1).
# NOTE(review): no seed is set, so the values differ on every run.
np.random.random((5, 5))
Out[25]:
array([[0.09225413, 0.81072873, 0.526364  , 0.95475325, 0.57343003],
       [0.54820397, 0.85478708, 0.42181113, 0.23355806, 0.76542544],
       [0.56519126, 0.00880718, 0.59837914, 0.13979167, 0.07426447],
       [0.71652527, 0.54426333, 0.29552233, 0.61280946, 0.24455905],
       [0.36310691, 0.22880362, 0.48744992, 0.56788046, 0.89221588]])
In [26]:
# Integers 0 through 9 as an array; the array counterpart of range(10).
np.arange(10)
Out[26]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [27]:
# Rebind `a` to a 101-point array covering [0, 10] in steps of 0.1.
a = np.linspace(0, 10, 101)
In [28]:
# shape is a tuple of axis lengths; a 1-D array has a one-element tuple.
a.shape
Out[28]:
(101,)
In [29]:
# Single-element indexing works the same way as for lists (zero-based).
a[5]
Out[29]:
0.5
In [30]:
# Slicing returns the elements at indices 3 through 9 as an array.
a[3:10]
Out[30]:
array([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
In [31]:
# Negative indices count from the end. The stop index -1 is excluded, so the
# final element (10.0) is not part of the result.
a[-10:-1]
Out[31]:
array([9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9])
In [32]:
# The 16 integers 0 through 15 as a 1-D array.
arr = np.arange(16)
arr
Out[32]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
In [33]:
# Reshape the 16 elements into a 4x4 grid, filled row by row.
grid = arr.reshape((4, 4))
grid
Out[33]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [34]:
# Split along rows at index 2: `upper` gets rows 0-1, `lower` gets rows 2-3.
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)
[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]
In [35]:
# Split along columns at index 2: `left` gets columns 0-1, `right` gets columns 2-3.
left, right = np.hsplit(grid, [2])
print(left)
print(right)
[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
In [36]:
# Smallest value over the whole array (no axis is given).
np.min(grid)
Out[36]:
0
In [37]:
# Largest value over the whole array (no axis is given).
np.max(grid)
Out[37]:
15
In [38]:
# Element-wise square root; the integer grid produces a float result.
np.sqrt(grid)
Out[38]:
array([[0.        , 1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974, 2.64575131],
       [2.82842712, 3.        , 3.16227766, 3.31662479],
       [3.46410162, 3.60555128, 3.74165739, 3.87298335]])
In [39]:
# Element-wise power: every entry squared; the result stays integer.
np.power(grid, 2)
Out[39]:
array([[  0,   1,   4,   9],
       [ 16,  25,  36,  49],
       [ 64,  81, 100, 121],
       [144, 169, 196, 225]])

Pandas

In [40]:
import pandas as pd
/usr/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
In [41]:
# A Series is a 1-D labelled array. No index is given, so the labels default
# to 0..5, and the NaN entry makes the dtype float64.
s = pd.Series([1,3,5,np.nan,6,8])
s
Out[41]:
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
In [42]:
# Six consecutive daily timestamps starting on 2013-01-01.
dates = pd.date_range('20130101',periods=6)
dates
Out[42]:
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
In [43]:
# 6x4 frame of standard-normal samples, indexed by `dates`, with columns A-D.
# A dedicated, seeded RandomState makes the values identical on every
# Restart & Run All; the unseeded global generator produced new numbers on
# each run, so none of the results derived from `df` could be reproduced.
# (Re-run the notebook after this change to refresh the saved outputs.)
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.randn(6, 4), index=dates, columns=list('ABCD'))
df
Out[43]:
A B C D
2013-01-01 -0.126064 -1.793415 2.060169 -0.313953
2013-01-02 -0.743610 -0.088651 -0.231429 -0.950453
2013-01-03 -2.857036 1.233371 -0.824847 2.099980
2013-01-04 1.667349 -0.595047 -0.991380 -1.391953
2013-01-05 -0.446143 -0.965614 -0.304377 -1.621390
2013-01-06 0.270045 -0.187666 -0.501277 -0.822913
In [44]:
# Attribute access selects column 'A' as a Series.
df.A
Out[44]:
2013-01-01   -0.126064
2013-01-02   -0.743610
2013-01-03   -2.857036
2013-01-04    1.667349
2013-01-05   -0.446143
2013-01-06    0.270045
Freq: D, Name: A, dtype: float64
In [45]:
# Bracket access by column label returns column 'A' as a Series.
df['A']
Out[45]:
2013-01-01   -0.126064
2013-01-02   -0.743610
2013-01-03   -2.857036
2013-01-04    1.667349
2013-01-05   -0.446143
2013-01-06    0.270045
Freq: D, Name: A, dtype: float64
In [46]:
# .loc selects by index label: the row for 2013-01-01, returned as a Series
# whose labels are the column names.
df.loc['2013-01-01']
Out[46]:
A   -0.126064
B   -1.793415
C    2.060169
D   -0.313953
Name: 2013-01-01 00:00:00, dtype: float64
In [47]:
# applymap calls the function on every cell individually; here each value is
# increased by 1 (the vectorized spelling of the same operation is `df + 1`).
df.applymap(lambda x: x+1)
Out[47]:
A B C D
2013-01-01 0.873936 -0.793415 3.060169 0.686047
2013-01-02 0.256390 0.911349 0.768571 0.049547
2013-01-03 -1.857036 2.233371 0.175153 3.099980
2013-01-04 2.667349 0.404953 0.008620 -0.391953
2013-01-05 0.553857 0.034386 0.695623 -0.621390
2013-01-06 1.270045 0.812334 0.498723 0.177087
In [48]:
# Element-wise predicate via applymap: True where the value is positive.
df.applymap(lambda x: x>0)
Out[48]:
A B C D
2013-01-01 False False True False
2013-01-02 False False False False
2013-01-03 False True False True
2013-01-04 True False False False
2013-01-05 False False False False
2013-01-06 True False False False
In [49]:
# Vectorized comparison: a boolean frame that is True where the value is positive.
df>0
Out[49]:
A B C D
2013-01-01 False False True False
2013-01-02 False False False False
2013-01-03 False True False True
2013-01-04 True False False False
2013-01-05 False False False False
2013-01-06 True False False False
In [50]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# Line plot of every column against the date index; figsize is in inches.
df.plot(figsize=(15,15))
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fad3e8f73c8>
In [51]:
# Mean of each column over consecutive 2-day bins of the date index.
# Uses the canonical uppercase day alias 'D' for the frequency.
df.groupby(pd.Grouper(freq='2D')).mean()
Out[51]:
A B C D
2013-01-01 -0.434837 -0.941033 0.914370 -0.632203
2013-01-03 -0.594844 0.319162 -0.908113 0.354013
2013-01-05 -0.088049 -0.576640 -0.402827 -1.222151
In [52]:
# Mean of each column over consecutive 3-day bins of the date index.
# Uses the canonical uppercase day alias 'D' for the frequency.
df.groupby(pd.Grouper(freq='3D')).mean()
Out[52]:
A B C D
2013-01-01 -1.242237 -0.216232 0.334631 0.278525
2013-01-04 0.497084 -0.582776 -0.599011 -1.278752
In [53]:
# Concatenate df with itself side by side (axis=1): same rows, 8 columns,
# with the labels A-D appearing twice.
df_1 = pd.concat([df,df], axis=1)
df_1
Out[53]:
A B C D A B C D
2013-01-01 -0.126064 -1.793415 2.060169 -0.313953 -0.126064 -1.793415 2.060169 -0.313953
2013-01-02 -0.743610 -0.088651 -0.231429 -0.950453 -0.743610 -0.088651 -0.231429 -0.950453
2013-01-03 -2.857036 1.233371 -0.824847 2.099980 -2.857036 1.233371 -0.824847 2.099980
2013-01-04 1.667349 -0.595047 -0.991380 -1.391953 1.667349 -0.595047 -0.991380 -1.391953
2013-01-05 -0.446143 -0.965614 -0.304377 -1.621390 -0.446143 -0.965614 -0.304377 -1.621390
2013-01-06 0.270045 -0.187666 -0.501277 -0.822913 0.270045 -0.187666 -0.501277 -0.822913
In [54]:
# Replace the duplicated labels with eight unique column names (assigned in place).
df_1.columns = ['A','B','C','D','E','F','G','H']
In [55]:
# Column arithmetic is element-wise, row by row, and returns a Series.
df_1.A+df_1.B
Out[55]:
2013-01-01   -1.919479
2013-01-02   -0.832261
2013-01-03   -1.623665
2013-01-04    1.072301
2013-01-05   -1.411757
2013-01-06    0.082379
Freq: D, dtype: float64
In [56]:
# Several columns combined element-wise in a single expression.
df_1.A + df_1.B - df_1.C - df_1.D
Out[56]:
2013-01-01   -3.665695
2013-01-02    0.349620
2013-01-03   -2.898799
2013-01-04    3.455635
2013-01-05    0.514010
2013-01-06    1.406568
Freq: D, dtype: float64
In [57]:
# .loc[row_label, column_label] reads a single scalar value.
df_1.loc['2013-01-04', 'D']
Out[57]:
-1.3919529271434394
In [58]:
# The same .loc addressing on the left-hand side overwrites that one cell in place.
df_1.loc['2013-01-04', 'D'] = 100
In [59]:
# Display the frame to confirm the change: only column D of the 2013-01-04 row
# is now 100; column H, which started as a copy of D, keeps the old value.
df_1
Out[59]:
A B C D E F G H
2013-01-01 -0.126064 -1.793415 2.060169 -0.313953 -0.126064 -1.793415 2.060169 -0.313953
2013-01-02 -0.743610 -0.088651 -0.231429 -0.950453 -0.743610 -0.088651 -0.231429 -0.950453
2013-01-03 -2.857036 1.233371 -0.824847 2.099980 -2.857036 1.233371 -0.824847 2.099980
2013-01-04 1.667349 -0.595047 -0.991380 100.000000 1.667349 -0.595047 -0.991380 -1.391953
2013-01-05 -0.446143 -0.965614 -0.304377 -1.621390 -0.446143 -0.965614 -0.304377 -1.621390
2013-01-06 0.270045 -0.187666 -0.501277 -0.822913 0.270045 -0.187666 -0.501277 -0.822913
In [60]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df_1.describe()
Out[60]:
A B C D E F G H
count 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000
mean -0.372577 -0.399504 -0.132190 16.398545 -0.372577 -0.399504 -0.132190 -0.500114
std 1.479908 1.010890 1.113566 40.976198 1.479908 1.010890 1.113566 1.353129
min -2.857036 -1.793415 -0.991380 -1.621390 -2.857036 -1.793415 -0.991380 -1.621390
25% -0.669243 -0.872972 -0.743954 -0.918568 -0.669243 -0.872972 -0.743954 -1.281578
50% -0.286103 -0.391356 -0.402827 -0.568433 -0.286103 -0.391356 -0.402827 -0.886683
75% 0.171018 -0.113405 -0.249666 1.496497 0.171018 -0.113405 -0.249666 -0.441193
max 1.667349 1.233371 2.060169 100.000000 1.667349 1.233371 2.060169 2.099980
In [61]:
# (number of rows, number of columns)
df_1.shape
Out[61]:
(6, 8)
In [62]:
# Index type, per-column non-null counts and dtypes. memory_usage='deep' asks
# pandas to measure the actual memory footprint instead of estimating it.
df_1.info(memory_usage='deep')
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6 entries, 2013-01-01 to 2013-01-06
Freq: D
Data columns (total 8 columns):
A    6 non-null float64
B    6 non-null float64
C    6 non-null float64
D    6 non-null float64
E    6 non-null float64
F    6 non-null float64
G    6 non-null float64
H    6 non-null float64
dtypes: float64(8)
memory usage: 592.0 bytes