In [1]:
import pandas as pd
import numpy as np

## Duplicated indexes

In [2]:
# Build a frame whose index contains duplicates: the same 100-row frame is
# concatenated with itself (labels 0..99 twice) and the first 50 rows are
# dropped, so labels 0..49 occur once and labels 50..99 occur twice.
# A seeded generator keeps the random values identical across kernel restarts.
df = pd.concat(
    [pd.DataFrame(np.random.RandomState(0).randint(1, 20, size=(100, 7)))] * 2
).iloc[50:]

In [4]:
df.index.is_unique

False

In [5]:
# is_monotonic_increasing is the supported spelling; the is_monotonic alias
# was removed in pandas 2.0 and returned the same value.
df.index.is_monotonic_increasing

False

In [6]:
df.tail()

Unnamed: 0,0,1,2,3,4,5,6
95,12,11,16,13,16,2,7
96,1,3,10,10,7,15,7
97,11,10,17,1,12,12,19
98,9,3,10,13,13,11,14
99,12,7,13,1,13,19,11


In [7]:
df.loc[0]

0    16
1     3
2     8
3    15
4    12
5     4
6    13
Name: 0, dtype: int64

In [8]:
# Label 0 occurs only once in df.index, so assigning a 7-element sequence
# replaces that single row.
df.loc[0] = range(7)

In [11]:
df.loc[0]

0    0
1    1
2    2
3    3
4    4
5    5
6    6
Name: 0, dtype: int64

In [12]:
# Label 99 occurs twice in df.index, so the same row assignment that worked
# for the unique label 0 fails here. Catch the error and display it so the
# notebook still runs top to bottom.
try:
    df.loc[99] = range(7)
except ValueError as exc:
    print("ValueError: {}".format(exc))

ValueError: cannot set using a list-like indexer with a different length than the value

In [13]:
# Label 99 is duplicated, so .loc returns every matching row (a DataFrame)
# rather than a single Series.
df.loc[99]

Unnamed: 0,0,1,2,3,4,5,6
99,12,7,13,1,13,19,11
99,12,7,13,1,13,19,11


In [14]:
# reset_index() moves the old (duplicated) labels into an "index" column and
# gives the frame a fresh 0..n-1 index, which is unique and monotonic.
ddf = df.reset_index()

In [15]:
ddf.tail()

Unnamed: 0,index,0,1,2,3,4,5,6
145,95,12,11,16,13,16,2,7
146,96,1,3,10,10,7,15,7
147,97,11,10,17,1,12,12,19
148,98,9,3,10,13,13,11,14
149,99,12,7,13,1,13,19,11


In [16]:
ddf.index.is_unique

True

In [17]:
# is_monotonic_increasing is the supported spelling; the is_monotonic alias
# was removed in pandas 2.0 and returned the same value.
ddf.index.is_monotonic_increasing

True

In [18]:
ddf.loc[0]

index    50
0         7
1         4
2         9
3         8
4         8
5        14
6        14
Name: 0, dtype: int64

In [19]:
%timeit -n 1 -r 1 [ddf.loc[idx] for idx in range(50)]

1 loop, best of 1: 5.41 ms per loop


In [20]:
%timeit -n 1 -r 1 [df.loc[idx] for idx in range(50)]

1 loop, best of 1: 6.33 ms per loop


In [21]:
%timeit -n 1 -r 1 [ddf.loc[idx] for idx in range(50, 100)]

1 loop, best of 1: 5.27 ms per loop


In [22]:
%timeit -n 1 -r 1 [df.loc[idx] for idx in range(50, 100)]

1 loop, best of 1: 20.1 ms per loop


In [25]:
%timeit -n 1 -r 1 ddf.loc[[40]]

1 loop, best of 1: 979 µs per loop


In [26]:
%timeit -n 1 -r 1 df.loc[[40]]

1 loop, best of 1: 1.3 ms per loop


## Adding elements

In [27]:
df.tail()

Unnamed: 0,0,1,2,3,4,5,6
95,12,11,16,13,16,2,7
96,1,3,10,10,7,15,7
97,11,10,17,1,12,12,19
98,9,3,10,13,13,11,14
99,12,7,13,1,13,19,11


In [28]:
# Label 101 is not in the index yet, so .loc assignment appends a new row
# (setting with enlargement) instead of raising.
df.loc[101] = range(7)

In [29]:
df.tail()

Unnamed: 0,0,1,2,3,4,5,6
96,1,3,10,10,7,15,7
97,11,10,17,1,12,12,19
98,9,3,10,13,13,11,14
99,12,7,13,1,13,19,11
101,0,1,2,3,4,5,6


In [30]:
a = np.zeros(shape=(5,4))

In [31]:
a

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [32]:
a[4]

array([ 0.,  0.,  0.,  0.])

In [33]:
# A NumPy array cannot grow on assignment: `a` has 5 rows (valid indices
# 0..4), so writing to index 5 raises IndexError. Catch and display the error
# so the notebook still runs top to bottom.
try:
    a[5] = [1, 2, 3, 4]
except IndexError as exc:
    print("IndexError: {}".format(exc))

IndexError: index 5 is out of bounds for axis 0 with size 5

In [34]:
def fill_a(n=1000):
    """Build a Series by adding one element at a time (the slow way).

    Starts from an empty Series and assigns ``s.loc[i] = i * 2`` for each
    ``i`` in ``range(n)``. Every assignment targets a label that does not
    exist yet, so the Series is enlarged on each iteration.

    Parameters
    ----------
    n : int, default 1000
        Number of elements to add.

    Returns
    -------
    pandas.Series
        Series labelled ``0..n-1`` whose value at label ``i`` is ``i * 2``.
    """
    # Explicit dtype: a bare pd.Series() has a version-dependent default dtype
    # and emits a warning on some pandas releases.
    s = pd.Series(dtype="float64")
    for i in range(n):
        s.loc[i] = i * 2
    return s

In [35]:
%timeit -n 1 -r 1 fill_a()

1 loop, best of 1: 414 ms per loop


In [36]:
def fill_b(n=1000):
    """Build a Series by filling pre-allocated slots.

    Creates a Series whose index ``0..n-1`` already exists (values start as
    NaN) and then assigns ``s.loc[i] = i * 2`` for each ``i`` in ``range(n)``.
    Every assignment overwrites an existing label, so the Series never has to
    be enlarged.

    Parameters
    ----------
    n : int, default 1000
        Number of slots to allocate and fill.

    Returns
    -------
    pandas.Series
        float64 Series labelled ``0..n-1`` whose value at label ``i`` is
        ``i * 2``.
    """
    # Explicit dtype: without it the placeholder Series is float64 on older
    # pandas but object on newer releases, which changes the result dtype.
    s = pd.Series(index=range(n), dtype="float64")
    for i in range(n):
        s.loc[i] = i * 2
    return s

In [37]:
%timeit -n 1 -r 1 fill_b()

1 loop, best of 1: 68.2 ms per loop


In [38]:
%timeit -n 1 -r 1 df.loc[202] = range(7)

1 loop, best of 1: 2.17 ms per loop


In [39]:
%timeit -n 1 -r 1 df.loc[202] = range(7)

1 loop, best of 1: 931 µs per loop


In [40]:
%timeit -n 1 -r 1 df.loc[203] = range(7)

1 loop, best of 1: 2.18 ms per loop


In [41]:
%timeit -n 1 -r 1 df.loc[203] = range(7)

1 loop, best of 1: 720 µs per loop
