import pandas as pd

df= pd.read_csv('data/titanic_train.csv', sep=',')

df.head()

df1=pd.read_csv('data/ex1.csv')

df1.head()

f = open("data/ex1.csv", 'w')
data = """a,b,c,d,message
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo"""
f.write(data)
f.close()

f = open("data/ex2.csv", 'w')
data = """
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo
"""
f.write(data)
f.close()

df2= pd.read_csv('data/ex2.csv', header=None)
df2

df2= pd.read_csv('data/ex2.csv', names=['a','b','c','d','message'])
df2

names=['a','b','c','d','message']
df2= pd.read_csv('data/ex2.csv', names=names, index_col='message') # message가 index의 column으로 감
df2

f = open("data/csv_mindex.csv", 'w')
data = """key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16
"""
f.write(data)
f.close()

계층적 색인 지정

parsed= pd.read_csv('data/csv_mindex.csv', index_col=['key1', 'key2'])
parsed

f = open("data/ex3.txt", 'w')
data = """A         B         C
aaa -0.264438 -1.026059 -0.619500
bbb  0.927272  0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871859 -0.348382  1.100491
"""
f.write(data)
f.close()

result= pd.read_table('data/ex3.txt', sep='\s+') #\s+ :공백이 하나 이상 발생한 경우 구분
result

f = open("data/ex4.csv", 'w')
data = """#Hey!
a,b,c,d,message
#by python
#csv file
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo"""
f.write(data)
f.close()

f = open("data/ex4.csv",'r')
skiplist = []
count=0
while True:
    line = f.readline()
    if not line:
        break
    if '#' in line :
        print(line)
        skiplist.append(count)
    count += 1
f.close()
skiplist

#Hey!

#by python

#csv file

[0, 2, 3]

f=open('data/ex4.csv', 'r', encoding='utf-8')
lineNum=0
skiplist=[]

while True:
    lines=f.readline()
    if not lines:
        break
    if '#' in lines:
        skiplist.append(lineNum)
    lineNum +=1 
f.close()
skiplist

[0, 2, 3]

df1= pd.read_csv('data/ex4.csv', skiprows=[0,2,3]) # 불필요한 줄 지움 
df1

df1= pd.read_csv('data/ex4.csv', skiprows= skiplist) # 불필요한 줄 지움 
df2

f = open("data/ex5.csv", 'w')
data = """something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo
"""
f.write(data)
f.close()

result= pd.read_csv('data/ex5.csv') 
result

pd.isnull(result)

result=pd.read_csv('data/ex5.csv', na_values=['NULL']) # na_values: 특정한 값을 NaN으로 취급하고 싶을 때 사용, 1 넣으면 1이 NaN으로 바뀜 
result

sentinels={'message': ['foo','world'], 'something': ['two']}
result=pd.read_csv('data/ex5.csv', na_values=sentinels)
result

엑셀파일읽기¶

xlsx = 'data/ex01.xlsx'
frame= pd.read_excel(xlsx, 'Sheet1')
frame

엑셀파일저장¶

writer= pd.ExcelWriter('data/ex02.xlsx')
frame.to_excel(writer, 'Sheet1')
writer.save()

누락된 데이터 처리¶

인자	설명
dropna	누락된 데이터가 있는 축(low,column)을 제외시킨다. 어느정도의 누락데이터까지 용인할것인지 지정 할 수 있다.
fillna	누락된 데이터를 대신할 값을 채우거나 'ffill'이나 'bfill'같은 보간 메서드를 적용한다.
isnull	누락되거나 NA인 값을 알려주는 불리언 값이 저장된 같은 형의 객체를 반환
notnull	isnull과 반대되는 메서드

import numpy as np 
import pandas as pd

string_data = pd.Series(['aardvark', 'artichoke', np.nan, 'avocado'])
string_data

0     aardvark
1    artichoke
2          NaN
3      avocado
dtype: object

string_data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

string_data[0]= None 
string_data.isnull()

0     True
1    False
2     True
3    False
dtype: bool

from numpy import nan as NA 
data= pd.Series([1,NA,3.5,NA,7])
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

data[data.notnull()] #data.dropna()와 같은 결과

0    1.0
2    3.5
4    7.0
dtype: float64

data = pd.DataFrame([[1., 6.5, 3.], [1., NA, NA],
                     [NA, NA, NA], [NA, 6.5, 3.]])
data

cleand=data.dropna() #NaN이 하나라도 들어있으면 사라짐 
cleand

cleand= data.dropna(how="all") # how=all : 모두 NaN이면 삭제됨
cleand

# 4번 열을 추가하고 NA 값으로 지정 
data[4]=NA
data

cleand= data.dropna(axis='columns', how="all")
#cleand= data.dropna(axis='1', how="all") 위의 코드와 동일 
cleand

df= pd.DataFrame(np.random.randn(7,3)) #세로가 7개 가로가 3개인
df

df.iloc[0:3,1]=NA
df.iloc[:1,2]=NA
df

cleaned= df.dropna(thresh=2) # 2개이상 NaN이 있으면 삭제됨  
cleaned

결측치 채우기¶

df

filled=df.fillna(0) # NaN을 0으로 대체 
filled

filled2=df.fillna({1:0.9, 2:0})
filled2

df.fillna(0, inplace=False) #원본데이터 안바뀜
df

df.fillna(0, inplace=True) #원본데이터 0으로 바뀜 
df

df= pd.DataFrame(np.random.randn(7,3))
df.iloc[2:,1]=NA
df.iloc[4:,2]=NA
df

filled= df.fillna(method='ffill')
filled

filled= df.fillna(method='ffill', limit=2) #2개만 채움
filled

def.fillna(value=, method='ffill', axis=0, inplace=, limit=)¶

value : 비어있는 값을 채울 스칼라 값이나 dictonary 형식의 객체
method : 보간법(기본=ffill)
axis : 값을 채워넣을 축(기본 axis=0)
inplace : 복사본을 생성하지 않고 호출한 객체에 값을 반환(기본값=False )
limit : 값을 앞 또는 뒤로 몇개까지 채울지 지정

data = pd.DataFrame({'k1': ['one', 'two'] * 3 + ['two'],
                     'k2': [1, 1, 2, 3, 3, 4, 4]})
data

data.duplicated() # 중복이 되는 것 True

0    False
1    False
2    False
3    False
4    False
5    False
6     True
dtype: bool

data.drop_duplicates() # 중복된 것 사라짐

data['v1']=range(7)
data

data.drop_duplicates(['k1']) #k1의 중복요소 삭제

data.drop_duplicates(['k2'])#k2의 중복요소 삭제

data.drop_duplicates(['v1']) # 중복이 없어서 똑같이 나옴

data

data.drop_duplicates(['k1','k2'], keep='last') # keep='last' : 마지막이 살아남음
# k1과 k2의 값이 같은 건 two 4 5/ two 4 6 이 중 마지막이 살아남음

데이터 변형하기¶

data = pd.DataFrame({'food': ['bacon', 'pulled pork', 'bacon',
                              'Pastrami', 'corned beef', 'Bacon',
                              'pastrami', 'honey ham', 'nova lox'],
                     'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})
data

meat_to_animal = {
  'bacon': 'pig',
  'pulled pork': 'pig',
  'pastrami': 'cow',
  'corned beef': 'cow',
  'honey ham': 'pig',
  'nova lox': 'salmon'
}
meat_to_animal

{'bacon': 'pig',
 'pulled pork': 'pig',
 'pastrami': 'cow',
 'corned beef': 'cow',
 'honey ham': 'pig',
 'nova lox': 'salmon'}

#소문자로 바꾸기 
lower_cased=data['food'].str.lower()
lower_cased

0          bacon
1    pulled pork
2          bacon
3       pastrami
4    corned beef
5          bacon
6       pastrami
7      honey ham
8       nova lox
Name: food, dtype: object

data['animal']= lower_cased.map(meat_to_animal)
data

#lower_cased=data['food'].str.lower()
#data['animal']= lower_cased.map(meat_to_animal)
data['animal']= data['food'].map(lambda x: meat_to_animal[x.lower()])
data

data=pd.Series([1., -999., 2., -999., -1000., 3.])
data

0       1.0
1    -999.0
2       2.0
3    -999.0
4   -1000.0
5       3.0
dtype: float64

data2 = data.replace(-999, np.nan)
data2

0       1.0
1       NaN
2       2.0
3       NaN
4   -1000.0
5       3.0
dtype: float64

data2 = data.replace([-999, -1000], np.nan)
data2

0    1.0
1    NaN
2    2.0
3    NaN
4    NaN
5    3.0
dtype: float64

data2 = data.replace([-999, -1000], [np.nan, 0])
data2

0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

data2= data.replace({-999.:np.nan, -1000.:0})
#-999 ->Nan , -1000 ->0
data2

0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

data = pd.DataFrame(np.arange(12).reshape((3, 4)),
                    index=['Ohio', 'Colorado', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

transform= lambda x:x[:4].upper() #X를 4자리로 끊음 
data.index.map(transform)

Index(['OHIO', 'COLO', 'NEW '], dtype='object')

data.index

Index(['Ohio', 'Colorado', 'New York'], dtype='object')

data.index= data.index.map(transform)
data

str.title : 단어의 시작 부분에 있는 문자는 대문자로, 나머지는 모두 소문자로 만든다.

data.rename(index=str.title, columns=str.upper)

data

data.rename(index={'OHIO':'INDIANA'}, columns={'three':'peekaboo'})

data

data.rename(index={'OHIO':'INDIANA'}, inplace=True) # 기존 객체를 수정 
data

data

나이 나누기
18~25(0)
26~35(1)
35~60(2)
60이상(3)

ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]

bins=[18,25,35,60,100] # 각 구간을 나눠줄 숫자값 
cats= pd.cut(ages,bins) #pd.cut(카테고리화 할 숫자데이터, 자를 구간의 구분값)
cats

[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
Length: 12
Categories (4, interval[int64]): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]

cats.codes #각 성분이 몇번쨰 구간에 속해있는지 정수 index로 표시됨

array([0, 0, 0, 1, 0, 0, 2, 1, 3, 2, 2, 1], dtype=int8)

cats.categories # ( ] : 왼쪽 미포함, 오른쪽 포함

IntervalIndex([(18, 25], (25, 35], (35, 60], (60, 100]],
              closed='right',
              dtype='interval[int64]')

pd.value_counts(cats) # 각 구간의 성분의 개수

(18, 25]     5
(35, 60]     3
(25, 35]     3
(60, 100]    1
dtype: int64

pd.cut(ages,[18,25,35,60,100], right=False) # [ ) : 왼쪽 포함, 오른쪽 미포함

[[18, 25), [18, 25), [25, 35), [25, 35), [18, 25), ..., [25, 35), [60, 100), [35, 60), [35, 60), [25, 35)]
Length: 12
Categories (4, interval[int64]): [[18, 25) < [25, 35) < [35, 60) < [60, 100)]

pd.cut(ages,[18,25,35,60,100], right=True) # 위와 반대

[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
Length: 12
Categories (4, interval[int64]): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]

group_names= ['Youth', 'youngAdult', 'MiddleAged', 'Senior']

data2= pd.cut(ages,bins,labels=group_names)
data2

[Youth, Youth, Youth, youngAdult, Youth, ..., youngAdult, Senior, MiddleAged, MiddleAged, youngAdult]
Length: 12
Categories (4, object): [Youth < youngAdult < MiddleAged < Senior]

data2.value_counts()

Youth         5
youngAdult    3
MiddleAged    3
Senior        1
dtype: int64

각 구간 구분값을 bin으로 정의해서 나누었는데 pandas에서 알아서 판단하여 데이터의 길이를 잘라주고 구간을 설정할 수도 있음.

data= np.random.rand(20)
data

array([0.7166629 , 0.27019459, 0.74586198, 0.46917089, 0.45317755,
       0.10626844, 0.08305884, 0.57318399, 0.5936407 , 0.78475135,
       0.33795658, 0.99624303, 0.2733613 , 0.89804862, 0.10861183,
       0.43367864, 0.09130907, 0.8281011 , 0.54757824, 0.0145025 ])

데이터 성분값을 기준으로 자동으로 구간을 나누게 하기 위해서 나눌 구간의 개수만 입력해준다 (성분의 최소값~최대값을 보고 4개구간 나눔)

pd.cut(data, 4, precision=2) #precision=2 소수점 2자리까지 표현

[(0.51, 0.75], (0.26, 0.51], (0.51, 0.75], (0.26, 0.51], (0.26, 0.51], ..., (0.26, 0.51], (0.014, 0.26], (0.75, 1.0], (0.51, 0.75], (0.014, 0.26]]
Length: 20
Categories (4, interval[float64]): [(0.014, 0.26] < (0.26, 0.51] < (0.51, 0.75] < (0.75, 1.0]]

data= pd.DataFrame(np.random.randn(1000,4)) #n붙이면 정규분포.. 
data

data.describe()

data[2] #column 2

0      2.304293
1     -0.256742
2     -0.285212
3      1.830050
4      0.856561
         ...   
995    1.182251
996    0.611918
997    2.086748
998    0.240342
999    0.889384
Name: 2, Length: 1000, dtype: float64

col= data[2]
col[np.abs(col)>3]

402   -3.170485
519   -3.460535
Name: 2, dtype: float64

data[(np.abs(data)>3).any(1)] #data가 절대값이 3보다 큰게 하나라도 있으면 무조건 출력

np.sign(x) : x<0일 때 -1, x==0일 떄 0 ,x>0일 때 1을 반환

data[np.abs(data)>3]= np.sign(data)*3 # 절대값이 3보다 큰 값들을 -1 또는 1로 나타내는데 *3을 해서 3 또는 -3으로 나타나짐
data.describe()

data.head()

np.sign(data).head() # 부호를 나타냄

df = pd.DataFrame(np.arange(5 * 4).reshape((5, 4)))
df

sampler = np.random.permutation(5) # 무작위로 섞인 배열 만든다
sampler

array([4, 1, 3, 2, 0])

df.take(sampler) #sampler 의 값을 인덱스로

df.sample(n=3) # 3개의 인덱스 랜덤하게 추출

choices = pd.Series([5, 7, -1, 6, 4])
choices

0    5
1    7
2   -1
3    6
4    4
dtype: int64

draws = choices.sample(n=10, replace=True)
draws

4    4
4    4
0    5
1    7
1    7
0    5
1    7
1    7
3    6
1    7
dtype: int64

onehot 인코딩¶

문자를 숫자로 바꾸어 주는 방법 중 하나로 onehot인코딩이 있다.
가변수로 만들어주는 것인데, 이는 0과 1로 이루어진 열을 나타낸다.
1은 있다 0은 없다를 나타낸다.

df = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'],
                   'data1': range(6)})
df

pd.get_dummies(df['key']) #df['key]열만 get_dummies해줌

dummies = pd.get_dummies(df['key'])
dummies

dummies = pd.get_dummies(df['key'],prefix='key') # prefix를 활용하여 좀 더 명시적으로 표현 
#기존 df의 컬럼을 반영해주기 위해서 작성
dummies

df_with_dummy = df[['data1']].join(dummies)
df_with_dummy

mnames = ['movie_id', 'title', 'genres']
movies = pd.read_table('movies.dat', sep='::', header=None, names=mnames) 
movies[:10]

all_genres=[]
for x in movies.genres:
    all_genres.extend(x.split('|'))
genres=pd.unique(all_genres)
genres
# genres의 장르를 중복없이 나오게 하기

array(['Animation', "Children's", 'Comedy', 'Adventure', 'Fantasy',
       'Romance', 'Drama', 'Action', 'Crime', 'Thriller', 'Horror',
       'Sci-Fi', 'Documentary', 'War', 'Musical', 'Mystery', 'Film-Noir',
       'Western'], dtype=object)

zero_matrix=np.zeros((len(movies), len(genres)))
dummies= pd.DataFrame(zero_matrix, columns=genres)
dummies

gen= movies.genres[0]
gen.split('|')
dummies.columns.get_indexer(gen.split('|'))

array([0, 1, 2], dtype=int64)

dummies

enumerate
: 보통 for문과 함께 사용
: 인덱스 값을 포함하는 enumerate객체를 리턴
: for문처럼 반복되는 구간에서 객체가 현재 어느 위치에 있는지 알려주는 인덱스 값이 필요할 때 사용하면 유용

for i, gen in enumerate(movies.genres):
    indices = dummies.columns.get_indexer(gen.split('|')) #gen 첫번째 줄에 나와있는 string 
    dummies.iloc[i, indices] = 1 

dummies

pd.set_option('display.max_columns', 25)
#pd.set_option('display.max_row', 10)
movies_windic= movies.join(dummies.add_prefix('Genre_'))
movies_windic.head()

np.random.seed(12345)
values = np.random.rand(10)
values

array([0.92961609, 0.31637555, 0.18391881, 0.20456028, 0.56772503,
       0.5955447 , 0.96451452, 0.6531771 , 0.74890664, 0.65356987])

bins=[0, 0.2, 0.4, 0.6, 0.8, 1]
pd.get_dummies(pd.cut(values, bins))

val= ' a,b, guido '
val.split(',') #공백포함됨

[' a', 'b', ' guido ']

val

' a,b, guido '

val= 'a,b, guido'
val.strip() #양끝 공백 제거

'a,b, guido'

pieces = [x.strip() for x in val.split(',')]
pieces

['a', 'b', 'guido']

first, second, third= pieces 
first+ '::'+ second+ '::'+ third

'a::b::guido'

first

'a'

pieces

['a', 'b', 'guido']

'::'.join(pieces)

'a::b::guido'

'guido' in val

True

val.index(',')

1

val.find(':')

-1

val.find(',')

1

val.index(':')

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-259-2c016e7367ac> in <module>
----> 1 val.index(':')

ValueError: substring not found

val.count(',')

2

val.replace(',', '::')

'a::b:: guido'

val.replace(',', '')

'ab guido'

	pclass	survived	name	sex	age	sibsp	parch	ticket	fare	cabin	embarked	body	home.dest
0	2	1	Mellinger, Miss. Madeleine Violet	female	13.0	0	1	250644	19.5000	NaN	S	NaN	England / Bennington, VT
1	2	1	Wells, Miss. Joan	female	4.0	1	1	29103	23.0000	NaN	S	NaN	Cornwall / Akron, OH
2	2	1	Duran y More, Miss. Florentina	female	30.0	1	0	SC/PARIS 2148	13.8583	NaN	C	NaN	Barcelona, Spain / Havana, Cuba
3	3	0	Scanlan, Mr. James	male	NaN	0	0	36209	7.7250	NaN	Q	NaN	NaN
4	3	1	Bradley, Miss. Bridget Delia	female	22.0	0	0	334914	7.7250	NaN	Q	NaN	Kingwilliamstown, Co Cork, Ireland Glens Falls...

	0	1	2
0	-0.131701	0.165730	-1.445157
1	-0.916618	1.702881	0.635776
2	0.591533	0.112397	-0.523709
3	1.330796	-0.356226	-0.715347
4	0.808900	-0.128411	0.492950
5	-0.303647	-1.336609	-1.564481
6	0.328623	-0.499420	0.384282

	0	1	2
0	-0.131701	NaN	NaN
1	-0.916618	NaN	0.635776
2	0.591533	NaN	-0.523709
3	1.330796	-0.356226	-0.715347
4	0.808900	-0.128411	0.492950
5	-0.303647	-1.336609	-1.564481
6	0.328623	-0.499420	0.384282

	0	1	2
1	-0.916618	NaN	0.635776
2	0.591533	NaN	-0.523709
3	1.330796	-0.356226	-0.715347
4	0.808900	-0.128411	0.492950
5	-0.303647	-1.336609	-1.564481
6	0.328623	-0.499420	0.384282

	0	1	2
0	-0.131701	NaN	NaN
1	-0.916618	NaN	0.635776
2	0.591533	NaN	-0.523709
3	1.330796	-0.356226	-0.715347
4	0.808900	-0.128411	0.492950
5	-0.303647	-1.336609	-1.564481
6	0.328623	-0.499420	0.384282

.

python_pandas(판다스): 계층적 색인 지정, 누락된 데이터처리, 결측치채우기, 데이터 변형하기, onehot인코딩

엑셀파일읽기¶

엑셀파일저장¶

누락된 데이터 처리¶

결측치 채우기¶

def.fillna(value=, method='ffill', axis=0, inplace=, limit=)¶

데이터 변형하기¶

onehot 인코딩¶

'Python' 카테고리의 다른 글

+ Recent posts

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역

	A	B	C
aaa	-0.264438	-1.026059	-0.619500
bbb	0.927272	0.302904	-0.032399
ccc	-0.264273	-0.386314	-0.217601
ddd	-0.871859	-0.348382	1.100491

	something	a	b	c	d	message
0	False	False	False	False	False	True
1	False	False	False	True	False	False
2	False	False	False	False	False	False

	0	1	2
0	-0.131701	0.000000	0.000000
1	-0.916618	0.000000	0.635776
2	0.591533	0.000000	-0.523709
3	1.330796	-0.356226	-0.715347
4	0.808900	-0.128411	0.492950
5	-0.303647	-1.336609	-1.564481
6	0.328623	-0.499420	0.384282

	0	1	2
0	-0.131701	0.900000	0.000000
1	-0.916618	0.900000	0.635776
2	0.591533	0.900000	-0.523709
3	1.330796	-0.356226	-0.715347
4	0.808900	-0.128411	0.492950
5	-0.303647	-1.336609	-1.564481
6	0.328623	-0.499420	0.384282

	0	1	2
0	-0.924464	-0.377605	-1.641186
1	-1.683392	-0.488751	-0.973592
2	1.298459	NaN	-0.766517
3	0.340175	NaN	-0.209967
4	-0.890470	NaN	NaN
5	0.125698	NaN	NaN
6	1.980833	NaN	NaN

	food	ounces
0	bacon	4.0
1	pulled pork	3.0
2	bacon	12.0
3	Pastrami	6.0
4	corned beef	7.5
5	Bacon	8.0
6	pastrami	3.0
7	honey ham	5.0
8	nova lox	6.0

	0	1	2	3
0	0.651133	1.584606	2.304293	-0.585606
1	3.000024	-0.510052	-0.256742	1.269666
2	-1.460685	0.493399	-0.285212	0.711587
3	-0.900536	0.580821	1.830050	-0.017385
4	-0.526212	-1.039466	0.856561	-0.704050
...	...	...	...	...
995	1.224748	-0.389117	1.182251	-1.544449
996	-0.665040	-0.029167	0.611918	-0.382876
997	-0.134677	-1.843704	2.086748	0.926602
998	-0.528677	0.842454	0.240342	0.484023
999	-0.501713	0.430430	0.889384	-0.380317

	0	1	2	3
count	1000.000000	1000.000000	1000.000000	1000.000000
mean	-0.015997	-0.023723	-0.011136	0.019692
std	0.968457	1.046275	0.982243	1.004339
min	-3.738030	-4.591808	-3.460535	-3.228758
25%	-0.700473	-0.730756	-0.621459	-0.667099
50%	-0.006314	-0.056220	0.014056	-0.003606
75%	0.650398	0.670439	0.640740	0.713520
max	3.096388	2.836078	2.779539	2.658785

	0	1	2	3
0	1.0	1.0	1.0	-1.0
1	1.0	-1.0	-1.0	1.0
2	-1.0	1.0	-1.0	1.0
3	-1.0	1.0	1.0	-1.0
4	-1.0	-1.0	1.0	-1.0

	movie_id	title	genres
0	1	Toy Story (1995)	Animation\|Children's\|Comedy
1	2	Jumanji (1995)	Adventure\|Children's\|Fantasy
2	3	Grumpier Old Men (1995)	Comedy\|Romance
3	4	Waiting to Exhale (1995)	Comedy\|Drama
4	5	Father of the Bride Part II (1995)	Comedy
5	6	Heat (1995)	Action\|Crime\|Thriller
6	7	Sabrina (1995)	Comedy\|Romance
7	8	Tom and Huck (1995)	Adventure\|Children's
8	9	Sudden Death (1995)	Action
9	10	GoldenEye (1995)	Action\|Adventure\|Thriller

	a	b	c	d	message
0	1	2	3	4	hello
1	5	6	7	8	world
2	9	10	11	12	foo

	0	1	2	3	4
0	1	2	3	4	hello
1	5	6	7	8	world
2	9	10	11	12	foo

	a	b	c	d	message
0	1	2	3	4	hello
1	5	6	7	8	world
2	9	10	11	12	foo

	a	b	c	d
message
hello	1	2	3	4
world	5	6	7	8
foo	9	10	11	12

	Animation	Children's	Comedy	Adventure	Fantasy	Romance	Drama	Action	Crime	Thriller	Horror	Sci-Fi	Documentary	War	Musical	Mystery	Film-Noir	Western
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
1	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
4	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
3878	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3879	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3880	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3881	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
3882	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	(0.0, 0.2]	(0.2, 0.4]	(0.4, 0.6]	(0.6, 0.8]	(0.8, 1.0]
0	0	0	0	0	1
1	0	1	0	0	0
2	1	0	0	0	0
3	0	1	0	0	0
4	0	0	1	0	0
5	0	0	1	0	0
6	0	0	0	0	1
7	0	0	0	1	0
8	0	0	0	1	0
9	0	0	0	1	0

Python_판다스_데이터분석 (0)	2020.09.14
Python_example (0)	2020.09.11
Python_pandas 문제 (0)	2020.09.09
Python_pandas(판다스):시리즈,데이터프레임,색인,인덱싱,sorting (0)	2020.09.09
Python 기초09_vectorize (0)	2020.09.08

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

	(0.0, 0.2]	(0.2, 0.4]	(0.4, 0.6]	(0.6, 0.8]	(0.8, 1.0]
0	0	0	0	0	1
1	0	1	0	0	0
2	1	0	0	0	0
3	0	1	0	0	0
4	0	0	1	0	0
5	0	0	1	0	0
6	0	0	0	0	1
7	0	0	0	1	0
8	0	0	0	1	0
9	0	0	0	1	0