import pandas as pd
import numpy as np


# Two students as rows; row labels come from `index`, column names from `columns`.
df = pd.DataFrame(
    [
        [15, '남', '덕영중'],
        [17, '여', '수리중'],
    ],
    index=['준서', '예은'],
    columns=['나이', '성별', '학교'],
)

print(df)

    나이 성별   학교
준서  15  남  덕영중
예은  17  여  수리중


# Replace every row label and every column name at once by assigning new lists.
df.index = ['학생1', '학생2']
df.columns = ['연령', '남녀', '소속']


print(df)

     연령 남녀   소속
학생1  15  남  덕영중
학생2  17  여  수리중


# Rename columns: 나이 -> 연령, 성별 -> 남녀, 학교 -> 소속.
# Rename row labels: 준서 -> 학생1, 예은 -> 학생2.
# inplace=True changes df itself instead of returning a renamed copy.
# NOTE(review): if the direct df.index / df.columns assignment earlier in this
# file has already run on the same df, the old labels no longer exist and both
# calls leave df unchanged (rename skips missing labels by default) -- confirm
# the intended cell order.
df.rename(columns={'나이':'연령', '성별':'남녀', '학교':'소속'}, inplace=True)
df.rename(index={'준서':'학생1', '예은': '학생2'}, inplace=True)


print(df)

     연령 남녀   소속
학생1  15  남  덕영중
학생2  17  여  수리중


# Column-oriented exam scores: each key is a subject, each list holds one score per student.
exam_data = {
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# Student names are supplied as the row labels.
df = pd.DataFrame(exam_data, index=['서준', '우현', '인아'])
print(df)

    수학  영어   음악   체육
서준  90  98   85  100
우현  80  89   95   90
인아  70  95  100   90


# Take an independent copy before dropping. df[:] is only a slice of df, so an
# in-place drop on it makes pandas emit SettingWithCopyWarning; copy() is what
# the column-drop examples in this file already use and it leaves df untouched.
df2 = df.copy()
# Rows are dropped by label (axis=0 is the default).
df2.drop('우현', inplace=True)
print(df2)

    수학  영어   음악   체육
서준  90  98   85  100
인아  70  95  100   90

C:\Users\for\AppData\Local\Temp\ipykernel_24256\2087369725.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2.drop('우현', inplace=True)


# Take an independent copy before dropping. df[:] is only a slice of df, so an
# in-place drop on it makes pandas emit SettingWithCopyWarning; copy() avoids
# that and leaves df untouched.
df3 = df.copy()
# A list of labels drops several rows in one call (axis=0 is the default).
df3.drop(['우현', '인아'], inplace=True)
print(df3)

    수학  영어  음악   체육
서준  90  98  85  100

C:\Users\for\AppData\Local\Temp\ipykernel_24256\864553166.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3.drop(['우현', '인아'], inplace=True)


# Work on a copy so the original df keeps all of its columns.
df4 = df.copy()
# axis=1 targets columns: remove the '수학' column in place.
df4.drop('수학', axis=1, inplace=True)
print(df4)

    영어   음악   체육
서준  98   85  100
우현  89   95   90
인아  95  100   90


# Work on a copy so the original df keeps all of its columns.
df5 = df.copy()
# A list of column names with axis=1 removes several columns in one call.
df5.drop(['영어', '음악'], axis = 1, inplace=True)
print(df5)

    수학   체육
서준  90  100
우현  80   90
인아  70   90


# Select one row by its label; the result is a Series named after that row.
label1 = df.loc['서준']
print(label1, '\n')

# Select the same row by integer position (row 0).
position1 = df.iloc[0]
print(position1)

수학     90
영어     98
음악     85
체육    100
Name: 서준, dtype: int64 

수학     90
영어     98
음악     85
체육    100
Name: 서준, dtype: int64


# A list of labels (loc) or of positions (iloc) selects several rows and
# returns a DataFrame.
label2 = df.loc[['서준', '우현']]
position2 = df.iloc[[0,1]]

print(label2, '\n', position2)

    수학  영어  음악   체육
서준  90  98  85  100
우현  80  89  95   90 
     수학  영어  음악   체육
서준  90  98  85  100
우현  80  89  95   90


# Slicing differs between the two indexers: the label slice '서준':'우현'
# includes the end label (two rows), while the position slice 0:1 excludes the
# end position (one row). The printed output shows this difference.
label3 = df.loc['서준':'우현']
position3 = df.iloc[0:1]

print(label3, ' \n', position3)

    수학  영어  음악   체육
서준  90  98  85  100
우현  80  89  95   90  
     수학  영어  음악   체육
서준  90  98  85  100


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# No index is given, so the rows get the default integer labels.
df = pd.DataFrame(exam_data)
# Show the frame followed by its type, one per line.
print(df, type(df), sep='\n')

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90
<class 'pandas.core.frame.DataFrame'>


# Single-bracket selection with one column name returns that column as a Series.
math1 = df['수학']
print(math1)
print(type(math1))

0    90
1    80
2    70
Name: 수학, dtype: int64
<class 'pandas.core.series.Series'>


# Attribute-style access (df.<column name>) also returns the column as a Series.
english = df.영어 
print(english)
print(type(english))

0    98
1    89
2    95
Name: 영어, dtype: int64
<class 'pandas.core.series.Series'>


# A list of column names selects several columns and returns a DataFrame.
music_gym = df[['음악', '체육']]
print(music_gym)
print(type(music_gym))

    음악   체육
0   85  100
1   95   90
2  100   90
<class 'pandas.core.frame.DataFrame'>


# A one-element list of column names still returns a DataFrame, not a Series.
math2 = df[['수학']]
print(math2)
print(type(math2))

   수학
0  90
1  80
2  70
<class 'pandas.core.frame.DataFrame'>


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# Build the frame and promote the 이름 column to the row index in one expression.
df = pd.DataFrame(exam_data).set_index('이름')
print(df)

    수학  영어   음악   체육
이름                  
서준  90  98   85  100
우현  80  89   95   90
인아  70  95  100   90


# Select a single element by (row label, column label).
a = df.loc['서준', '음악']
print(a)

# Select the same element by (row position, column position).
b = df.iloc[0, 2]
print(b)

85
85


# One row label plus a list of column labels returns those elements as a Series.
c = df.loc['서준', ['음악', '체육']]
print(c)

음악     85
체육    100
Name: 서준, dtype: int64


# Position-based form: row 0, column positions 2 and 3.
d = df.iloc[0, [2,3]]
print(d)

음악     85
체육    100
Name: 서준, dtype: int64


# Column label slice '음악':'체육'; the printed result includes the end label.
e = df.loc['서준', '음악': '체육']
print(e)

음악     85
체육    100
Name: 서준, dtype: int64


# Position slice 2: selects row 0 from column position 2 through the last column.
f = df.iloc[0, 2:]
print(f)

음악     85
체육    100
Name: 서준, dtype: int64


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

df = pd.DataFrame(exam_data)

# Assigning a scalar to a new column name appends that column with the value
# repeated on every row.
df['국어'] = 80
print(df)

   이름  수학  영어   음악   체육  국어
0  서준  90  98   85  100  80
1  우현  80  89   95   90  80
2  인아  70  95  100   90  80


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# No index is given, so the rows get the default integer labels.
df = pd.DataFrame(exam_data)
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90


# Add a new row labelled 3; the scalar 0 is written to every column, including
# the 이름 column.
df.loc[3] = 0
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90
3   0   0   0    0    0


# Add a new row labelled 4 by assigning a list with one value per column.
df.loc[4] = ['동규', 90, 80, 70, 60]
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90
3   0   0   0    0    0
4  동규  90  80   70   60


# Copy an existing row: add a new row labelled '행5' holding the values of row 3.
df.loc['행5'] = df.loc[3]
print(df)

    이름  수학  영어   음악   체육
0   서준  90  98   85  100
1   우현  80  89   95   90
2   인아  70  95  100   90
3    0   0   0    0    0
4   동규  90  80   70   60
행5   0   0   0    0    0


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# No index is given, so the rows get the default integer labels.
df = pd.DataFrame(exam_data)
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90


# Use the 이름 column as the new row index; inplace=True modifies df itself.
df.set_index('이름', inplace=True)
print(df)

    수학  영어   음악   체육
이름                  
서준  90  98   85  100
우현  80  89   95   90
인아  70  95  100   90


# Set one element by (row position, column position) in a single indexing call.
# The chained form df.iloc[0][3] = ... assigns into an intermediate Series,
# which pandas does not guarantee to write through to df (and which never
# writes through when Copy-on-Write is enabled).
df.iloc[0, 3] = 80
print(df)

    수학  영어   음악  체육
이름                 
서준  90  98   85  80
우현  80  89   95  90
인아  70  95  100  90


# Set one element by (row label, column label) in a single indexing call.
# The chained form df.loc[row][col] = ... assigns into an intermediate Series
# and may only change a temporary copy instead of df.
df.loc['서준', '체육'] = 90
print(df)

    수학  영어   음악  체육
이름                 
서준  90  98   85  90
우현  80  89   95  90
인아  70  95  100  90


# Set one element with a single .loc[row label, column label] indexing call.
df.loc['서준', '체육'] = 100
print(df)

    수학  영어   음악   체육
이름                  
서준  90  98   85  100
우현  80  89   95   90
인아  70  95  100   90


# Assign the scalar 50 to both selected columns of row '서준'.
df.loc['서준', ['음악', '체육']] = 50
print(df)

    수학  영어   음악  체육
이름                 
서준  90  98   50  50
우현  80  89   95  90
인아  70  95  100  90


# Assign a different value to each selected column: 음악 = 100, 체육 = 50.
df.loc['서준', ['음악', '체육']] = 100, 50
print(df)

    수학  영어   음악  체육
이름                 
서준  90  98  100  50
우현  80  89   95  90
인아  70  95  100  90


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# No index is given, so the rows get the default integer labels.
df = pd.DataFrame(exam_data)
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90


# Swap rows and columns; the transposed frame is assigned back to df.
df = df.transpose()
print(df)

      0   1    2
이름   서준  우현   인아
수학   90  80   70
영어   98  89   95
음악   85  95  100
체육  100  90   90


# Transposing once more (.T is the attribute form) restores the original layout.
df = df.T
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90


# Column-oriented source data: each key becomes a column, each list its values.
exam_data = {
    '이름': ['서준', '우현', '인아'],
    '수학': [90, 80, 70],
    '영어': [98, 89, 95],
    '음악': [85, 95, 100],
    '체육': [100, 90, 90],
}

# No index is given, so the rows get the default integer labels.
df = pd.DataFrame(exam_data)
print(df)

   이름  수학  영어   음악   체육
0  서준  90  98   85  100
1  우현  80  89   95   90
2  인아  70  95  100   90


# Without inplace=True, set_index returns a new frame (stored in ndf) that uses
# the 이름 column as its row index.
ndf = df.set_index(['이름'])
print(ndf)

    수학  영어   음악   체육
이름                  
서준  90  98   85  100
우현  80  89   95   90
인아  70  95  100   90


# Setting a new index on ndf replaces its existing 이름 index; as the output
# shows, the old index is not kept in ndf2.
ndf2 = ndf.set_index('음악')
print(ndf2)

     수학  영어   체육
음악              
85   90  98  100
95   80  89   90
100  70  95   90


# A list of two columns produces a two-level row index (수학, 음악).
ndf3 = ndf.set_index(['수학', '음악'])
print(ndf3)

        영어   체육
수학 음악          
90 85   98  100
80 95   89   90
70 100  95   90


# Five integer columns (c0..c4) with three values each.
dict_data = {
    'c0': [1, 2, 3],
    'c1': [4, 5, 6],
    'c2': [7, 8, 9],
    'c3': [10, 11, 12],
    'c4': [13, 14, 15],
}
# Rows are labelled r0..r2.
df = pd.DataFrame(dict_data, index=['r0', 'r1', 'r2'])
print(df)

    c0  c1  c2  c3  c4
r0   1   4   7  10  13
r1   2   5   8  11  14
r2   3   6   9  12  15


# Reindex with two extra labels (r3, r4) that do not exist in df.
new_index = ['r0', 'r1', 'r2', 'r3', 'r4']
ndf = df.reindex(new_index)
print(ndf)
# NaN = "Not a Number", i.e. missing data; the rows for the new labels hold NaN.

     c0   c1   c2    c3    c4
r0  1.0  4.0  7.0  10.0  13.0
r1  2.0  5.0  8.0  11.0  14.0
r2  3.0  6.0  9.0  12.0  15.0
r3  NaN  NaN  NaN   NaN   NaN
r4  NaN  NaN  NaN   NaN   NaN


# fill_value=0 fills the rows for the new labels with 0 instead of NaN.
ndf2 = df.reindex(new_index, fill_value = 0)
print(ndf2)

    c0  c1  c2  c3  c4
r0   1   4   7  10  13
r1   2   5   8  11  14
r2   3   6   9  12  15
r3   0   0   0   0   0
r4   0   0   0   0   0


# Five integer columns (c0..c4) with three values each.
dict_data = {
    'c0': [1, 2, 3],
    'c1': [4, 5, 6],
    'c2': [7, 8, 9],
    'c3': [10, 11, 12],
    'c4': [13, 14, 15],
}
# Rows are labelled r0..r2.
df = pd.DataFrame(dict_data, index=['r0', 'r1', 'r2'])
print(df)

    c0  c1  c2  c3  c4
r0   1   4   7  10  13
r1   2   5   8  11  14
r2   3   6   9  12  15


# reset_index returns a new frame in which the old row labels become a regular
# column named 'index' and the rows are relabelled 0, 1, 2.
ndf = df.reset_index()
print(ndf)

  index  c0  c1  c2  c3  c4
0    r0   1   4   7  10  13
1    r1   2   5   8  11  14
2    r2   3   6   9  12  15


# Five integer columns (c0..c4) with three values each.
dict_data = {
    'c0': [1, 2, 3],
    'c1': [4, 5, 6],
    'c2': [7, 8, 9],
    'c3': [10, 11, 12],
    'c4': [13, 14, 15],
}
# Rows are labelled r0..r2.
df = pd.DataFrame(dict_data, index=['r0', 'r1', 'r2'])
print(df)

    c0  c1  c2  c3  c4
r0   1   4   7  10  13
r1   2   5   8  11  14
r2   3   6   9  12  15


# Sort the rows by index label in descending order (ascending=False).
ndf = df.sort_index(ascending=False)
print(ndf)

    c0  c1  c2  c3  c4
r2   3   6   9  12  15
r1   2   5   8  11  14
r0   1   4   7  10  13


# Sort the DataFrame in descending order by the values in column c1.
ndf = df.sort_values(by='c1', ascending=False)
print(ndf)

    c0  c1  c2  c3  c4
r2   3   6   9  12  15
r1   2   5   8  11  14
r0   1   4   7  10  13


# Subject -> score mapping; the dict keys become the Series index.
student1 = pd.Series(
    {
        '국어': 100,
        '영어': 80,
        '수학': 90,
    }
)
print(student1)

국어    100
영어     80
수학     90
dtype: int64


# Divide each subject score in student1 by 100; the result is a new Series.
percentage = student1/100
print(percentage)
print(type(percentage))

국어    1.0
영어    0.8
수학    0.9
dtype: float64
<class 'pandas.core.series.Series'>


# Two score Series with the same subjects listed in a different order.
student1 = pd.Series(
    {
        '국어': 100,
        '영어': 80,
        '수학': 90,
    }
)
student2 = pd.Series(
    {
        '수학': 80,
        '국어': 90,
        '영어': 80,
    }
)
# Print both Series separated by the same blank lines as three separate prints.
print(student1, '\n', student2, sep='\n')

국어    100
영어     80
수학     90
dtype: int64


수학    80
국어    90
영어    80
dtype: int64


# Series-to-Series arithmetic pairs elements by index label (subject), not by
# position, as the results printed later in this file show.
addition = student1 + student2 # addition
subtraction = student1 - student2 # subtraction
multiplication = student1 * student2 # multiplication
division = student1 / student2 # division
print(type(division))

<class 'pandas.core.series.Series'>


# Stack the four result Series as rows, labelled with the operation names
# (addition, subtraction, multiplication, division).
result = pd.DataFrame([addition, subtraction, multiplication, division], index=['덧셈', '뺄셈','곱셈', '나눗셈'])
print(result)

              국어        수학      영어
덧셈    190.000000   170.000   160.0
뺄셈     10.000000    10.000     0.0
곱셈   9000.000000  7200.000  6400.0
나눗셈     1.111111     1.125     1.0


# student1 has a missing (NaN) 국어 score; student2 has no 영어 entry at all.
student1 = pd.Series(
    {
        '국어': np.nan,
        '영어': 80,
        '수학': 90,
    }
)
student2 = pd.Series(
    {
        '수학': 80,
        '국어': 90,
    }
)

# Print both Series separated by the same blank lines as three separate prints.
print(student1, '\n', student2, sep='\n')

국어     NaN
영어    80.0
수학    90.0
dtype: float64


수학    80
국어    90
dtype: int64


# Same operators as before, but student1 has NaN for 국어 and student2 has no
# 영어 entry.
addition = student1 + student2 # addition
subtraction = student1 - student2 # subtraction
multiplication = student1 * student2 # multiplication
division = student1 / student2 # division
print(type(division))

<class 'pandas.core.series.Series'>


# Stack the four result Series as rows. Only 수학 has a value on both sides, so
# the printed 국어 and 영어 columns are NaN.
result = pd.DataFrame([addition, subtraction, multiplication, division], index=['덧셈', '뺄셈','곱셈', '나눗셈'])
print(result)

     국어        수학  영어
덧셈  NaN   170.000 NaN
뺄셈  NaN    10.000 NaN
곱셈  NaN  7200.000 NaN
나눗셈 NaN     1.125 NaN


# student1 has a missing (NaN) 국어 score; student2 has no 영어 entry at all.
student1 = pd.Series(
    {
        '국어': np.nan,
        '영어': 80,
        '수학': 90,
    }
)
student2 = pd.Series(
    {
        '수학': 80,
        '국어': 90,
    }
)

# Print both Series separated by the same blank lines as three separate prints.
print(student1, '\n', student2, sep='\n')

국어     NaN
영어    80.0
수학    90.0
dtype: float64


수학    80
국어    90
dtype: int64


# Method forms of the operators. fill_value=0 substitutes 0 for a value that is
# missing on one side before the operation is applied.
sr_add = student1.add(student2, fill_value=0) # addition
sr_sub = student1.sub(student2, fill_value=0) # subtraction
sr_mul = student1.mul(student2, fill_value=0) # multiplication
sr_div = student1.div(student2, fill_value=0) # division


# Stack the four result Series as rows, labelled with the operation names.
result = pd.DataFrame([sr_add, sr_sub, sr_mul, sr_div], index=['덧셈', '뺄셈','곱셈', '나눗셈'])
print(result)
# inf -> 80 divided by 0 (the missing 영어 value filled with 0) gives infinity

       국어        수학    영어
덧셈   90.0   170.000  80.0
뺄셈  -90.0    10.000  80.0
곱셈    0.0  7200.000   0.0
나눗셈   0.0     1.125   inf


pip install seaborn

Collecting seaborn
  Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)
Requirement already satisfied: matplotlib>=2.2 in c:\users\for\miniconda3\lib\site-packages (from seaborn) (3.5.1)
Requirement already satisfied: numpy>=1.15 in c:\users\for\miniconda3\lib\site-packages (from seaborn) (1.19.5)
Collecting scipy>=1.0
  Downloading scipy-1.8.0-cp39-cp39-win_amd64.whl (36.9 MB)
Requirement already satisfied: pandas>=0.23 in c:\users\for\miniconda3\lib\site-packages (from seaborn) (1.4.2)
Requirement already satisfied: pillow>=6.2.0 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (9.1.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (1.4.2)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (4.33.3)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (3.0.4)
Requirement already satisfied: cycler>=0.10 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)
Requirement already satisfied: packaging>=20.0 in c:\users\for\miniconda3\lib\site-packages (from matplotlib>=2.2->seaborn) (21.3)
Requirement already satisfied: pytz>=2020.1 in c:\users\for\miniconda3\lib\site-packages (from pandas>=0.23->seaborn) (2022.1)
Requirement already satisfied: six>=1.5 in c:\users\for\miniconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.15.0)
Installing collected packages: scipy, seaborn
Successfully installed scipy-1.8.0 seaborn-0.11.2
Note: you may need to restart the kernel to use updated packages.


import seaborn as sns

# Load the 'titanic' example dataset through seaborn and keep only the age and
# fare columns (all rows).
titanic = sns.load_dataset('titanic')
df = titanic.loc[:, ['age', 'fare']]
print(df.head())
print(type(df))

    age     fare
0  22.0   7.2500
1  38.0  71.2833
2  26.0   7.9250
3  35.0  53.1000
4  35.0   8.0500
<class 'pandas.core.frame.DataFrame'>


# Adding a number to a DataFrame adds it to every element and returns a DataFrame.
addition = df + 10
print(addition.head())
print(type(addition))

    age     fare
0  32.0  17.2500
1  48.0  81.2833
2  36.0  17.9250
3  45.0  63.1000
4  45.0  18.0500
<class 'pandas.core.frame.DataFrame'>


# Reload the same two columns and show the last rows, which include a missing
# age value (NaN).
titanic = sns.load_dataset('titanic')
df = titanic.loc[:, ['age', 'fare']]
print(df.tail())
print(type(df))

      age   fare
886  27.0  13.00
887  19.0  30.00
888   NaN  23.45
889  26.0  30.00
890  32.0   7.75
<class 'pandas.core.frame.DataFrame'>


# Add 10 to every element; as the output shows, the missing age stays NaN.
addition = df + 10
print(addition.tail())
print(type(addition))

      age   fare
886  37.0  23.00
887  29.0  40.00
888   NaN  33.45
889  36.0  40.00
890  42.0  17.75
<class 'pandas.core.frame.DataFrame'>


# DataFrame - DataFrame is computed element by element at matching row/column
# positions; where either side is NaN the result is NaN.
subtraction = addition - df
print(subtraction.tail())
print(type(subtraction))

      age  fare
886  10.0  10.0
887  10.0  10.0
888   NaN  10.0
889  10.0  10.0
890  10.0  10.0
<class 'pandas.core.frame.DataFrame'>

파이썬, matplotlib 한글 깨짐 해결 (0)	2022.05.23
판다스 기초) 외부 파일 읽어오기, 원하는 파일형식으로 저장하기 (0)	2022.05.12

.

전체 글

판다스 기초) 시리즈/데이터프레임, 행/열 이름, 선택,삭제,추가,변경,초기화,산술연산

행 인덱스/ 열 이름 설정¶

index = 행 인덱스 배열, columns = 열 이름 배열¶

행 / 열 이름 변경¶

rename() 메소드 -> 행 인덱스 또는 열 이름의 일부를 선택하여 변경 가능¶

원본 객체를 변경하려면 inplace=True 옵션 사용¶

행/ 열 삭제¶

drop() 메소드는 새로운 객체를 반환¶

원본 객체를 직접 변경하기 위해서는 inplace=True 옵션 추가¶

행 삭제 -> axis = 0 or 별도로 입력하지 않아도 됨¶

열 삭제 -> axis=1¶

행 선택¶

열 선택¶

원소 선택¶

열 추가¶

기존 열과 중복되는 경우 기존 열의 원소값을 변경¶

행 추가¶

기존 인덱스와 중복되는 경우 기존 행의 원소값을 변경¶

원소 값 변경¶

행, 열의 위치 바꾸기¶

인덱스 활용¶

set_index() 메소드를 사용하여 특정 열을 행 인덱스로 설정¶

행 인덱스를 새로 지정하면 기존 행 인덱스는 삭제됨¶

단, 원본을 바꾸지 않고 새로운 객체를 반환하는 점에 유의¶

원본을 변경하려면 inplace=True 옵션 사용¶

행 인덱스 재배열¶

행 인덱스 초기화¶

행 인덱스를 기준으로 데이터프레임 정렬¶

ascending=False -> 내림차순¶

ascending=True -> 오름차순¶

특정 열의 데이터 값을 기준으로 데이터프레임 정렬¶

시리즈 연산¶

시리즈 vs 숫자¶

시리즈 vs 시리즈¶

판다스는 같은 과목명(인덱스)을 찾아 정렬한 후 같은 과목명(인덱스)의 점수(데이터 값)끼리 덧셈¶

덧셈의 결과를 과목명(인덱스)에 매칭시키고 새로운 시리즈 객체를 반환¶

한쪽에만 인덱스가 존재하면 연산의 결과 -> NaN¶

동일한 인덱스라 하더라도 한 쪽의 데이터 값이 NaN이면 연산의 결과 -> NaN¶

연산 메소드¶

연산 메소드에 fill_value 옵션을 적용하여 누락 데이터 NaN 대신 숫자 적용¶

데이터프레임 연산¶

데이터프레임 vs 숫자¶

데이터프레임에 어떤 숫자를 더하면 모든 원소에 숫자를 더한다. (사칙연산 가능)¶

데이터프레임 vs 데이터프레임¶

각 데이터프레임의 같은 행, 같은 열 위치에 있는 원소끼리 계산¶

어느 한쪽에 원소가 존재하지 않거나 NaN이면 연산결과는 NaN이다.¶

'Python > Pandas(판다스)' 카테고리의 다른 글

+ Recent posts

티스토리툴바