데이터 입출력
외부 파일 읽어오기
In [3]:
import pandas as pd
In [2]:
# Relative path of the sample CSV used by the read_csv examples that follow.
file_path = './file/read_csv_sample.csv'
In [9]:
# Plain read: the first line of the file supplies the column names (c0..c3)
# and the rows are labelled with a 0-based integer index.
df1 = pd.read_csv(file_path)
print(df1)
c0 c1 c2 c3
0 5 1 4 7
1 6 2 5 8
2 7 3 6 9
In [10]:
# header=None: no line is treated as a header, so the c0..c3 line shows up
# as data row 0 and the columns are labelled with integers instead.
df2 = pd.read_csv(file_path, header=None)
print(df2)
0 1 2 3
0 c0 c1 c2 c3
1 5 1 4 7
2 6 2 5 8
3 7 3 6 9
In [11]:
# index_col=None: no column is used as the row index; the printed frame
# matches the plain read_csv(file_path) result.
df3 = pd.read_csv(file_path, index_col=None)
print(df3)
c0 c1 c2 c3
0 5 1 4 7
1 6 2 5 8
2 7 3 6 9
In [15]:
# index_col='c0': the 'c0' column becomes the row index instead of a data column.
df4 = pd.read_csv(file_path, index_col='c0')
print(df4)
c1 c2 c3
c0
5 1 4 7
6 2 5 8
7 3 6 9
In [18]:
# header=1: the line at position 1 (the second line of the file) supplies the
# column names; the c0..c3 line above it does not appear in the result.
df5 = pd.read_csv(file_path, header=1)
print(df5)
5 1 4 7
0 6 2 5 8
1 7 3 6 9
In [23]:
# index_col=3: the column at position 3 ('c3') becomes the row index.
df6 = pd.read_csv(file_path, index_col=3)
print(df6)
c0 c1 c2
c3
7 5 1 4
8 6 2 5
9 7 3 6
In [29]:
pip install xlrd
Collecting xlrd
Using cached xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
Installing collected packages: xlrd
Successfully installed xlrd-2.0.1
Note: you may need to restart the kernel to use updated packages.
In [30]:
pip install openpyxl
Collecting openpyxl
Using cached openpyxl-3.0.9-py2.py3-none-any.whl (242 kB)
Collecting et-xmlfile
Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.9
Note: you may need to restart the kernel to use updated packages.
In [33]:
# Load the Excel workbook into a DataFrame without naming an engine.
# The printed frame is 9 rows x 29 columns, so pandas elides the middle columns.
df1 = pd.read_excel('./file/남북한발전전력량.xlsx')
print(df1)
전력량 (억㎾h) 발전 전력별 1990 1991 1992 1993 1994 1995 1996 1997 ... 2007 \
0 남한 합계 1077 1186 1310 1444 1650 1847 2055 2244 ... 4031
1 NaN 수력 64 51 49 60 41 55 52 54 ... 50
2 NaN 화력 484 573 696 803 1022 1122 1264 1420 ... 2551
3 NaN 원자력 529 563 565 581 587 670 739 771 ... 1429
4 NaN 신재생 - - - - - - - - ... -
5 북한 합계 277 263 247 221 231 230 213 193 ... 236
6 NaN 수력 156 150 142 133 138 142 125 107 ... 133
7 NaN 화력 121 113 105 88 93 88 88 86 ... 103
8 NaN 원자력 - - - - - - - - ... -
2008 2009 2010 2011 2012 2013 2014 2015 2016
0 4224 4336 4747 4969 5096 5171 5220 5281 5404
1 56 56 65 78 77 84 78 58 66
2 2658 2802 3196 3343 3430 3581 3427 3402 3523
3 1510 1478 1486 1547 1503 1388 1564 1648 1620
4 - - - - 86 118 151 173 195
5 255 235 237 211 215 221 216 190 239
6 141 125 134 132 135 139 130 100 128
7 114 110 103 79 80 82 86 90 111
8 - - - - - - - - -
[9 rows x 29 columns]
In [34]:
# Same workbook with the openpyxl engine requested explicitly; the printed
# frame is identical to the one read without the engine argument.
df2 = pd.read_excel('./file/남북한발전전력량.xlsx', engine='openpyxl')
print(df2)
전력량 (억㎾h) 발전 전력별 1990 1991 1992 1993 1994 1995 1996 1997 ... 2007 \
0 남한 합계 1077 1186 1310 1444 1650 1847 2055 2244 ... 4031
1 NaN 수력 64 51 49 60 41 55 52 54 ... 50
2 NaN 화력 484 573 696 803 1022 1122 1264 1420 ... 2551
3 NaN 원자력 529 563 565 581 587 670 739 771 ... 1429
4 NaN 신재생 - - - - - - - - ... -
5 북한 합계 277 263 247 221 231 230 213 193 ... 236
6 NaN 수력 156 150 142 133 138 142 125 107 ... 133
7 NaN 화력 121 113 105 88 93 88 88 86 ... 103
8 NaN 원자력 - - - - - - - - ... -
2008 2009 2010 2011 2012 2013 2014 2015 2016
0 4224 4336 4747 4969 5096 5171 5220 5281 5404
1 56 56 65 78 77 84 78 58 66
2 2658 2802 3196 3343 3430 3581 3427 3402 3523
3 1510 1478 1486 1547 1503 1388 1564 1648 1620
4 - - - - 86 118 151 173 195
5 255 235 237 211 215 221 216 190 239
6 141 125 134 132 135 139 130 100 128
7 114 110 103 79 80 82 86 90 111
8 - - - - - - - - -
[9 rows x 29 columns]
In [35]:
# Load the JSON sample; in the printed frame the library names
# (pandas, NumPy, matplotlib) are the row labels rather than a data column.
df = pd.read_json('./file/read_json_sample.json')
print(df)
name year developer opensource
pandas 2008 Wes Mckinneye True
NumPy 2006 Travis Oliphant True
matplotlib 2003 John D. Hunter True
In [36]:
# Show the row labels of the frame loaded from JSON.
print(df.index)
Index(['pandas', 'NumPy', 'matplotlib'], dtype='object')
웹에서 가져오기
In [49]:
pip install lxml
Requirement already satisfied: lxml in c:\users\for\miniconda3\lib\site-packages (4.8.0)
Note: you may need to restart the kernel to use updated packages.
In [4]:
# Parse the local HTML sample; read_html returns a list of DataFrames.
url = './file/sample.html'
tables = pd.read_html(url)
print(len(tables))  # number of tables found
print('\n')
# enumerate yields each table together with its position, so no manual
# indexing into the list is needed.
for i, table in enumerate(tables):
    print(f'tables[{i}]')
    print(table)
    print('\n')
2
tables[0]
Unnamed: 0 c0 c1 c2 c3
0 0 0 1 4 7
1 1 1 2 5 8
2 2 2 3 6 9
tables[1]
name year developer opensource
0 NumPy 2006 Travis Oliphant True
1 matplotlib 2003 John D. Hunter True
2 pandas 2008 Wes Mckinneye True
In [5]:
# Use the 'name' column of the second table as the row index. set_index
# returns a new frame here, so tables[1] is left untouched and this cell can
# be re-run without a KeyError on an already-consumed 'name' column.
df = tables[1].set_index(['name'])
print(df)
year developer opensource
name
NumPy 2006 Travis Oliphant True
matplotlib 2003 John D. Hunter True
pandas 2008 Wes Mckinneye True
데이터 저장하기
In [8]:
# Grade table: one list per column, with the students' names as a column too.
data = {
    "name": ["Jenny", "Riah", "Paul"],
    "algol": ["A", "A+", "B"],
    "basic": ["C", "B", "B+"],
    "c++": ["B+", "C+", "C"],
}
# Build the frame and promote the 'name' column to the row index in one step.
df = pd.DataFrame(data).set_index("name")
print(df)
algol basic c++
name
Jenny A C B+
Riah A+ B C+
Paul B B+ C
In [9]:
df.to_csv('./file/df_sample_jh.csv') # save as a CSV file
In [10]:
df.to_json('./file/df_sample_jh.json') # save as a JSON file
In [11]:
df.to_excel('./file/df_sample_jh.xlsx') # save as an Excel file
In [12]:
# Source data for the two frames that are later written to one workbook.
# data1: letter grades per subject, keyed by student name.
data1 = {
    "name": ["Jenny", "Riah", "Paul"],
    "algol": ["A", "A+", "B"],
    "basic": ["C", "B", "B+"],
    "c++": ["B+", "C+", "C"],
}
# data2: five integer columns c0..c4 with three values each.
data2 = {
    "c0": [1, 2, 3],
    "c1": [4, 5, 6],
    "c2": [7, 8, 9],
    "c3": [10, 11, 12],
    "c4": [13, 14, 15],
}
In [14]:
# Build the grade frame and use the 'name' column as its row index.
df1 = pd.DataFrame(data1).set_index('name')
print(df1)
algol basic c++
name
Jenny A C B+
Riah A+ B C+
Paul B B+ C
In [16]:
# Build the numeric frame and use the 'c0' column as its row index.
df2 = pd.DataFrame(data2).set_index('c0')
print(df2)
c1 c2 c3 c4
c0
1 4 7 10 13
2 5 8 11 14
3 6 9 12 15
In [17]:
# Write df1 and df2 into one workbook as the sheets 'select1' and 'select2'.
# The with-block saves and closes the file on exit; the explicit
# writer.save() call it replaces was removed in pandas 2.0.
with pd.ExcelWriter('./file/excelwriter_jh.xlsx') as writer:
    df1.to_excel(writer, sheet_name='select1')
    df2.to_excel(writer, sheet_name='select2')
'Python > Pandas(판다스)' 카테고리의 다른 글
파이썬, matplotlib 한글 깨짐 해결 (0) | 2022.05.23 |
---|---|
판다스 기초) 시리즈/데이터프레임, 행/열 이름, 선택,삭제,추가,변경,초기화,산술연산 (0) | 2022.05.09 |