반응형
batter_height_weight_position_insert¶
In [ ]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
# Print the sorted, de-duplicated names of batters whose 'height/weight'
# value is null in regular_season.
print(sorted(set(regular_season[regular_season['height/weight'].isnull()]['batter_name'])))
# Player list: one key per batter to look up. Apart from the first entry the
# values are empty placeholders; a later cell replaces every value with the
# scraped '<height>/<weight>/<position>(<hand>)' string.
batter_height_weight_position = {
'강병식': '(우투좌타)',
'강봉규': '',
'강정호': '',
'고도현': '',
'고동진': '',
'고메즈': '',
'고영민': '',
'권용관': '',
'김경모': '',
'김경언': '',
'김광연': '',
'김대륙': '',
'김동주': '',
'김연훈': '',
'김원석': '',
'김원섭': '',
'김종찬': '',
'김종호': '',
'나바로': '',
'나성용': '',
'대니돈': '',
'로메로': '',
'로사리오': '',
'로티노': '',
'마낙길': '',
'모상기': '',
'문우람': '',
'박계현': '',
'박기남': '',
'박노민': '',
'박상규': '',
'박용근': '',
'박재상': '',
'박재홍': '',
'박준서': '',
'박진만': '',
'박진원': '',
'백승룡': '',
'성의준': '',
'손용석': '',
'송지만': '',
'스나이더': '',
'신경현': '',
'신명철': '',
'신현철': '',
'안치용': '',
'알드리지': '',
'양영동': '',
'연경흠': '',
'오재필': '',
'용덕한': '',
'우동균': '',
'유선정': '',
'유재혁': '',
'윤완주': '',
'윤요섭': '',
'이명환': '',
'이민재': '',
'이승재': '',
'이양기': '',
'이여상': '',
'이우민': '',
'이인구': '',
'이정식': '',
'이종범': '',
'이종환': '',
'이태원': '',
'이현곤': '',
'이홍구': '',
'이희근': '',
'임재철': '',
'장성호': '',
'전현태': '',
'정보명': '',
'정상교': '',
'정수성': '',
'정현석': '',
'정형식': '',
'조성환': '',
'조영훈': '',
'조인성': '',
'조중근': '',
'지재옥': '',
'진갑용': '',
'차일목': '',
'채상병': '',
'최경철': '',
'최동수': '',
'최민구': '',
'최선호': '',
'최훈락': '',
'칸투': '',
'테임즈': '',
'피에': '',
'한상훈': '',
'한윤섭': '',
'현재윤': '',
'홍성흔': '',
'황목치승': '',
'황선일': '',
'황정립': ''
}
In [ ]:
# Scrape '<height>/<weight>/<position>' for every player in
# batter_height_weight_position from the KBO player-search page. Results are
# appended to height_weight_position_list in the dict's iteration order, which
# is what the later cells rely on when they index the list by position.

# Chrome option setup.
chrome_options = Options()
# chrome_options.add_argument("--headless")  # run in the background (enable when needed)

# Build the Service from the driver path returned by ChromeDriverManager.
service = Service(ChromeDriverManager().install())
# Launch the Chrome web driver.
driver = webdriver.Chrome(service=service, options=chrome_options)
wait = WebDriverWait(driver, 10)

# CSS selectors used on the search page, named once instead of repeated inline.
name_input_selector = '#cphContents_cphContents_cphContents_txtSearchPlayerName'
search_button_selector = '#cphContents_cphContents_cphContents_btnSearch'
result_row_selector = '#cphContents_cphContents_cphContents_udpRecord > div.inquiry > table > tbody > tr'
height_weight_selector = result_row_selector + ' > td:nth-child(6)'
position_selector = result_row_selector + ' > td:nth-child(4)'

height_weight_position_list = []
try:
    # Load the search page.
    driver.get('https://www.koreabaseball.com/Player/Search.aspx')
    for name in batter_height_weight_position:
        batter_name_input = driver.find_element(By.CSS_SELECTOR, name_input_selector)
        try:
            batter_name_input.clear()
        except StaleElementReferenceException:
            # The reference went stale (presumably the previous search
            # re-rendered the form), so locate the input again and retry once.
            batter_name_input = driver.find_element(By.CSS_SELECTOR, name_input_selector)
            batter_name_input.clear()
        batter_name_input.send_keys(name)
        driver.find_element(By.CSS_SELECTOR, search_button_selector).click()
        # Short pause before reading the result table; presumably this keeps
        # the wait below from matching the previous search's row.
        time.sleep(.5)
        height_weight = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, height_weight_selector))
        ).text.replace(', ', '/')
        position = driver.find_element(By.CSS_SELECTOR, position_selector).text
        height_weight_position_list.append(height_weight + '/' + position)
finally:
    # Always close the browser, even when a lookup raises; otherwise the
    # Chrome process is left running.
    driver.quit()
In [ ]:
'''
위키백과에서 투타를 가져오는데, 몇 가지 동명이인이 있어 그 부분은 넘어갔다.
넣지 못한 부분은 일일이 넣을 예정이다.
'''
# Look up each player's throwing/batting side (the '투구·타석' infobox field)
# on Korean Wikipedia and append it, in parentheses, to the entry scraped for
# the same index. Parsing is best-effort: when the page has no usable infobox
# the hand is left empty.
from io import StringIO

for idx, name in enumerate(batter_height_weight_position):
    # The timeout turns a stalled connection into an exception instead of
    # letting the cell hang indefinitely.
    rq = requests.get(f'https://ko.wikipedia.org/wiki/{name}', timeout=10)
    time.sleep(.5)  # pause between consecutive requests
    soup = BeautifulSoup(rq.text, 'html.parser')
    try:
        # Only the first table on the page is inspected.
        handle = soup.find_all('table')[0]
        # Wrap the markup in StringIO: passing literal HTML text to
        # pandas.read_html is deprecated.
        df = pd.read_html(StringIO(str(handle)))[0]
        if len(df.columns) > 2:
            df = df.drop(df.columns[2], axis=1)
        # Transpose so the first column's labels become the column headers.
        df = df.T
        df.columns = df.loc[0]
        df = df.drop(0, axis=0)
        hand = str(df['투구·타석'].values[0])
    except Exception:
        # Deliberate best-effort fallback. Catching Exception rather than
        # using a bare except keeps Ctrl-C / interpreter shutdown working.
        hand = ''
    height_weight_position_list[idx] = height_weight_position_list[idx] + '(' + hand + ')'
In [ ]:
# Replace every value in the lookup dict with the scraped entry that sits at
# the same position in height_weight_position_list.
for slot, player in enumerate(list(batter_height_weight_position)):
    batter_height_weight_position[player] = height_weight_position_list[slot]
# Bare expression: shows the filled dict as the cell output.
batter_height_weight_position
In [ ]:
import json

# Save the lookup dict to output_data.json as indented UTF-8 JSON, keeping the
# Korean text unescaped.
with open('output_data.json', 'w', encoding='utf-8') as sink:
    sink.write(json.dumps(batter_height_weight_position, ensure_ascii=False, indent=4))
In [ ]:
# Read the JSON file back and convert it into a Python object.
with open('output_data.json', 'r', encoding='utf-8') as source:
    data = json.loads(source.read())
print(type(data))
# Bare expression: shows the loaded object as the cell output.
data
In [ ]:
'''
위에서 만든 데이터를 적용해서 모두 채웠다.
'''
def insert_batter_info(row, batter_height_weight_position):
    """Fill a row's height/weight and position from the lookup table.

    Args:
        row: Mapping-like record with a ``'batter_name'`` key, e.g. the row
            handed over by ``DataFrame.apply(..., axis=1)``. It is updated
            in place.
        batter_height_weight_position: Dict mapping a batter name to a
            ``'<height>/<weight>/<position>'`` string.

    Returns:
        The same ``row`` object. ``'height/weight'`` is set to the first two
        ``/``-separated fields joined by ``/`` and ``'position'`` to the third.
        The row is returned unchanged when the name is not in the table or
        its entry has fewer than three fields.
    """
    info = batter_height_weight_position.get(row['batter_name'])
    if info is None:
        return row

    fields = info.split('/')
    if len(fields) < 3:
        # Incomplete entry (for example an unfilled placeholder). Leave the
        # row as it is instead of raising IndexError and aborting the whole
        # apply; the gap stays visible as a missing value.
        return row

    row['height/weight'] = fields[0] + '/' + fields[1]
    row['position'] = fields[2]
    return row
# Run insert_batter_info over every row of regular_season, handing it the
# lookup table that was loaded from JSON.
regular_season = regular_season.apply(
    insert_batter_info,
    axis=1,
    args=(data,),
)
# Spot-check one player, then show how many missing values remain per column.
display(
    regular_season.loc[
        regular_season['batter_name'] == '유재혁',
        ['batter_name', 'height/weight', 'position'],
    ]
)
regular_season.isna().sum()
In [ ]:
반응형
'아무거나 만들어 봄 > KBO 타자 OPS 예측 경진대회' 카테고리의 다른 글
KBO 타자 OPS 예측 경진대회 - 6 (0) | 2024.12.05 |
---|---|
KBO 타자 OPS 예측 경연대회 - 5 (0) | 2024.12.03 |
KBO 타자 OPS 예측 경연대회 - 3 (0) | 2024.12.03 |
KBO 타자 OPS 예측 경연대회 - 2 (0) | 2024.12.01 |
KBO 타자 OPS 예측 경진대회 - 1 (0) | 2024.11.30 |