# selc_automation/main.py
#
# 192 lines
# 7.4 KiB
# Python

import re
import time
import requests
import threading
import tkinter as tk
from tkinter import ttk
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException
class Main:
    """Small Tkinter window that drives Chrome via Selenium to export lecture subtitles.

    The user navigates the Selenium-controlled browser to a lecture week and
    presses "Run"; a worker thread then walks every video entry of that week,
    downloads each subtitle file, strips the WebVTT scaffolding and writes the
    combined text to ``<save_path><first word of the week title>.txt``.

    Attributes:
        driver: the Selenium Chrome driver created in ``__init__``.
        isRunning: True while an extraction worker thread is active.
        save_path: prefix (directory) prepended to the output file name.
        root, label, progress, btn_run: Tk widgets created by ``init_ui``.
    """

    # JavaScript call that opens a learning window whose fifth argument is 'LV'.
    _LEARNING_CALL = re.compile(
        r"fncLearningWindow\(([^,]+),([^,]+),([^,]+),([^,]+),'LV'[^)]*\)"
    )
    # WebVTT cue timing line; the hours field is optional in the format.
    _CUE_TIMING = re.compile(
        r'(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2,}:)?\d{2}:\d{2}\.\d{3}'
    )
    # Seconds to wait for page elements / iframes to appear.
    _WAIT_SECONDS = 10
    # Seconds before an unanswered subtitle download is abandoned.
    _HTTP_TIMEOUT = 30
    # Set once the driver has been shut down on purpose (see _quit_driver).
    _driver_closed = False

    def __init__(self):
        """Start Chrome, build the window and start the browser watchdog thread."""
        self.driver = webdriver.Chrome()
        self.isRunning = False
        self.save_path = './'
        self.init_ui()
        # Route the window-close button through our own shutdown handler.
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        # Background watchdog that closes the UI when the browser disappears.
        threading.Thread(target=self.monitor_browser, daemon=True).start()

    def init_ui(self):
        """Create the Tk root window with a status label, progress bar and Run button."""
        self.root = tk.Tk()
        self.root.title("e-러닝 자막 추출기")
        self.root.geometry("350x200")
        self.root.configure(bg='#F0F0F0')

        style = ttk.Style()
        style.configure('TButton', font=('Helvetica', 12), padding=10)
        style.configure('TLabel', font=('Helvetica', 12), background='#F0F0F0')
        style.configure('TProgressbar', thickness=20)

        self.label = ttk.Label(self.root, text="강의 주차로 이동 후 실행")
        self.label.pack(pady=20)

        self.progress = ttk.Progressbar(
            self.root, orient="horizontal", length=300, mode="determinate"
        )
        self.progress.pack(pady=20)

        self.btn_run = ttk.Button(self.root, text="Run", command=self.start_thread)
        self.btn_run.pack(pady=10)

    def startup(self):
        """Open the LMS main page and block in the Tk main loop until the UI ends."""
        self.driver.get('https://selc.or.kr/lms/main/MainView.do')
        try:
            self.root.mainloop()
        finally:
            # The loop also ends via monitor_browser's root.quit(); make sure the
            # driver session is shut down on that path too.
            self._quit_driver()

    def start_thread(self):
        """Run-button handler: launch one extraction worker unless one is active."""
        if self.isRunning:
            return
        self.isRunning = True
        self.btn_run.config(state=tk.DISABLED)
        self.label.config(text="자막 추출 준비 중...")
        # Start every run from an empty bar; extract() only ever increments it.
        self.progress["value"] = 0
        threading.Thread(target=self.run).start()

    def run(self):
        """Worker-thread body: run ``extract`` and always restore the idle UI state.

        Any exception from ``extract`` is shown in the status label and then
        re-raised so the traceback is still reported for the thread.
        """
        # NOTE(review): this method and extract() touch Tk widgets from the worker
        # thread started in start_thread - confirm that is acceptable on the
        # Python/Tcl builds this tool targets.
        try:
            self.extract()
        except Exception as exc:
            self.label.config(text=f"추출 실패: {type(exc).__name__}")
            raise
        finally:
            try:
                self.driver.switch_to.default_content()
            except WebDriverException as exc:
                # The browser may already be gone; that must not block the reset.
                print(f"Could not return to the top-level document: {exc}")
            finally:
                self.isRunning = False
                self.btn_run.config(state=tk.NORMAL)
                self.progress.stop()

    def _wait_for(self, css_selector):
        """Wait up to ``_WAIT_SECONDS`` for an element matching *css_selector*; return it."""
        return WebDriverWait(self.driver, self._WAIT_SECONDS).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
        )

    def _enter_frame(self, frame_id):
        """Wait for the iframe with id *frame_id* and switch the driver into it."""
        iframe = WebDriverWait(self.driver, self._WAIT_SECONDS).until(
            EC.presence_of_element_located((By.ID, frame_id))
        )
        self.driver.switch_to.frame(iframe)
        print(f'#=========이동함: {frame_id}')

    def lv_1(self):
        """Switch from the top-level document into the ``popCourseContent`` iframe."""
        self.driver.switch_to.default_content()
        self._enter_frame("popCourseContent")

    def lv_2(self):
        """Switch into ``learning_active``, the iframe nested inside ``popCourseContent``."""
        self.lv_1()
        self._enter_frame("learning_active")

    def clean_subtitle(self, vtt_content: str) -> str:
        """Return the caption text of a WebVTT document, one caption line per line.

        Drops a leading byte-order mark, the ``WEBVTT`` header line, cue timing
        lines (with or without an hours field) and blank lines. Empty input
        yields an empty string.

        Args:
            vtt_content: raw text of the subtitle file.

        Returns:
            The remaining lines joined with ``'\\n'``.
        """
        lines = vtt_content.lstrip('\ufeff').splitlines()
        if lines:
            header = lines[0].strip()
            # The header may carry trailing text after a space or tab.
            if header == "WEBVTT" or header.startswith(("WEBVTT ", "WEBVTT\t")):
                lines = lines[1:]
        return '\n'.join(
            line for line in lines
            if line.strip() and not self._CUE_TIMING.match(line)
        )

    def fs_write(self, data, file_name):
        """Write *data* as UTF-8 to ``save_path + file_name + '.txt'`` and report it.

        The destination is printed and shown in the status label.
        """
        target = self.save_path + file_name + '.txt'
        with open(target, 'w', encoding='utf-8') as file:
            file.write(data)
        print(f"Subtitles saved to {target}")
        self.label.config(text=f"추출 완료: {target}")

    def alert_accept(self):
        """Accept a JavaScript alert if one shows up within three seconds."""
        try:
            # The wait returns the alert it found, so no second lookup is needed.
            alert = WebDriverWait(self.driver, 3).until(EC.alert_is_present())
        except TimeoutException:
            print("No alert appeared within the specified time")
            return
        print(f"Alert text: {alert.text}")
        alert.accept()

    def _learning_calls(self, elements, attribute):
        """Collect the ``fncLearningWindow(...)`` calls found in *attribute* of *elements*."""
        calls = []
        for element in elements:
            # get_attribute returns None when the attribute is missing.
            found = self._LEARNING_CALL.search(element.get_attribute(attribute) or "")
            if found:
                calls.append(found.group())
        return calls

    def _download_subtitle(self, path):
        """Fetch the subtitle file at site-relative *path* and return its cleaned text."""
        response = requests.get("https://selc.or.kr" + path, timeout=self._HTTP_TIMEOUT)
        # Fail loudly instead of saving an HTTP error page as subtitle text.
        response.raise_for_status()
        return self.clean_subtitle(response.text)

    def extract(self):
        """Export the subtitles of every video of the week currently shown.

        Opens the first video to bring up the player popup, then plays each
        video entry in turn, downloads its Korean subtitle file and appends the
        cleaned text to the result, which is finally written with ``fs_write``.

        Raises:
            IndexError: no lecture link with a matching call is on the page.
            TimeoutException: an expected element did not appear in time.
            requests.RequestException: a subtitle download failed or timed out.
        """
        # Title of the current week; its first word becomes the file name.
        week_name = self.driver.find_element(
            By.CSS_SELECTOR, "div.lect_Man > div > span.ft_b"
        ).text
        parts = ['[{}]\n'.format(week_name)]

        # Open the first video so that the player popup exists.
        lectures = self.driver.find_elements(By.CSS_SELECTOR, "div.btn_lecture_view > a")
        openers = self._learning_calls(lectures, "onclick")
        if not openers:
            raise IndexError("no lecture video link found on the current page")
        self.driver.execute_script(openers[0])

        # Wait until the popup's menu and the video player are present.
        self.lv_1()
        self._wait_for("#pop_body > div.learn_left_menu > div > div.overflow_area")
        self.lv_2()
        self._wait_for("#player_html5_api")

        # Collect every video entry listed in the popup.
        self.lv_1()
        learnings = self.driver.find_elements(
            By.CSS_SELECTOR, "a[href*='javascript:fncLearningWindow']"
        )
        vids = self._learning_calls(learnings, "href")
        total_vids = len(vids)
        self.progress["maximum"] = total_vids  # one progress step per video

        for number, vid in enumerate(vids, start=1):
            self.lv_1()
            print(f"- {number} 번째 영상 추출 중")
            self.label.config(text=f"추출 중 ({number} / {total_vids})")
            self.driver.execute_script(vid)
            self.alert_accept()

            self.lv_2()
            self._wait_for("#player_html5_api")

            # The active entry's title lives in the outer frame.
            self.lv_1()
            video_name = self.driver.find_element(
                By.CSS_SELECTOR, "div.depth2.active > a > span.tx > span"
            ).text

            # The subtitle path is read from a form field in the inner frame.
            self.lv_2()
            vtt = self._wait_for(
                "#frm_reload > textarea[name='reload_contents_subtitle_ko_file_path']"
            )
            content = self._download_subtitle(vtt.get_attribute("value"))
            parts.append('<{}>\n{}\n\n'.format(video_name, content))
            self.progress["value"] += 1

        parts.append("========================================================\n")

        # Close the player popup by running its close button's onclick handler.
        self.lv_1()
        self.driver.execute_script(
            self.driver.find_element(
                By.CSS_SELECTOR,
                "body > div.l_popup_learn > div.title_box > div.btn_func > a",
            ).get_attribute("onclick")
        )
        self.alert_accept()

        self.fs_write(''.join(parts), week_name.split(' ')[0])
        self.progress["value"] = self.progress["maximum"]  # show the bar as complete

    def monitor_browser(self):
        """Poll the browser once per second and stop the UI loop when it is gone.

        Stops polling silently once the driver has been shut down on purpose.
        """
        while True:
            time.sleep(1)
            if self._driver_closed:
                break
            try:
                self.driver.title  # any driver access fails once the browser is closed
            except WebDriverException:
                print("브라우저가 닫혔습니다.")
                self.root.quit()  # end the Tk main loop
                break

    def _quit_driver(self):
        """Shut the Selenium driver down once; later calls do nothing."""
        if self._driver_closed:
            return
        self._driver_closed = True
        try:
            self.driver.quit()
        except WebDriverException as exc:
            print(f"Browser shutdown reported an error: {exc}")

    def on_closing(self):
        """Window-close handler: shut the browser down, then destroy the window."""
        try:
            # Quit whether or not an extraction is running, so the session is
            # always closed explicitly.
            self._quit_driver()
        finally:
            self.root.destroy()
if __name__ == "__main__":
    # Script entry point: build the application (browser + window), then hand
    # control to startup(), which blocks in the Tk main loop.
    process = Main()
    process.startup()