import re
import threading
import time
import tkinter as tk
from tkinter import ttk

import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class Main:
    """Small Tkinter front end that drives Chrome to collect lecture subtitles.

    The user navigates the browser to a lecture week by hand and presses
    "Run".  A worker thread then opens every video listed for that week,
    downloads its subtitle file, strips the WebVTT markup and writes the
    combined text to ``<save_path><week>.txt``.
    """

    BASE_URL = "https://selc.or.kr"
    WAIT_SECONDS = 10        # timeout for element waits and subtitle downloads
    ALERT_WAIT_SECONDS = 3   # how long to wait for an optional alert

    # JavaScript call that opens one learning video ('LV' entries only).
    _LEARNING_CALL_RE = re.compile(
        r"fncLearningWindow\(([^,]+),([^,]+),([^,]+),([^,]+),'LV'[^)]*\)"
    )
    # WebVTT cue timing line; the hours field is optional in the format.
    _TIMELINE_RE = re.compile(
        r'(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2,}:)?\d{2}:\d{2}\.\d{3}'
    )
    # Characters that are not allowed in file names on common platforms.
    _UNSAFE_FILE_CHARS_RE = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        self.driver = webdriver.Chrome()
        self.isRunning = False
        self.save_path = './'
        self._driver_closed = False  # set once the browser session was shut down

        self.init_ui()

        # Handle the window's close button ourselves so the browser is closed too.
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Start the thread that watches whether the browser is still open.
        threading.Thread(target=self.monitor_browser, daemon=True).start()

    def init_ui(self):
        """Build the window: status label, progress bar and the Run button."""
        self.root = tk.Tk()
        self.root.title("e-러닝 자막 추출기")
        self.root.geometry("350x200")
        self.root.configure(bg='#F0F0F0')

        style = ttk.Style()
        style.configure('TButton', font=('Helvetica', 12), padding=10)
        style.configure('TLabel', font=('Helvetica', 12), background='#F0F0F0')
        style.configure('TProgressbar', thickness=20)

        self.label = ttk.Label(self.root, text="강의 주차로 이동 후 실행")
        self.label.pack(pady=20)

        self.progress = ttk.Progressbar(
            self.root, orient="horizontal", length=300, mode="determinate"
        )
        self.progress.pack(pady=20)

        self.btn_run = ttk.Button(self.root, text="Run", command=self.start_thread)
        self.btn_run.pack(pady=10)

    def startup(self):
        """Open the LMS main page and run the Tk main loop until the UI ends."""
        self.driver.get('https://selc.or.kr/lms/main/MainView.do')
        try:
            self.root.mainloop()
        finally:
            # The loop also ends through root.quit() in monitor_browser, which
            # never touches the driver; make sure the session is released.
            self._quit_driver()

    def start_thread(self):
        """Start one extraction in a worker thread (ignored while one is running)."""
        if self.isRunning:
            return
        self.isRunning = True
        self.btn_run.config(state=tk.DISABLED)
        self.label.config(text="자막 추출 준비 중...")
        # Start from an empty bar; otherwise a second run would keep counting
        # on top of the previous run's final value.
        self.progress["value"] = 0
        threading.Thread(target=self.run).start()

    def run(self):
        """Worker-thread body: run the extraction, then restore the UI state."""
        try:
            self.extract()
        except Exception as exc:
            # Thread boundary: show the failure in the UI, then re-raise so the
            # traceback is still reported by the threading machinery.
            self._set_label(f"추출 실패: {type(exc).__name__}")
            raise
        finally:
            if not self._driver_closed:
                try:
                    self.driver.switch_to.default_content()
                except WebDriverException as exc:
                    print(f"Could not return to the top-level document: {exc}")
            self.isRunning = False
            try:
                self.btn_run.config(state=tk.NORMAL)
                self.progress.stop()
            except (tk.TclError, RuntimeError):
                # The window was closed while the extraction was running.
                pass

    def lv_1(self):
        """Switch into the ``popCourseContent`` iframe of the top-level page."""
        self.driver.switch_to.default_content()
        self._enter_frame("popCourseContent")

    def lv_2(self):
        """Switch into the ``learning_active`` iframe nested in ``popCourseContent``."""
        self.lv_1()
        self._enter_frame("learning_active")

    def clean_subtitle(self, vtt_content):
        """Return the subtitle text of *vtt_content* without WebVTT markup.

        Drops a leading ``WEBVTT`` header line, every cue timing line and
        every blank line; the remaining lines are joined with ``\\n``.
        Empty input yields an empty string.
        """
        lines = vtt_content.splitlines()
        if lines:
            # The header may carry a byte order mark and trailing description text.
            header = lines[0].lstrip('\ufeff').strip()
            if header == "WEBVTT" or header.startswith(("WEBVTT ", "WEBVTT\t")):
                lines = lines[1:]
        return '\n'.join(
            line
            for line in lines
            if line.strip() and not self._TIMELINE_RE.match(line)
        )

    def fs_write(self, data, file_name):
        """Write *data* as UTF-8 to ``<save_path><file_name>.txt`` and report it."""
        target = self.save_path + file_name + '.txt'
        with open(target, 'w', encoding='utf-8') as file:
            file.write(data)
        print(f"Subtitles saved to {target}")
        self._set_label(f"추출 완료: {target}")

    def alert_accept(self):
        """Accept a browser alert if one shows up within a few seconds."""
        try:
            WebDriverWait(self.driver, self.ALERT_WAIT_SECONDS).until(EC.alert_is_present())
        except TimeoutException:
            print("No alert appeared within the specified time")
            return
        alert = self.driver.switch_to.alert
        print(f"Alert text: {alert.text}")
        alert.accept()

    def extract(self):
        """Collect the subtitles of every video of the current lecture week.

        Raises:
            LookupError: if the current page lists no playable lecture video.
        """
        # Name of the lecture week currently shown.
        week_name = self.driver.find_element(
            By.CSS_SELECTOR, "div.lect_Man > div > span.ft_b"
        ).text
        sections = ['[{}]\n'.format(week_name)]

        # Open the first video so that the player popup becomes available.
        lectures = self.driver.find_elements(By.CSS_SELECTOR, "div.btn_lecture_view > a")
        openers = self._learning_calls(lectures, "onclick")
        if not openers:
            raise LookupError("No playable lecture video was found on the current page")
        self.driver.execute_script(openers[0])

        # Wait until the player popup and the video element inside it exist.
        self.lv_1()
        self._wait_for("#pop_body > div.learn_left_menu > div > div.overflow_area")
        self.lv_2()
        self._wait_for("#player_html5_api")

        # The popup's side menu lists every video of the week.
        self.lv_1()
        learnings = self.driver.find_elements(
            By.CSS_SELECTOR, "a[href*='javascript:fncLearningWindow']"
        )
        vids = self._learning_calls(learnings, "href")
        total_vids = len(vids)
        self.progress["maximum"] = total_vids  # one progress step per video

        for i, vid in enumerate(vids):
            self.lv_1()
            print(f"- {i + 1} 번째 영상 추출 중")
            self._set_label(f"추출 중 ({i + 1} / {total_vids})")
            self.driver.execute_script(vid)
            self.alert_accept()

            self.lv_2()
            self._wait_for("#player_html5_api")

            self.lv_1()
            video_name = self.driver.find_element(
                By.CSS_SELECTOR, "div.depth2.active > a > span.tx > span"
            ).text

            self.lv_2()
            vtt = self._wait_for(
                "#frm_reload > textarea[name='reload_contents_subtitle_ko_file_path']"
            )
            content = self._fetch_subtitle(vtt.get_attribute("value"))
            sections.append('<{}>\n{}\n\n'.format(video_name, content))
            self.progress["value"] = i + 1

        sections.append("========================================================\n")

        # Close the player popup through its own close handler.
        self.lv_1()
        self.driver.execute_script(
            self.driver.find_element(
                By.CSS_SELECTOR,
                "body > div.l_popup_learn > div.title_box > div.btn_func > a",
            ).get_attribute("onclick")
        )
        self.alert_accept()

        self.fs_write(''.join(sections), self._safe_file_name(week_name.split(' ')[0]))
        self.progress["value"] = total_vids  # show the bar as complete

    def monitor_browser(self):
        """Poll the browser once per second and end the UI when it was closed."""
        while True:
            time.sleep(1)
            if self._driver_closed:
                break
            if self.isRunning:
                # The worker thread is using the session; a command sent from
                # here could fail for unrelated reasons (open alert, frame
                # switch in progress) and be mistaken for a closed browser.
                continue
            try:
                self.driver.title  # any command fails once the browser is gone
            except WebDriverException:
                print("브라우저가 닫혔습니다.")
                try:
                    self.root.quit()  # ends mainloop(); startup() cleans up
                except (tk.TclError, RuntimeError):
                    # The window is already gone.
                    pass
                break

    def on_closing(self):
        """Window close handler: shut the browser down and destroy the window."""
        # Always release the browser, not only while an extraction is running;
        # otherwise Chrome and its driver process outlive the application.
        self._quit_driver()
        self.root.destroy()

    # ------------------------------------------------------------------ helpers

    def _enter_frame(self, frame_id):
        """Wait for the iframe with id *frame_id* and switch into it."""
        iframe = WebDriverWait(self.driver, self.WAIT_SECONDS).until(
            EC.presence_of_element_located((By.ID, frame_id))
        )
        self.driver.switch_to.frame(iframe)
        print(f'#=========이동함: {frame_id}')

    def _wait_for(self, css_selector):
        """Wait until an element matching *css_selector* is present and return it."""
        return WebDriverWait(self.driver, self.WAIT_SECONDS).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
        )

    def _learning_calls(self, elements, attribute):
        """Return the ``fncLearningWindow(...)`` calls found in *attribute* of *elements*.

        Elements that lack the attribute or do not reference a learning
        video are skipped.
        """
        calls = []
        for element in elements:
            value = element.get_attribute(attribute)
            if value and (match := self._LEARNING_CALL_RE.search(value)):
                calls.append(match.group())
        return calls

    def _fetch_subtitle(self, path):
        """Download the subtitle file at *path* and return its cleaned text.

        Returns an empty string when the video has no subtitle path or the
        download fails, so one broken video does not abort the whole week.
        """
        if not path:
            print("No subtitle file is registered for this video")
            return ""
        try:
            response = requests.get(self.BASE_URL + path, timeout=self.WAIT_SECONDS)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Subtitle download failed: {exc}")
            return ""
        try:
            # WebVTT files are UTF-8 by specification; decode explicitly rather
            # than trusting a charset guessed from the response headers.
            text = response.content.decode("utf-8-sig")
        except UnicodeDecodeError:
            text = response.text
        return self.clean_subtitle(text)

    def _safe_file_name(self, name):
        """Return *name* with characters that are invalid in file names replaced."""
        cleaned = self._UNSAFE_FILE_CHARS_RE.sub('_', name).strip()
        return cleaned or "subtitles"

    def _set_label(self, text):
        """Show *text* in the status label, tolerating an already closed window."""
        try:
            self.label.config(text=text)
        except (tk.TclError, RuntimeError):
            # The window was destroyed (or its main loop ended) meanwhile.
            pass

    def _quit_driver(self):
        """Shut the browser session down once; later calls do nothing."""
        if self._driver_closed:
            return
        self._driver_closed = True
        try:
            self.driver.quit()
        except WebDriverException as exc:
            print(f"Browser shutdown reported an error: {exc}")


if __name__ == "__main__":
    process = Main()
    process.startup()