# File-viewer header captured with the paste (not part of the program):
# 233 lines, 9.2 KiB, Python
import os
import re
import sys
import threading
import time
import tkinter as tk
from tkinter import ttk
from urllib.parse import urljoin

import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class Main:
    """Tkinter window that drives a Selenium Chrome session to collect subtitles.

    The window shows a status label, a progress bar and a "Run" button. Pressing
    the button starts a worker thread that walks through the videos of the week
    currently open in the browser, downloads each video's subtitle file, strips
    the WebVTT framing and writes everything into one text file.
    """

    # Site root used to resolve the subtitle paths found in the player form.
    BASE_URL = "https://selc.or.kr"
    START_URL = 'https://selc.or.kr/lms/main/MainView.do'

    # Seconds to wait for page elements / for the subtitle download.
    WAIT_SECONDS = 10
    REQUEST_TIMEOUT = 30

    # JavaScript call that opens a video ('LV') lecture in the player popup.
    _LECTURE_CALL_RE = re.compile(r"fncLearningWindow\(([^,]+),([^,]+),([^,]+),([^,]+),'LV'[^)]*\)")
    # WebVTT cue timing line; the hours field is optional in the format.
    _TIMELINE_RE = re.compile(r'(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2,}:)?\d{2}:\d{2}\.\d{3}')
    # Characters that cannot appear in a file name on Windows (includes path separators).
    _UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        """Start Chrome, build the window and start watching the browser."""
        self._driver_quit = False
        self.driver = webdriver.Chrome()
        self.isRunning = False
        self.init_ui()

        # Route the window-close button through our own shutdown handler.
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)

        # Background watcher that ends the UI when the browser goes away.
        threading.Thread(target=self.monitor_browser, daemon=True).start()

    def init_ui(self):
        """Create the root window and its label, progress bar and Run button."""
        self.root = tk.Tk()
        # center_window sets both the 350x200 size and the on-screen position.
        self.center_window(self.root, 350, 200)
        self.root.title("대학 e러닝 자막 추출기")
        self.root.configure(bg='#F0F0F0')
        try:
            self.root.iconbitmap(self.resource_path('favicon.ico'))
        except tk.TclError as exc:
            # A missing or unsupported icon must not prevent the tool from starting.
            print(f"Window icon not loaded: {exc}")

        style = ttk.Style()
        style.configure('TButton', font=('Helvetica', 12), padding=10)
        style.configure('TLabel', font=('Helvetica', 12), background='#F0F0F0')
        style.configure('TProgressbar', thickness=20)

        self.label = ttk.Label(self.root, text="강의 주차로 이동 후 실행", anchor='center')
        self.label.pack(pady=20)

        self.progress = ttk.Progressbar(self.root, orient="horizontal", length=300, mode="determinate")
        self.progress.pack(pady=20)

        self.btn_run = ttk.Button(self.root, text="Run", command=self.start_thread)
        self.btn_run.pack(pady=10)

    def center_window(self, root, width, height):
        """Size ``root`` to ``width`` x ``height`` and centre it on the screen."""
        screen_width = root.winfo_screenwidth()
        screen_height = root.winfo_screenheight()
        x = (screen_width / 2) - (width / 2)
        y = (screen_height / 2) - (height / 2)
        root.geometry(f'{width}x{height}+{int(x)}+{int(y)}')

    def startup(self):
        """Open the LMS start page and run the Tk main loop until the UI ends.

        The WebDriver session is shut down when the loop ends for any reason,
        including the browser watcher stopping the loop, so no driver process
        is left behind.
        """
        try:
            self.driver.get(self.START_URL)
            self.root.mainloop()
        finally:
            self._quit_driver()

    def center_browser(self):
        """Resize the browser to 1200x800 and centre it on the available screen.

        Currently not invoked by ``startup``.
        """
        screen_width = self.driver.execute_script("return window.screen.availWidth;")
        screen_height = self.driver.execute_script("return window.screen.availHeight;")
        window_width = 1200
        window_height = 800
        x = (screen_width / 2) - (window_width / 2)
        y = (screen_height / 2) - (window_height / 2)
        self.driver.set_window_size(window_width, window_height)
        self.driver.set_window_position(int(x), int(y))

    def start_thread(self):
        """Button handler: start one extraction worker unless one is running."""
        if self.isRunning:
            return
        self.isRunning = True
        self.btn_run.config(state=tk.DISABLED)
        self.label.config(text="자막 추출 준비 중...")
        threading.Thread(target=self.run).start()

    def run(self):
        """Worker-thread entry point: extract, then always restore the UI state."""
        try:
            self.extract()
        except Exception:
            # Worker-thread boundary: replace the stale progress text, then
            # re-raise so the traceback still reaches the console.
            self.label.config(text="자막 추출 중 오류가 발생했습니다.")
            raise
        finally:
            try:
                self.driver.switch_to.default_content()
            finally:
                # Must run even when the browser is gone and the switch above
                # raised; otherwise the Run button would stay disabled.
                self.isRunning = False
                self.btn_run.config(state=tk.NORMAL)
                self.progress.stop()

    def _enter_frame(self, frame_id):
        """Wait for the iframe with id ``frame_id`` and switch the driver into it."""
        iframe = WebDriverWait(self.driver, self.WAIT_SECONDS).until(
            EC.presence_of_element_located((By.ID, frame_id))
        )
        self.driver.switch_to.frame(iframe)
        print(f'#=========이동함: {frame_id}')

    def lv_1(self):
        """Switch from the top document into the ``popCourseContent`` iframe."""
        self.driver.switch_to.default_content()
        self._enter_frame("popCourseContent")

    def lv_2(self):
        """Switch into ``learning_active``, the iframe nested in ``popCourseContent``."""
        self.lv_1()
        self._enter_frame("learning_active")

    def clean_subtitle(self, vtt_content: str) -> str:
        """Return the cue text of a WebVTT document, one non-empty line per row.

        Drops the leading ``WEBVTT`` header line (with an optional byte-order
        mark or trailing header text), every cue timing line (hours optional)
        and blank lines. Empty input yields an empty string.
        """
        lines = vtt_content.splitlines()
        if lines:
            header = lines[0].lstrip('\ufeff').strip()
            if header == "WEBVTT" or header.startswith(("WEBVTT ", "WEBVTT\t")):
                lines = lines[1:]
        timeline = self._TIMELINE_RE
        return '\n'.join(
            line for line in lines
            if line.strip() and not timeline.match(line)
        )

    def fs_write(self, data, file_name):
        """Write ``data`` to ``<file_name>.txt`` next to the program and report it.

        The target directory is the executable's directory in a frozen build
        and this source file's directory otherwise. Characters that are not
        valid in a file name are replaced with ``_``.
        """
        if getattr(sys, 'frozen', False):
            base_path = os.path.dirname(sys.executable)
        else:
            base_path = os.path.dirname(__file__)
        safe_name = self._UNSAFE_FILENAME_RE.sub('_', file_name)
        save_path = os.path.join(base_path, safe_name + '.txt')
        with open(save_path, 'w', encoding='utf-8') as file:
            file.write(data)
        print(f"Subtitles saved to {save_path}")
        self.label.config(text=f"추출 완료: {save_path}")

    def alert_accept(self):
        """Accept a JavaScript alert if one shows up within three seconds."""
        try:
            WebDriverWait(self.driver, 3).until(EC.alert_is_present())
            alert = self.driver.switch_to.alert
            print(f"Alert text: {alert.text}")
            alert.accept()
        except TimeoutException:
            print("No alert appeared within the specified time")

    @staticmethod
    def _subtitle_url(path: str) -> str:
        """Resolve a subtitle path from the player form against the site root."""
        return urljoin(Main.BASE_URL + "/", path)

    def _fetch_subtitle(self, subtitle_path) -> str:
        """Download one subtitle file and return its cleaned text.

        Returns an empty string when the form carries no subtitle path.
        Raises ``requests.RequestException`` on network or HTTP errors so an
        error page is never saved as subtitle text.
        """
        subtitle_path = (subtitle_path or "").strip()
        if not subtitle_path:
            print("자막 파일 경로가 비어 있습니다.")
            return ""
        response = requests.get(self._subtitle_url(subtitle_path), timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return self.clean_subtitle(response.text)

    def extract(self):
        """Collect the subtitles of every video of the open week into one file.

        Opens the first video to bring up the player popup, lists the videos
        offered in its side menu, visits each one to read its title and
        subtitle path, closes the player and saves the combined text. Returns
        early, with a message in the status label, when the page is not a week
        page or offers no video. UI state (Run button, running flag) is
        restored by ``run``.
        """
        pattern = self._LECTURE_CALL_RE

        # Name of the week currently shown; its absence means the wrong page.
        try:
            week_name = self.driver.find_element(By.CSS_SELECTOR, "div.lect_Man > div > span.ft_b").text
        except NoSuchElementException:
            print("올바르지 않은 페이지입니다.")
            self.label.config(text="올바르지 않은 페이지입니다.")
            return
        sections = ['[{}]\n'.format(week_name)]

        # Find and launch the first video; this opens the player popup.
        lectures = self.driver.find_elements(By.CSS_SELECTOR, "div.btn_lecture_view > a")
        first_vid = next(
            (
                match.group()
                for lecture in lectures
                # get_attribute returns None when the link has no onclick.
                if (match := pattern.search(lecture.get_attribute("onclick") or ""))
            ),
            None,
        )
        if first_vid is None:
            print("추출할 영상을 찾지 못했습니다.")
            self.label.config(text="추출할 영상을 찾지 못했습니다.")
            return
        self.driver.execute_script(first_vid)

        # Wait until the popup's side menu and the video player have loaded.
        self.lv_1()
        WebDriverWait(self.driver, self.WAIT_SECONDS).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#pop_body > div.learn_left_menu > div > div.overflow_area"))
        )
        self.lv_2()
        WebDriverWait(self.driver, self.WAIT_SECONDS).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#player_html5_api"))
        )

        # List every video offered in the side menu.
        self.lv_1()
        learnings = self.driver.find_elements(By.CSS_SELECTOR, "a[href*='javascript:fncLearningWindow']")
        vids = [
            match.group()
            for vid in learnings
            if (match := pattern.search(vid.get_attribute("href") or ""))
        ]
        total = len(vids)
        self.progress["maximum"] = total
        # Start from zero so a second run does not begin with a full bar.
        self.progress["value"] = 0

        for i, vid in enumerate(vids):
            self.lv_1()
            print(f"- {i + 1} 번째 영상 추출 중")
            self.label.config(text=f"추출 중 ({i + 1} / {total})")
            self.driver.execute_script(vid)
            self.alert_accept()

            self.lv_2()
            WebDriverWait(self.driver, self.WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#player_html5_api"))
            )

            # The title lives in the outer frame, the subtitle path in the inner one.
            self.lv_1()
            video_name = self.driver.find_element(By.CSS_SELECTOR, "div.depth2.active > a > span.tx > span").text

            self.lv_2()
            vtt = WebDriverWait(self.driver, self.WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#frm_reload > textarea[name='reload_contents_subtitle_ko_file_path']"))
            )
            content = self._fetch_subtitle(vtt.get_attribute("value"))

            sections.append('<{}>\n{}\n\n'.format(video_name, content))
            self.progress["value"] = i + 1

        # Close the player popup through its own close handler.
        sections.append("========================================================\n")
        self.lv_1()
        close_script = self.driver.find_element(
            By.CSS_SELECTOR, "body > div.l_popup_learn > div.title_box > div.btn_func > a"
        ).get_attribute("onclick")
        self.driver.execute_script(close_script)
        self.alert_accept()

        self.fs_write(''.join(sections), week_name.split(' ')[0])
        self.progress["value"] = total

    def monitor_browser(self):
        """Poll the browser once per second and stop the UI once it is gone."""
        while True:
            time.sleep(1)
            if self._driver_quit:
                # The application is already shutting the driver down itself.
                break
            try:
                self.driver.title  # any driver call fails once the browser is closed
            except WebDriverException:
                print("브라우저가 닫혔습니다.")
                # NOTE(review): this runs on the watcher thread, not the Tk thread.
                self.root.quit()
                break

    def _quit_driver(self):
        """Shut the WebDriver session down once; later calls do nothing."""
        if getattr(self, "_driver_quit", False):
            return
        self._driver_quit = True
        try:
            self.driver.quit()
        except Exception as exc:
            # Shutdown boundary: the browser may already be gone, and the
            # window must still be allowed to close.
            print(f"WebDriver shutdown failed: {exc!r}")

    def on_closing(self):
        """Window-close handler: quit the browser, then destroy the window."""
        self._quit_driver()
        self.root.destroy()

    def resource_path(self, relative_path):
        """Get absolute path to resource, works for dev and for PyInstaller."""
        # PyInstaller unpacks to a temp folder and stores its path in sys._MEIPASS.
        base_path = getattr(sys, '_MEIPASS', os.path.abspath("."))
        return os.path.join(base_path, relative_path)


# Script entry point: build the application and hand control to its main loop.
if __name__ == "__main__":
    app = Main()
    app.startup()