# selc_automation/main.py
#
# 233 lines
# 9.2 KiB
# Python

import os
import re
import time
import requests
import threading
import tkinter as tk
from tkinter import ttk
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
import sys
class Main:
    """Tk control window plus a Selenium-driven Chrome session for exporting lecture subtitles.

    Workflow as implemented below: ``startup`` opens the LMS main page and
    enters the Tk main loop; pressing "Run" starts a worker thread that opens
    every video linked from the current lecture page, downloads the subtitle
    file referenced by each player, strips the WebVTT header/timing lines and
    writes the combined text to a ``.txt`` file.

    NOTE(review): the worker thread, the browser monitor thread and the Tk
    main thread all share ``self.driver`` and the widgets without locking --
    confirm that this is acceptable for the Selenium/Tk builds in use.
    """

    # Origin that the relative subtitle paths found in the player are appended to.
    BASE_URL = "https://selc.or.kr"
    # Seconds before a subtitle download is abandoned, so a stalled connection
    # cannot block the worker thread forever.
    REQUEST_TIMEOUT = 30
    # WebVTT cue timing line ("start --> end"); the hours field is optional.
    _TIMELINE_PATTERN = re.compile(
        r'(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2,}:)?\d{2}:\d{2}\.\d{3}'
    )
    # Characters that cannot appear in a file name on Windows (includes "/").
    _INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        """Start Chrome, build the window and start the browser monitor thread."""
        self.driver = webdriver.Chrome()
        self.isRunning = False
        try:
            self.init_ui()
        except Exception:
            # Do not leave an orphaned browser behind if the window cannot be built.
            self.driver.quit()
            raise
        # Handler for the window-close event.
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        # Start the thread that watches whether the browser is still open.
        threading.Thread(target=self.monitor_browser, daemon=True).start()

    def init_ui(self):
        """Create the root window with its status label, progress bar and Run button."""
        self.root = tk.Tk()
        # center_window() sets both size (350x200) and position, so no second
        # geometry() call is needed.
        self.center_window(self.root, 350, 200)
        self.root.title("대학 e러닝 자막 추출기")
        self.root.configure(bg='#F0F0F0')
        try:
            self.root.iconbitmap(self.resource_path('favicon.ico'))
        except tk.TclError as exc:
            # The icon is cosmetic: a missing file or a platform that rejects
            # .ico bitmaps must not prevent the tool from starting.
            print(f"Window icon could not be loaded: {exc}")

        style = ttk.Style()
        style.configure('TButton', font=('Helvetica', 12), padding=10)
        style.configure('TLabel', font=('Helvetica', 12), background='#F0F0F0')
        style.configure('TProgressbar', thickness=20)

        self.label = ttk.Label(self.root, text="강의 주차로 이동 후 실행", anchor='center')
        self.label.pack(pady=20)
        self.progress = ttk.Progressbar(self.root, orient="horizontal", length=300, mode="determinate")
        self.progress.pack(pady=20)
        self.btn_run = ttk.Button(self.root, text="Run", command=self.start_thread)
        self.btn_run.pack(pady=10)

    def center_window(self, root, width, height):
        """Resize *root* to ``width`` x ``height`` and place it in the middle of the screen."""
        screen_width = root.winfo_screenwidth()
        screen_height = root.winfo_screenheight()
        x = (screen_width / 2) - (width / 2)
        y = (screen_height / 2) - (height / 2)
        root.geometry(f'{width}x{height}+{int(x)}+{int(y)}')

    def startup(self):
        """Open the LMS main page in the browser and run the Tk main loop (blocks)."""
        # self.center_browser()  # optional: call to centre the browser window as well
        self.driver.get('https://selc.or.kr/lms/main/MainView.do')
        self.root.mainloop()

    def center_browser(self):
        """Resize the browser to 1200x800 and centre it on the available screen area."""
        screen_width = self.driver.execute_script("return window.screen.availWidth;")
        screen_height = self.driver.execute_script("return window.screen.availHeight;")
        window_width = 1200
        window_height = 800
        x = (screen_width / 2) - (window_width / 2)
        y = (screen_height / 2) - (window_height / 2)
        self.driver.set_window_size(window_width, window_height)
        self.driver.set_window_position(int(x), int(y))

    def start_thread(self):
        """Run-button callback: start one extraction worker unless one is already active."""
        if self.isRunning:
            return
        self.isRunning = True
        self.btn_run.config(state=tk.DISABLED)
        self.label.config(text="자막 추출 준비 중...")
        # Start every run from an empty bar; otherwise the value left over
        # from the previous run keeps accumulating.
        self.progress["value"] = 0
        threading.Thread(target=self.run).start()

    def run(self):
        """Worker-thread entry point: run ``extract`` and always restore the idle state."""
        try:
            self.extract()
        except Exception:
            # Replace the stale "in progress" text, then re-raise so the
            # traceback is still reported for this thread.
            self._set_status("자막 추출 중 오류가 발생했습니다.")
            raise
        finally:
            try:
                self.driver.switch_to.default_content()
            except WebDriverException:
                # The browser is already gone; there is no frame to leave, and
                # the state reset below must still happen.
                pass
            self.isRunning = False
            try:
                self.btn_run.config(state=tk.NORMAL)
                self.progress.stop()
            except (tk.TclError, RuntimeError):
                # The window was closed while the worker was running.
                pass

    def _set_status(self, text):
        """Show *text* in the status label; fall back to stdout if the window is gone."""
        try:
            self.label.config(text=text)
        except (tk.TclError, RuntimeError):
            print(text)

    def _enter_iframe(self, frame_id):
        """Wait up to 10 s for the iframe with id *frame_id* and switch into it."""
        iframe = WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, frame_id))
        )
        self.driver.switch_to.frame(iframe)
        print(f'#=========이동함: {frame_id}')

    def _wait_for_css(self, selector):
        """Wait up to 10 s until an element matching *selector* is present and return it."""
        return WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )

    def lv_1(self):  # popCourseContent
        """Switch from the top-level document into the ``popCourseContent`` iframe."""
        self.driver.switch_to.default_content()
        self._enter_iframe("popCourseContent")

    def lv_2(self):  # learning_active
        """Switch into ``learning_active``, the iframe nested inside ``popCourseContent``."""
        self.lv_1()
        self._enter_iframe("learning_active")

    def clean_subtitle(self, vtt_content):
        """Return *vtt_content* without the WEBVTT header, cue timing lines and blank lines.

        Args:
            vtt_content: Text of a WebVTT file; may be empty.

        Returns:
            The remaining lines joined with ``"\\n"`` (an empty string when
            nothing remains).
        """
        # A byte-order mark survives UTF-8 decoding and would hide the header.
        lines = vtt_content.lstrip('\ufeff').splitlines()
        if lines:
            header = lines[0].strip()
            # The signature may be followed by a space/tab and free text.
            if header == "WEBVTT" or header.startswith(("WEBVTT ", "WEBVTT\t")):
                lines = lines[1:]
        kept = [
            line for line in lines
            if line.strip() != '' and not self._TIMELINE_PATTERN.match(line)
        ]
        return '\n'.join(kept)

    def fs_write(self, data, file_name):
        """Write *data* to ``<file_name>.txt`` next to the executable (frozen) or this file.

        Args:
            data: Text to write (UTF-8).
            file_name: Base name without extension; characters that are not
                allowed in file names are replaced by ``_``.
        """
        base_path = os.path.dirname(sys.executable) if getattr(sys, 'frozen', False) else os.path.dirname(__file__)
        # The name comes from page text, so keep it from breaking the path.
        safe_name = self._INVALID_FILENAME_CHARS.sub('_', file_name) or 'subtitles'
        save_path = os.path.join(base_path, safe_name + '.txt')
        with open(save_path, 'w', encoding='utf-8') as file:
            file.write(data)
        print(f"Subtitles saved to {save_path}")
        self._set_status(f"추출 완료: {save_path}")

    def alert_accept(self):
        """Accept a JavaScript alert if one shows up within 3 seconds; otherwise do nothing."""
        try:
            WebDriverWait(self.driver, 3).until(EC.alert_is_present())
            alert = self.driver.switch_to.alert
            print(f"Alert text: {alert.text}")
            alert.accept()
        except TimeoutException:
            print("No alert appeared within the specified time")

    @staticmethod
    def _collect_calls(elements, attribute, pattern):
        """Return the text matched by *pattern* in *attribute* of each element, in order.

        Elements whose attribute is missing (``None``) or does not match are skipped.
        """
        calls = []
        for element in elements:
            found = pattern.search(element.get_attribute(attribute) or "")
            if found:
                calls.append(found.group())
        return calls

    def _download_subtitle(self, subtitle_path):
        """Download the subtitle at *subtitle_path* (relative to ``BASE_URL``) and clean it.

        Returns an empty string when no path is given or the server answers
        with an error status, so an error page is never stored as subtitle text.
        Connection errors and timeouts propagate to the caller.
        """
        if not subtitle_path:
            print("No subtitle path found for this video")
            return ""
        url = self.BASE_URL + subtitle_path
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if not response.ok:
            print(f"Subtitle download failed ({response.status_code}): {url}")
            return ""
        # WebVTT files are UTF-8; without a declared charset requests would
        # fall back to ISO-8859-1 for text/* responses and garble the text.
        if 'charset' not in response.headers.get('Content-Type', '').lower():
            response.encoding = 'utf-8'
        return self.clean_subtitle(response.text)

    def extract(self):
        """Collect the subtitles of every video on the current lecture page into one file.

        Reports a message and returns early when the page has no week name or
        no playable video. State reset (``isRunning``, Run button) is left to
        ``run``'s ``finally`` block.
        """
        pattern = re.compile(r"fncLearningWindow\(([^,]+),([^,]+),([^,]+),([^,]+),'LV'[^)]*\)")

        # Look up the name of the current week.
        try:
            week_name = self.driver.find_element(By.CSS_SELECTOR, "div.lect_Man > div > span.ft_b").text
        except NoSuchElementException:
            print("올바르지 않은 페이지입니다.")
            self._set_status("올바르지 않은 페이지입니다.")
            return
        parts = ['[{}]\n'.format(week_name)]

        # Look up the first video.
        lectures = self.driver.find_elements(By.CSS_SELECTOR, "div.btn_lecture_view > a")
        launchers = self._collect_calls(lectures, "onclick", pattern)
        if not launchers:
            print("재생 가능한 강의 영상을 찾을 수 없습니다.")
            self._set_status("재생 가능한 강의 영상을 찾을 수 없습니다.")
            return
        self.driver.execute_script(launchers[0])  # launch the first video

        # Move into the video player iframes and wait until they are populated.
        self.lv_1()
        self._wait_for_css("#pop_body > div.learn_left_menu > div > div.overflow_area")
        self.lv_2()
        self._wait_for_css("#player_html5_api")

        # Subtitle extraction loop ======================
        self.lv_1()
        learnings = self.driver.find_elements(By.CSS_SELECTOR, "a[href*='javascript:fncLearningWindow']")
        vids = self._collect_calls(learnings, "href", pattern)
        self.progress["maximum"] = len(vids)  # progress bar maximum = number of videos
        for number, vid in enumerate(vids, start=1):
            self.lv_1()
            print(f"- {number} 번째 영상 추출 중")
            self._set_status(f"추출 중 ({number} / {len(vids)})")
            self.driver.execute_script(vid)
            self.alert_accept()
            self.lv_2()
            self._wait_for_css("#player_html5_api")
            self.lv_1()
            video_name = self.driver.find_element(By.CSS_SELECTOR, "div.depth2.active > a > span.tx > span").text
            self.lv_2()
            vtt = self._wait_for_css("#frm_reload > textarea[name='reload_contents_subtitle_ko_file_path']")
            content = self._download_subtitle(vtt.get_attribute("value"))
            parts.append('<{}>\n{}\n\n'.format(video_name, content))
            self.progress["value"] += 1  # advance the bar after each video

        # All videos done: close the player.
        parts.append("========================================================\n")
        self.lv_1()
        close_script = self.driver.find_element(
            By.CSS_SELECTOR, "body > div.l_popup_learn > div.title_box > div.btn_func > a"
        ).get_attribute("onclick")
        self.driver.execute_script(close_script)
        self.alert_accept()
        self.fs_write(''.join(parts), week_name.split(' ')[0])
        self.progress["value"] = self.progress["maximum"]  # show a full bar on completion

    def monitor_browser(self):
        """Poll the browser once per second and quit the UI when it is no longer reachable."""
        while True:
            time.sleep(1)
            try:
                self.driver.title  # accessed only to check that the browser is still open
            except WebDriverException:
                print("브라우저가 닫혔습니다.")
                try:
                    self.root.quit()  # end the UI
                except (tk.TclError, RuntimeError):
                    # The window has already been destroyed by on_closing().
                    pass
                break

    def on_closing(self):
        """Window-close handler: shut down the browser and destroy the window.

        The window is destroyed even while an extraction is running; the
        worker then fails on its next browser call and ends via ``run``.
        """
        try:
            self.driver.quit()
        except WebDriverException as exc:
            print(f"Browser could not be closed cleanly: {exc}")
        finally:
            # Always close the window, even if the browser shutdown failed.
            self.root.destroy()

    def resource_path(self, relative_path):
        """Get absolute path to resource, works for dev and for PyInstaller."""
        # PyInstaller creates a temp folder and stores its path in sys._MEIPASS;
        # outside a bundle the current working directory is used.
        base_path = getattr(sys, '_MEIPASS', None)
        if base_path is None:
            base_path = os.path.abspath(".")
        return os.path.join(base_path, relative_path)
if __name__ == "__main__":
    # Script entry point: construct the application and hand control to
    # startup(), which blocks in the Tk main loop.
    Main().startup()