# src/clover_yuc_wiki/yuc_wiki.py — fetches yuc.wiki anime-season pages and renders them to images.
import os
from pathlib import Path
import requests
from os import getcwd
from bs4 import BeautifulSoup
from datetime import datetime,timedelta
from nonebot_plugin_htmlrender import template_to_pic
from src.configs.path_config import yuc_wiki_path
# Root of the yuc.wiki site; relative href/src values are resolved against it.
base_url = "https://yuc.wiki/"
# Page listing upcoming (next-season) anime.
new = "https://yuc.wiki/new"
async def get_yuc_wiki(keyword):
    """
    Download an anime-season page from yuc.wiki, render it to an image,
    and return the path of the generated JPEG.

    :param keyword: '本季新番' selects the current season's page
        (``/{YYYYMM}``); anything else fetches the upcoming-anime page
        (``/new``).
    :return: path of the rendered JPEG, or ``None`` when the download or
        rendering failed.
    """
    try:
        if keyword == '本季新番':
            # e.g. "202504" -> https://yuc.wiki/202504
            template_name = await generate_season_url()
            # NOTE(review): requests is blocking inside an async function;
            # consider an async HTTP client if this runs on the bot loop.
            response = requests.get(base_url + template_name)
        else:
            template_name = 'forecast_anime'
            response = requests.get(new)
        if response.status_code != 200:
            return None
        soup = await dispose_html(response)
        # os.path.join for consistency with get_yuc_wiki_image's cache path.
        html_file = os.path.join(yuc_wiki_path, f'{template_name}.html')
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(str(soup))
        await get_yuc_wiki_image(template_name, 568, 1885)
    except Exception as e:  # IOError is already an Exception subclass
        print(f"Error occurred: {e}")
        # Bug fix: previously returned a path to a JPEG that may never
        # have been written; fail the same way as the HTTP-error branch.
        return None
    return yuc_wiki_path + f'{template_name}.jpeg'
async def generate_season_url():
    """
    Build the path segment for the current anime season.

    :return: string like ``"202501"`` — the current year followed by the
        zero-padded first month of the current quarter (01/04/07/10).
    """
    today = datetime.now()
    first_month_of_quarter = 3 * ((today.month - 1) // 3) + 1
    return "{}{:02d}".format(today.year, first_month_of_quarter)
async def dispose_html(response):
    """
    Strip a downloaded yuc.wiki page to its main content and make its
    links absolute.

    Removes the first table, every <header>/<aside>, and everything that
    follows the second <hr>; then rewrites href/src attributes so they
    resolve against https://yuc.wiki/.

    :param response: requests.Response holding the downloaded page
    :return: the cleaned BeautifulSoup document
    """
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Drop the first table on the page, if one exists.
    table = soup.select_one('table')
    if table is not None:
        table.decompose()

    # Site chrome is not wanted in the rendered image.
    for chrome in soup.select('header, aside'):
        chrome.decompose()

    # Truncate the document right after the second horizontal rule.
    rules = soup.find_all('hr')
    if len(rules) >= 2:
        node = rules[1].next_sibling
        while node is not None:
            following = node.next_sibling
            node.extract()
            node = following

    # Rewrite link/resource URLs so the saved HTML resolves off-line.
    for element in soup.find_all(['a', 'link', 'img', 'script', 'source']):
        # Lazily-loaded images keep the real URL in data-src.
        if element.name == 'img' and element.get('data-src'):
            element['src'] = element['data-src']
            del element['data-src']
        attr = 'href' if element.name in ['a', 'link'] else 'src'
        if element.has_attr(attr):
            path = element[attr].lstrip('/\\')
            if not path.startswith(('http://', 'https://')):
                element[attr] = f"{base_url}{path}"
            if path.startswith('http://'):
                # Upgrade plain-http references to https.
                element[attr] = path.replace('http://', 'https://', 1)
    return soup
async def get_yuc_wiki_image(template_name, width, height):
    """
    Render ``{template_name}.html`` to a JPEG and return the image bytes.

    A previously generated JPEG in ``yuc_wiki_path`` is used as a cache;
    otherwise the HTML is rendered via nonebot_plugin_htmlrender and the
    result is saved next to the template.

    :param template_name: base name of the html/jpeg files (no extension)
    :param width: viewport width in pixels
    :param height: viewport height in pixels
    :return: the JPEG image as bytes
    """
    file = os.path.join(yuc_wiki_path, f"{template_name}.jpeg")
    if os.path.exists(file):
        with open(file, "rb") as image_file:
            return image_file.read()
    image_bytes = await template_to_pic(
        template_path=yuc_wiki_path,
        template_name=f'{template_name}.html',
        templates={"data": None},
        quality=40,
        type="jpeg",
        pages={
            "viewport": {"width": width, "height": height},
            "base_url": f"file://{getcwd()}",
        },
        wait=2,
    )
    await save_img(image_bytes, template_name)
    # Bug fix: previously returned bytes on a cache hit but None after a
    # fresh render; both paths now return the image bytes.
    return image_bytes
async def save_img(data: bytes, template_name: str):
    """
    Persist a rendered yuc_wiki image to disk.

    :param data: raw JPEG bytes
    :param template_name: base file name (saved as ``{template_name}.jpeg``)
    :return: None
    """
    target = yuc_wiki_path + f"{template_name}.jpeg"
    with open(target, "wb") as fh:
        fh.write(data)
    print("保存图片完成")