# -*- coding: UTF-8 -*-
"""
This script fetches the voice files directly from the wiki and saves them into the database.
"""

import json
import os
from pathlib import Path

from bs4 import BeautifulSoup
from nonebot import logger
from sqlitedict import SqliteDict  # TODO: add to requirements

from utils import aiorequests
from .util import get_path

# OUT_PUT = Path(__file__).parent / 'voice'
OUT_PUT = Path() / 'data' / 'LittlePaimon' / 'guess_voice' / 'voice'

BASE_URL = 'https://wiki.biligame.com/ys/'

# Wiki page names: the character index page and the per-character voice page
API = {'character_list': '角色', 'voice': '%s语音'}

config = {
    # Voice languages to download: Japanese, English, Korean
    'voice_language': ['日', '英', '韩']
}

# dir_data = os.path.join(os.path.dirname(__file__), 'data')
dir_data = Path() / 'data' / 'LittlePaimon' / 'guess_voice' / 'data'

# if not os.path.exists(dir_data):
#     os.makedirs(dir_data)
dir_data.mkdir(parents=True, exist_ok=True)

############

def init_db(db_dir, db_name='db.sqlite') -> SqliteDict:
    return SqliteDict(str(get_path(db_dir, db_name)),
                      encode=json.dumps,
                      decode=json.loads,
                      autocommit=True)


db = init_db('data', 'voice.sqlite')
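# Illustrative note (not in the original source): update_voice_data() below stores
# one list of voice entries per character, so a record looks roughly like:
#   db['<character name>'] = [{'title': ..., 'text': ...,
#                              'chn': ..., 'jap': ..., 'eng': ..., 'kor': ...}, ...]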


############

# Fetch the character list
async def get_character_list():
    html = await aiorequests.get(url=(BASE_URL + API['character_list']))
    soup = BeautifulSoup(html.text, 'lxml')
    char_list = soup.find(attrs={
        'class': 'resp-tab-content',
        'style': 'display:block;'
    })
    # The index page groups characters into three rarity tabs ('C5星', 'C5' and 'C4星');
    # collect the names from each group and deduplicate within each group
    char_list1 = char_list.find_all(attrs={'class': 'g C5星'})
    res = list(set(map(lambda x: x.find('div', class_='L').text, char_list1)))
    char_list2 = char_list.find_all(attrs={'class': 'g C5'})
    res.extend(list(set(map(lambda x: x.find('div', class_='L').text, char_list2))))
    char_list3 = char_list.find_all(attrs={'class': 'g C4星'})
    res.extend(list(set(map(lambda x: x.find('div', class_='L').text, char_list3))))
    res.sort()
    return res


# Fetch a character's voice lines from the wiki
async def get_voice_info(character_name: str):
    logger.info('获取数据: %s' % character_name)
    html = await aiorequests.get(url=(BASE_URL + API['voice'] % character_name))
    soup = BeautifulSoup(html.text, 'lxml')
    # The wiki shows this placeholder text when the voice page does not exist yet
    if soup.find(text='本页面目前没有内容。您可以在其他页面中'):
        return None
    voice_list = soup.find_all(attrs={'class': 'visible-md'})[2:]
    info_list = []
    for item in voice_list:
        item_tab = item.find_all(attrs={'class': ''})[1:]
        # A bare string instead of an audio tag means there are no more voice rows
        if isinstance(item_tab[1].next, str):
            return info_list
        info_list.append({
            'title': item_tab[0].text,
            'text': item_tab[5].text,
            '中': item_tab[1].next.attrs.get('data-src', ''),
            '日': item_tab[2].next.attrs.get('data-src', ''),
            '英': item_tab[3].next.attrs.get('data-src', ''),
            '韩': item_tab[4].next.attrs.get('data-src', ''),
        })
    return info_list


# Download an audio file to a local path
async def download(url, path):
    res = await aiorequests.get(url=url, timeout=30)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        f.write(res.read())


async def update_voice_data():
    # Fetch the full character list
    char_list = await get_character_list()
    for char in char_list:
        info = await get_voice_info(char)
        if not info:
            continue
        data = []
        for v in info:
            chn = ''
            jap = ''
            eng = ''
            kor = ''
            for language in config['voice_language']:
                url = v[language]
                if not url:
                    continue
                path = str(Path() / language / char / Path(url).name)
                out_path = OUT_PUT / path
                out = str(out_path)
                if not out_path.exists():
                    await download(url, out)
                    # Log inside the loop, where `path` is guaranteed to be bound
                    logger.info('下载成功: %s -> %s' % (char, path))

                if language == '中':
                    chn = path
                elif language == '日':
                    jap = path
                elif language == '英':
                    eng = path
                elif language == '韩':
                    kor = path

            data.append({
                'title': v['title'],
                'text': v['text'],
                'chn': chn,
                'jap': jap,
                'eng': eng,
                'kor': kor
            })
        # Save to the database
        db[char] = data
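# Hypothetical invocation sketch (not in the original source). Inside the bot's
# running event loop one would simply `await update_voice_data()`; the audio
# files then land under OUT_PUT/<language>/<character>/<file name>.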


async def voice_list_by_mys():
    url = 'https://api-static.mihoyo.com/common/blackboard/ys_obc/v1/home/content/list?app_sn=ys_obc&channel_id=84'
    resp = await aiorequests.get(url=url, timeout=30)
    json_data = resp.json()
    # A non-zero retcode signals an API-side error
    if json_data['retcode']:
        raise Exception(json_data['message'])
    try:
        data_list = json_data['data']['list'][0]['list']
    except KeyError as e:
        raise Exception('获取语音列表失败, 请联系作者修复') from e

    # Map the first word of each entry's title (the character name) to the entry
    return {x['title'].split()[0]: x for x in data_list}


async def voice_detail_by_mys(content_id):
    url = 'https://bbs.mihoyo.com/ys/obc/content/%s/detail?bbs_presentation_style=no_header' % content_id
    res = await aiorequests.get(url=url, timeout=30)
    soup = BeautifulSoup(res.text, 'lxml')
    # Each paragraph box holds one voice line's text plus its audio <source> tag
    paragraph_box = soup.select('.obc-tmpl__paragraph-box')

    return [{
        'text': x.get_text(),
        'chn': x.find('source').attrs['src']
    } for x in paragraph_box]
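

# A minimal usage sketch for the miHoYo wiki endpoints (hypothetical, not in the
# original source). It assumes each entry from voice_list_by_mys() carries a
# 'content_id' field accepted by voice_detail_by_mys(); check the actual payload.
#
#     import asyncio
#
#     async def _demo():
#         entries = await voice_list_by_mys()
#         entry = entries.get('<character name>')
#         if entry:
#             for line in await voice_detail_by_mys(entry['content_id']):
#                 print(line['text'], line['chn'])
#
#     asyncio.run(_demo())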