From 0b3d13f614ae147cf72322c6bfbff26f0362f2b1 Mon Sep 17 00:00:00 2001
From: meatjam <851404658@qq.com>
Date: Mon, 27 Jun 2022 20:10:05 +0800
Subject: [PATCH] refactor(guess_voice): add and switch to fetching character
 voice lines via Miyoushe; the original biligame source is missing many voice
 lines, especially in languages other than Chinese
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Guess_voice/download_data.py | 64 +++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/Guess_voice/download_data.py b/Guess_voice/download_data.py
index b8fdc08..3217170 100644
--- a/Guess_voice/download_data.py
+++ b/Guess_voice/download_data.py
@@ -18,6 +18,8 @@ from .util import get_path
 
 OUT_PUT = Path() / 'data' / 'LittlePaimon' / 'guess_voice' / 'voice'
 BASE_URL = 'https://wiki.biligame.com/ys/'
+BASE_URL_MYS = 'https://bbs.mihoyo.com'
+BASE_URL_MYS_CHARACTERS_LIST = '/ys/obc/channel/map/189/25?bbs_presentation_style=no_header'
 
 API = {'character_list': '角色', 'voice': '%s语音'}
 
@@ -90,6 +92,66 @@ async def get_voice_info(character_name: str):
     return info_list
 
 
+# Fetch character voice lines via Miyoushe; covers the full Chinese/Japanese/English/Korean voice sets.
+async def get_voice_info_mys(character_name: str):
+    character_name = character_name.strip()
+    logger.info('Fetching data: %s' % character_name)
+    html = await aiorequests.get(url=(BASE_URL_MYS + BASE_URL_MYS_CHARACTERS_LIST))
+    soup = BeautifulSoup(html.text, 'lxml')
+    soup_char_container = soup.select('.collection-avatar')[0]
+    url_char_page = None
+    for char_soup in soup_char_container.select('.collection-avatar__title'):
+        if char_soup.text.find(character_name) != -1:
+            url_char_page = char_soup.parent.attrs.get('href', None)
+            break
+    if url_char_page is None:
+        return None
+    html = await aiorequests.get(url=(BASE_URL_MYS + url_char_page))
+    soup = BeautifulSoup(html.text, 'lxml')
+    soup_voice_languages, soup_voice_lists = soup.select('[data-part="voiceTab"] > ul')
+    language_tab_indices = {
+        '中': -1,
+        '日': -1,
+        '英': -1,
+        '韩': -1
+    }
+    for soup_lan in soup_voice_languages.select('li'):
+        language = soup_lan.text
+        language_tab_index = int(soup_lan.attrs.get('data-index'))
+        if language.find('中') != -1 or language.find('汉') != -1:
+            language_tab_indices['中'] = language_tab_index
+        elif language.find('日') != -1:
+            language_tab_indices['日'] = language_tab_index
+        elif language.find('英') != -1:
+            language_tab_indices['英'] = language_tab_index
+        elif language.find('韩') != -1:
+            language_tab_indices['韩'] = language_tab_index
+    language_voices = {
+        '中': [],
+        '日': [],
+        '英': [],
+        '韩': []
+    }
+    for lan, voice_list in language_voices.items():
+        for soup_row in soup_voice_lists.select(f'li[data-index="{language_tab_indices[lan]}"] > table:nth-of-type(2) > tbody > tr'):
+            soup_source = soup_row.select('audio > source')
+            voice_list.append(soup_source[0].attrs.get('src') if len(soup_source) != 0 else '')
+
+    info_list = []
+    soup_title = soup_voice_lists.select('li:first-child > table:nth-of-type(2) > tbody > tr td:nth-child(1)')
+    soup_text = soup_voice_lists.select('li:first-child > table:nth-of-type(2) > tbody > tr td:nth-child(2) > div > span')
+    for index in range(len(soup_title)):
+        info_list.append({
+            'title': soup_title[index].text.strip(),
+            'text': soup_text[index].text.strip(),
+            '中': language_voices['中'][index],
+            '日': language_voices['日'][index],
+            '英': language_voices['英'][index],
+            '韩': language_voices['韩'][index],
+        })
+    return info_list
+
+
 # Download an audio file to local storage
 async def download(url, path):
     res = await aiorequests.get(url=url, timeout=30)
@@ -102,7 +164,7 @@ async def update_voice_data():
     # Fetch the full character list
     char_list = await get_character_list()
     for char in char_list:
-        info = await get_voice_info(char)
+        info = await get_voice_info_mys(char)
         if not info:
             continue
         data = []
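
A quick manual smoke test for the new fetcher (a minimal sketch, not part of
the patch: it assumes the plugin's dependencies such as aiorequests and lxml
are installed, that it is run from the repository root so that
Guess_voice.download_data is importable, and it uses '钟离' purely as an
example character name):

    import asyncio

    from Guess_voice.download_data import get_voice_info_mys

    async def main():
        # Returns None when the character page is not found on Miyoushe.
        info = await get_voice_info_mys('钟离')
        if info is None:
            print('character page not found on Miyoushe')
            return
        # Each entry holds the voice-line title, its transcript, and one
        # audio URL (possibly '') per language key: '中', '日', '英', '韩'.
        for entry in info[:3]:
            print(entry['title'], entry['中'])

    asyncio.run(main())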