import re
from urllib.parse import urlparse
import requests
import utils
import pprint
class DY(object):
    """Resolve a Douyin share link into direct media URLs.

    Typical use is ``DY().parse(short_url)``, which follows the short link's
    redirect, extracts the numeric video id from the resulting share URL and
    queries the ``iteminfo`` web API for the video, cover, music and author.

    All parsing failures are reported as a plain ``Exception`` whose message
    is ``"链接无效"`` (host not allowed) or ``"解析失败"`` (anything else), so
    existing ``except Exception`` callers keep working. Network-level errors
    raised by ``requests`` propagate unchanged.
    """

    # Seconds to wait on each HTTP request; without a timeout ``requests``
    # can block indefinitely on a stalled connection.
    request_timeout = 10

    # Path of a resolved share URL; the capture group is the video id.
    # ``\d+`` (not ``\d*``) so an empty id counts as "no match".
    _VIDEO_PATH_RE = re.compile(r'/share/video/(\d+)')

    def __init__(self, app=None):
        """Create a parser.

        Args:
            app: Optional application object. It is only stored on
                ``self.app`` (via ``init_app``); nothing else in this class
                reads it.
        """
        self.app = app
        if app is not None:
            self.init_app(app)
        # Request headers sent with every call; the user agent is a mobile
        # Safari string.
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        }
        # Hosts a resolved share URL is allowed to point at.
        self.domain = [
            'www.douyin.com',
            'v.douyin.com',
            'www.snssdk.com',
            'www.amemv.com',
            'www.iesdouyin.com',
            'aweme.snssdk.com',
        ]

    def init_app(self, app):
        """Attach *app* to this instance (stored as ``self.app``)."""
        self.app = app

    def parse(self, url):
        """Resolve *url* and return the media description for its video.

        Args:
            url: A share link whose HTTP response redirects to a
                ``/share/video/<id>`` URL on one of ``self.domain``.

        Returns:
            The dict produced by ``get_data`` for the extracted video id.

        Raises:
            Exception: ``"链接无效"`` when the redirect target's host is not
                in ``self.domain``; ``"解析失败"`` when no video id can be
                found in the redirect target's path.
        """
        share_url = self.get_share_url(url)
        share_path = urlparse(share_url)
        if share_path.netloc not in self.domain:
            raise Exception("链接无效")
        # A single search replaces the former findall(...)[0], which raised
        # IndexError on non-video paths before the failure branch could run.
        match = self._VIDEO_PATH_RE.search(share_path.path)
        if match is None:
            raise Exception("解析失败")
        return self.get_data(match.group(1))

    def get_share_url(self, url):
        """Return the ``Location`` redirect target of *url*.

        The request is made with redirects disabled so the target can be read
        from the response headers.

        Raises:
            Exception: ``"解析失败"`` when the response has no ``Location``
                header.
        """
        response = requests.get(
            url,
            headers=self.headers,
            allow_redirects=False,
            timeout=self.request_timeout,
        )
        location = response.headers.get('Location')
        if location is None:
            raise Exception("解析失败")
        return location

    def get_data(self, vid):
        """Fetch item info for video id *vid* and return its media URLs.

        Returns:
            A dict with keys ``mp3`` (first music play URL), ``mp4`` (final
            URL of the video after following redirects), ``cover`` (first
            cover image URL), ``nickname`` (author nickname, ``None`` if
            absent) and ``desc`` (description, ``None`` if absent).

        Raises:
            Exception: ``"解析失败"`` when the API does not answer with
                status 200, the body is not JSON, or the payload lacks the
                expected item/video/music structure.
        """
        url = f"https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={vid}"
        response = requests.get(
            url,
            headers=self.headers,
            timeout=self.request_timeout,
        )
        # Check the status before touching the body: an error page is not
        # guaranteed to be JSON.
        if response.status_code != 200:
            raise Exception("解析失败")
        try:
            item = response.json()["item_list"][0]
            video = item["video"]
            author = item["author"].get("nickname")
            mp4 = video["play_addr"]["url_list"][0]
            cover = video["cover"]["url_list"][0]
            mp3 = item["music"]["play_url"]["url_list"][0]
            desc = item.get("desc")
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as err:
            # ValueError covers a non-JSON body; the rest cover an empty or
            # differently shaped payload.
            raise Exception("解析失败") from err

        # NOTE(review): "playwm" -> "play" presumably selects the
        # watermark-free endpoint; confirm against the API.
        mp4 = mp4.replace("playwm", "play")
        # stream=True follows the redirects without downloading the video
        # body; only the final URL is needed. The context manager releases
        # the connection.
        with requests.get(
            mp4,
            headers=self.headers,
            allow_redirects=True,
            stream=True,
            timeout=self.request_timeout,
        ) as res:
            mp4 = res.url

        return {
            'mp3': mp3,
            'mp4': mp4,
            'cover': cover,
            'nickname': author,
            'desc': desc,
        }
# Module-level parser instance, created when the module is imported.
dy = DY()

if __name__ == '__main__':
    # Manual run: resolve one sample short link and pretty-print the result.
    sample_link = "https://v.douyin.com/dwPes7D/"
    dy = DY()
    pprint.pprint(dy.parse(sample_link))
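# Author's note: the code is posted as-is. To see how it works, open the
# browser developer tools (F12), change the user agent and watch the requests.
# Plenty of ready-made web pages online do the same thing. The approach is
# borrowed from an answer by an expert on Zhihu; I forgot to save the link and
# will add it once I find it again. A web front end is in progress, and I plan
# to build it with Flask as practice.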