1. 前端JS相关
- 三元运算
v1 = 条件 ? 值A : 值B; # 如果条件成立v1=值A,不成立v1等于值Bres = 1 === 1 ? 99 : 88 # res=99
- 特殊的逻辑运算
v1 = 1===1 || 2===2 # Ture
v2 = 9 || 14 # 9
v3 = 0 || 15 # 15
v3 = 0 || 15 || "zhangfei" # 15
- 赋值和比较
v1 = 11 === (n=123) # Flase
- 案例:
v1 = 1 > ( n = 2) || 1 === 1 ? 9 :8 # 分析
n = 2
v1 = 9
var o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, i)) || "";void 0 -> undifined
# 分析(window.byted_acrawler不为空、window.byted_acrawler.sign不为空)
var o = (null === (n = window.byted_acrawler) || void 0 === n ? void 0 : null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, i)) || "";var o = window.byted_acrawler.sign.call(n,i) || ""var o = window.byted_acrawler.sign.call(n,i)
- 执行函数
function sign(v1){// this在函数内部console.log(v1);
}
// 执行,函数内部this=window全局对象
sign(123) # 123// 执行函数内部会把第一个参数赋值给 this=123
sign.call(123,456) # 456
// n就会传递给call函数中this
// i当做参数传递
var o = window.byted_acrawler.sign.call(n,i)
var o = window.byted_acrawler.sign(i)
- 扩展
# 之前的javascript不支持面向对象,通过将函数去伪造
function Person(name,age){this.name=name;this.age = age
}obj = new Person("张飞",123)
- 函数的参数
function sign(){console.log(arguments)
}sign()
sign(11,22,33)
sign(11,22,44,55)
虽然没定义参数,但是可以传入参数
- 合并对象补充JS环境
v1 = { k1: 123 }
v2 = { k2:99, k3:888}Objects.assign(v1,v2) # 将第二个字典全部更新到V1;和python字典update很像console.log(v1) # {k1: 123, k2:99, k3:888}
2.编译js代码
2.1 node.js编译代码
v1.js
function func(arg) {return arg + 'i666';
}
let data = func("老铁");
console.log(data)
- node编译执行
- python执行执行本地命令:
node v1.js
import os
import subprocess# 根据自己的操作系统去修改(相当于python的sys.path,加载安装的模块)
os.environ["NODE_PATH"] = "/usr/local/lib/node_modules/" signature = subprocess.getoutput('node v1.js')
2.2 pyexecjs编译代码
准备环境:
- node.js
- pyexecjs模块
pip install pyexecjs
例如:
v2.js
function func(arg) {return arg + '666';
}
- 执行js代码
import execjs
import osos.environ["NODE_PATH"] = "/usr/local/lib/node_modules/"
with open('v2.js', mode='r', encoding='utf-8') as f:js = f.read()JS = execjs.compile(js)sign = JS.call("func", "微信")
print(sign) # 微信666
node.js:电脑上安装上node.js之后(编译器,相当于装CPython解释器), 自动安装npm(第三方包管理器,相当于pip)
2.3 浏览器环境
有些JS的代码你从别的地拿过来执行的时候不成功,因为需要模拟浏览器环境
环境准备:
- node.js
- jsdom(通过后端node+js代码实现伪造浏览器环境)
npm install node-gyp@latest sudo npm explore -g npm -- npm i node-gyp@latest
npm install jsdom -g # -g全局安装
注意:上述安装成功后已可以模拟浏览器环境,由于今天的头条他的内容。
npm install canvas -g
方式一:v10.js
const jsdom = require("jsdom");
const {JSDOM} = jsdom;const resourceLoader = new jsdom.ResourceLoader({userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36",
});const html = `<!DOCTYPE html><p>Hello world</p>`;const dom = new JSDOM(html, {url: "https://www.toutiao.com",referrer: "https://example.com/",contentType: "text/html",resources: resourceLoader,
});console.log(dom.window.location)
console.log(dom.window.navigator.userAgent)
console.log(dom.window.document.referrer)
import os
import subprocess# 根据自己的操作系统去修改(相当于python的sys.path,加载安装的模块)
os.environ["NODE_PATH"] = "/usr/local/lib/node_modules/" res = subprocess.getoutput('node v10.js')
方式二:无法补充环境时
const jsdom = require("jsdom");
const {JSDOM} = jsdom;const resourceLoader = new jsdom.ResourceLoader({userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36"
});const html = `<!DOCTYPE html><p>Hello world</p>`;
const dom = new JSDOM(html, {url: "https://www.toutiao.com",referrer: "https://example.com/",contentType: "text/html",resources: resourceLoader,
});/*
console.log(dom.window.location)
console.log(dom.window.navigator.userAgent)
console.log(dom.window.document.referrer)
*/window = global;const params = {location: {hash: "",host: "www.toutiao.com",hostname: "www.toutiao.com",href: "https://www.toutiao.com",origin: "https://www.toutiao.com",pathname: "/",port: "",protocol: "https:",search: "",},navigator: {appCodeName: "Mozilla",appName: "Netscape",appVersion: "5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",cookieEnabled: true,deviceMemory: 8,doNotTrack: null,hardwareConcurrency: 4,language: "zh-CN",languages: ["zh-CN", "zh"],maxTouchPoints: 0,onLine: true,platform: "MacIntel",product: "Gecko",productSub: "20030107",userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",vendor: "Google Inc.",vendorSub: "",webdriver: false}
};Object.assign(global,params) # location、navigator设置成了全局变量# 在下面如果你使用location.href、navigator.appCodeName
我们在上面代码加入window = global这样window.location.href、window.appCodeName也能够获取到
注意:在nodejs中默认代码中会有一个global的关键字(全局变量)。
v1 = 123; # 写了个全局变量,相当于global赋了个值
console.log(global);
global.v1 = 123
global.v2 = 123
global.navigator = {...
}
console.log(v1,v2);navigator.userAgent
3.头条
3.1 分析请求
直接发送获取到结果:
import requests
# 这后面的就是我们需要注意的签名_02B4Z6wo009010IJgRwAAIDDtGCIOlEVa8tCLYWAALV5CV7lvAp2MWxOhC9EGgecK8orbBZu.elV57IoxY70Cqa8TI2XW0z.U3dOc84bBFDE83277HsB4oykmNYgkYd-9NbV8enDst.RVEBu76
res = requests.get(url="https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web&_signature=_02B4Z6wo009010IJgRwAAIDDtGCIOlEVa8tCLYWAALV5CV7lvAp2MWxOhC9EGgecK8orbBZu.elV57IoxY70Cqa8TI2XW0z.U3dOc84bBFDE83277HsB4oykmNYgkYd-9NbV8enDst.RVEBu76",headers={"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"}
)print(res.text)
3.2 _signature(寻找签名因为具有失效性,假如操作体育可能就获取不到了)
(null === (n = window.byted_acrawler) || void 0 === n || null === (a = n.sign) || void 0 === a ? void 0 : a.call(n, o)) || ""
n=undefined;
o={url:"https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web"}
var o = window.byted_acrawler.sign.call(n,o);
再简化一下
o={url:"https://www.toutiao.com/api/pc/list/feed?offset=0&channel_id=94349549395&max_behot_time=0&category=pc_profile_channel&disable_raw_data=true&aid=24&app_name=toutiao_web"}
var o = window.byted_acrawler.sign(o);
- 找到sign算法,看看他是内部实现(走不通)。
- 应该有一个js,给全局变量中赋值,
- 整体调用试试看,把JS粘贴过来,找到了这个JS加载完之后赋的值