将要抓取的内容
完整代码
如果您不需要解释,可以直接查看在线 IDE 中的完整代码示例(the full code example in the online IDE)。
const cheerio = require("cheerio");
const axios = require("axios");
// ID of the Google Shopping product whose results you want to get.
const productId = "14938360545167499200";

// Request options passed to axios together with the product URL.
const AXIOS_OPTIONS = {
  // A browser-like User-Agent header is one way to prevent the request from being blocked.
  headers: {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36",
  },
  // hl defines the language and gl the country to use for the Google search.
  params: { hl: "en", gl: "us" },
};
/**
 * Requests the Google Shopping "specs" page of `productId` and scrapes it.
 *
 * @returns {Promise<object>} Resolves to an object with `productId`, `title`,
 *   `reviews` and `rating` (both NaN when the aria-label is missing or not
 *   numeric), `extensions`, `description` and `specsResults`, which maps each
 *   category heading to an object of `{ specName: specValue }` pairs.
 *   The promise rejects when the axios request rejects.
 */
function getProductSpecs() {
  return axios
    .get(`https://www.google.com/shopping/product/${productId}/specs`, AXIOS_OPTIONS)
    .then(function ({ data }) {
      const $ = cheerio.load(data);

      // Table rows without the "vm91i" class are category headings; rows with
      // it are name/value pairs that belong to the most recent heading.
      const specsResults = {};
      let category;
      for (const row of Array.from($(".O2pTHb tr"))) {
        const $row = $(row);
        if (!$row.hasClass("vm91i")) {
          category = $row.text().trim();
          continue;
        }
        const name = $row.find(".ipBhab").text().trim();
        const value = $row.find(".AnDf0c").text().trim();
        // A spec row that appears before any heading ends up under the key
        // "undefined", exactly as it did before this rewrite.
        specsResults[category] = { ...specsResults[category], [name]: value };
      }

      return {
        productId,
        title: $(".BvQan").text().trim(),
        // replaceAll strips every thousands separator ("1,234,567" -> "1234567");
        // replace(",", "") only removed the first one and truncated the count.
        reviews: Number.parseInt($(".HiT7Id > span").attr("aria-label")?.replaceAll(",", ""), 10),
        rating: Number.parseFloat($(".UzThIf").attr("aria-label")),
        extensions: Array.from($(".OA4wid")).map((el) => $(el).text().replaceAll("·", "").trim()),
        description: $(".bwcLrc").text().trim(),
        specsResults,
      };
    });
}
// Run the scraper and print the complete result (depth: null disables truncation of nested objects).
// A failed request is reported and reflected in the exit code instead of being left as an unhandled rejection.
getProductSpecs()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
准备
首先,我们需要创建一个 Node.js* 项目,然后添加 npm 软件包 cheerio(用于解析 HTML 标记)和 axios(用于向网站发出请求)。
为此,在我们项目的目录中,打开命令行并输入:
$ npm init -y
然后:
$ npm i cheerio axios
*如果您尚未安装 Node.js,可以从 nodejs.org 下载(download it from nodejs.org),并按照安装文档(documentation)进行安装。
流程
首先,我们需要从HTML元素中提取数据。借助SelectorGadget Chrome extension,获得合适的CSS选择器非常容易:只需在浏览器中单击所需元素,即可得到对应的CSS选择器。但是,它并不总是完美地工作,尤其是当网站大量使用JavaScript时。
如果您想了解更多有关它们的信息,我们在Serpapi上有专门的web Scraping with CSS Selectors博客文章。
下面的GIF说明了选择结果不同部分的方法。
代码说明
const cheerio = require("cheerio");
const axios = require("axios");
接下来,我们写入产品ID和请求选项:带有 User-Agent 的 HTTP headers(用于模拟“真实”用户的访问),以及发出请求所需的参数:
// ID of the Google Shopping product whose results you want to get.
const productId = "14938360545167499200";

// Request options passed to axios together with the product URL.
const AXIOS_OPTIONS = {
  // A browser-like User-Agent header is one way to prevent the request from being blocked.
  headers: {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36",
  },
  // hl defines the language and gl the country to use for the Google search.
  params: { hl: "en", gl: "us" },
};
📌注意:axios 请求的默认 User-Agent 是 axios/<axios_version>,因此网站能够识别出这是由脚本发送的请求,并可能将其阻止。您可以查看自己的 User-Agent(Check what's your user-agent)。
接下来,我们编写一个函数,用于发出请求并返回接收到的数据。axios 请求的响应中包含 data 键,我们将其解构(destructured)出来,并用 cheerio 对其进行解析:
function getProductSpecs() {
return axios
.get(`https://www.google.com/shopping/product/${productId}/specs`, AXIOS_OPTIONS)
.then(function ({ data }) {
let $ = cheerio.load(data);
...
})
}
接下来,我们需要使用下一个方法获取页面的不同部分:
title: $(".BvQan")?.text().trim(),
reviews: parseInt($(".HiT7Id > span")?.attr("aria-label")?.replace(",", "")),
rating: parseFloat($(".UzThIf")?.attr("aria-label")),
extensions: Array.from($(".OA4wid")).map((el) => $(el).text().replaceAll("·", "").trim()),
description: $(".bwcLrc")?.text().trim(),
下面的屏幕截图显示DOM中的哪些结构具有规格表:
要获得规格,我们需要先声明一个空变量 category,用来保存当前类别。然后,我们使用 reduce() 方法(它可以把结果汇总成一个对象)遍历由 Array.from() 方法构建的数组。
在 reduce 函数中,我们检查当前元素是否没有 "vm91i" 类(hasClass() 方法);如果没有,就把它的文本设置为当前类别。接下来的元素都属于这个类别,因此我们使用展开语法(spread syntax)把它们添加到当前类别中,然后对下一个类别重复这一过程:
let category;
return {
...
specsResults: Array.from($(".O2pTHb tr")).reduce((results, el) => {
if (!$(el).hasClass("vm91i")) {
category = $(el).text().trim();
} else {
results[`${category}`] = {
...results[`${category}`],
[$(el).find(".ipBhab")?.text().trim()]: $(el).find(".AnDf0c")?.text().trim(),
};
}
return { ...results };
}, {}),
};
现在我们可以启动我们的解析器:
$ node YOUR_FILE_NAME # YOUR_FILE_NAME is the name of your .js file
输出
{
"productId":"14938360545167499200",
"title":"Apple iPhone 12 Pro - 128 GB - Silver - Unlocked",
"reviews":5109,
"rating":4.5,
"extensions":[
"Smartphone",
"Dual SIM",
"iOS",
"5G",
"With Wireless Charging",
"Triple Lens",
"GSM",
"CDMA",
"With OLED Display",
"Facial Recognition"
],
"description":"Apple · iPhone · iPhone 12 · iPhone 12 Pro · iOS · 6.1′′ · Facial Recognition · 12 MP front camera · 12 MP rear camera · Smartphone · With Wireless Charging. Beautifully bright 6.1-inch Super Retina XDR display. Ceramic Shield with 4x better drop performance. Incredible low-light photography with a new Pro camera system, and 4x optical zoom range. Cinema-grade Dolby Vision video recording, editing, and playback. Night mode portraits and next-level AR experiences with the LiDAR Scanner. Powerful A14 Bionic chip. 5G capable. And new MagSafe accessories for easy attach and faster wireless charging. For infinitely spectacular possibilities. Legal. The display has rounded corners. When measured as a rectangle, the screen is 6.06 inches diagonally. Actual viewable area is less. Claim based on iPhone 12 Pro Ceramic Shield front compared with previous-generation iPhone. Data plan required. 5G is available in select markets and through select carriers. Speeds vary based on site conditions and carrier. Accessories are sold separately. Apple ProRAW coming soon. iPhone 12 Pro is splash, water, and dust resistant and was tested under controlled laboratory conditions with a rating of IP68 under IEC standard 60529 (maximum depth of 6 meters up to 30 minutes). Splash, water, and dust resistance are not permanent conditions. Resistance might decrease as a result of normal wear. Do not attempt to charge a wet iPhone; refer to the user guide for cleaning and drying instructions.",
"specsResults":{
"General":{
"Product Type":"Smartphone",
"Manufacturer Model Number":"A2341",
"Form Factor":"Touch",
...and other specs
},
"Cellular":{
"Technology":"CDMA2000 1X / GSM / WCDMA (UMTS)",
"Mobile Broadband Generation":"5G",
"Service Provider":"Not specified",
...and other specs
},
...and other categories
}
}
使用 SerpApi 的 Google Product API
本节是为了显示DIY解决方案与我们的解决方案之间的比较。
最大的区别是您不需要从头开始创建解析器并维护它。
请求也有可能在某个时候被 Google 阻止;我们会在后端处理这种情况,因此您无需自己想办法解决,也无需考虑该使用哪家验证码(CAPTCHA)服务或代理提供商。
首先,我们需要安装 google-search-results-nodejs:
npm i google-search-results-nodejs
如果您不需要解释,这里是完整代码示例(full code example):
const SerpApi = require("google-search-results-nodejs");
// Create the SerpApi search client; the API key is read from the API_KEY environment variable.
const search = new SerpApi.GoogleSearch(process.env.API_KEY); // your API key from serpapi.com
// Search parameters sent with the request.
const params = {
  // ID of the product you want to get the results for
  product_id: "14938360545167499200",
  // search engine
  engine: "google_product",
  // device to use to get the results: "desktop" (default), "tablet", or "mobile"
  device: "desktop",
  // language to use for the Google search
  hl: "en",
  // country to use for the Google search
  gl: "us",
  // fetch specs results
  specs: true,
};
/**
 * Promise adapter around the callback-style `search.json` call.
 *
 * @returns {Promise<*>} Resolves with the value `search.json` passes to its
 *   callback for `params`. The promise is never rejected by this wrapper.
 */
const getJson = () =>
  new Promise((resolve) => {
    search.json(params, (json) => resolve(json));
  });
/**
 * Awaits the JSON response and flattens it into a single result object.
 *
 * @returns {Promise<object>} The properties of `product_results` plus a
 *   `specsResults` property holding the response's `specs_results`.
 */
const getResults = async () => {
  const { product_results: productResults, specs_results: specsResults } = await getJson();
  return { ...productResults, specsResults };
};
// Fetch the results and print them in full (depth: null disables truncation of nested objects).
// A failure is reported and reflected in the exit code instead of being left as an unhandled rejection.
getResults()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
代码说明
首先,我们需要从 google-search-results-nodejs 库中声明 SerpApi,并使用您在 SerpApi 的 API 密钥定义新的 search 实例:
const SerpApi = require("google-search-results-nodejs");
// Create the SerpApi search client; the API key is read from the API_KEY environment variable
// (a bare API_KEY identifier is not declared anywhere and would throw a ReferenceError).
const search = new SerpApi.GoogleSearch(process.env.API_KEY); // your API key from serpapi.com
接下来,我们编写发出请求所需的参数:
// Search parameters sent with the request.
const params = {
  // ID of the product you want to get the results for
  product_id: "14938360545167499200",
  // search engine
  engine: "google_product",
  // device to use to get the results: "desktop" (default), "tablet", or "mobile"
  device: "desktop",
  // language to use for the Google search
  hl: "en",
  // country to use for the Google search
  gl: "us",
  // fetch specs results
  specs: true,
};
接下来,我们把 SerpApi 库中的搜索方法包装成 Promise,以便进一步处理搜索结果:
/**
 * Promise adapter around the callback-style `search.json` call.
 *
 * @returns {Promise<*>} Resolves with the value `search.json` passes to its
 *   callback for `params`. The promise is never rejected by this wrapper.
 */
const getJson = () =>
  new Promise((resolve) => {
    search.json(params, (json) => resolve(json));
  });
最后,我们声明从页面获取数据并将其返回的函数 getResults:
const getResults = async () => {
...
};
在此函数中,我们获取带有结果的 json,并使用展开语法(spread syntax)从 json 中取出数据并返回对象:
const json = await getJson();
return { ...json.product_results, specsResults: json.specs_results };
之后,我们运行 getResults 函数,并使用 console.dir 方法在控制台中打印所有接收到的信息;该方法允许您传入一个带有必要参数的对象来更改默认输出选项:
// Fetch the results and print them in full (depth: null disables truncation of nested objects).
// A failure is reported and reflected in the exit code instead of being left as an unhandled rejection.
getResults()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
输出
{
"product_id":14938360545167500000,
"title":"Apple iPhone 12 Pro - 128 GB - Silver - Unlocked",
"reviews":5109,
"rating":4.5,
"extensions":[
"Smartphone",
"Dual SIM",
"iOS",
"5G",
"With Wireless Charging",
"Triple Lens",
"GSM",
"CDMA",
"With OLED Display",
"Facial Recognition"
],
"description":"Apple · iPhone · iPhone 12 · iPhone 12 Pro · iOS · 6.1′′ · Facial Recognition · 12 MP front camera · 12 MP rear camera · Smartphone · With Wireless Charging. Beautifully bright 6.1-inch Super Retina XDR display. Ceramic Shield with 4x better drop performance. Incredible low-light photography with a new Pro camera system, and 4x optical zoom range. Cinema-grade Dolby Vision video recording, editing, and playback. Night mode portraits and next-level AR experiences with the LiDAR Scanner. Powerful A14 Bionic chip. 5G capable. And new MagSafe accessories for easy attach and faster wireless charging. For infinitely spectacular possibilities. Legal. The display has rounded corners. When measured as a rectangle, the screen is 6.06 inches diagonally. Actual viewable area is less. Claim based on iPhone 12 Pro Ceramic Shield front compared with previous-generation iPhone. Data plan required. 5G is available in select markets and through select carriers. Speeds vary based on site conditions and carrier. Accessories are sold separately. Apple ProRAW coming soon. iPhone 12 Pro is splash, water, and dust resistant and was tested under controlled laboratory conditions with a rating of IP68 under IEC standard 60529 (maximum depth of 6 meters up to 30 minutes). Splash, water, and dust resistance are not permanent conditions. Resistance might decrease as a result of normal wear. Do not attempt to charge a wet iPhone; refer to the user guide for cleaning and drying instructions.",
"specsResults":{
"general":{
"product_type":"Smartphone",
"manufacturer_model_number":"A2341",
"form_factor":"Touch",
...and other specs
},
"cellular":{
"technology":"CDMA2000 1X / GSM / WCDMA (UMTS)",
"mobile_broadband_generation":"5G",
"service_provider":"Not specified",
...and other specs
},
...and other categories
}
}
链接
如果您想查看一些用serpapi制定的项目,write me a message。
添加一个 Feature Request💫 或 Bug🐞