/// <summary>
/// Spider definition that crawls a JD.com product listing and stores one
/// <see cref="Product"/> row per SKU element found on each listing page.
/// </summary>
public class JdSkuSpider : ISpiderContext
{
    /// <summary>
    /// Creates the spider configuration (scheduler, pipeline, downloader and
    /// start URLs) and binds it to the <see cref="Product"/> entity.
    /// </summary>
    /// <returns>
    /// A <see cref="SpiderContextBuilder"/> wrapping the configured context and
    /// the <see cref="Product"/> entity type.
    /// </returns>
    public SpiderContextBuilder GetBuilder()
    {
        // Side effect: sets the logger's task id before the context is built.
        Log.TaskId = "JD SKU Weekly";

        // Values exposed to ExtractType.Enviroment properties of Product
        // ("name" and "cat3"). "手机" is Chinese for "mobile phone".
        var startUrlData = new Dictionary<string, object>
        {
            { "name", "手机" },
            { "cat3", "9987" }
        };

        var context = new SpiderContext
        {
            SpiderName = "JD SKU " + DateTimeUtils.MONDAY_RUN_ID,
            CachedSize = 1,
            ThreadNum = 8,
            Site = new Site
            {
                EncodingName = "UTF-8"
            },
            // NOTE(review): Password is an empty string here - presumably the
            // Redis instance is unauthenticated or configured elsewhere; confirm.
            Scheduler = new RedisScheduler
            {
                Host = "redis",
                Port = 6379,
                Password = ""
            },
            StartUrls = new Dictionary<string, Dictionary<string, object>>
            {
                {
                    "http://list.jd.com/list.html?cat=9987,653,655&page=1&go=0&JL=6_0_0&ms=5",
                    startUrlData
                }
            },
            // NOTE(review): ConnectString is an empty string here - presumably it
            // must be supplied before the pipeline can write; confirm.
            Pipeline = new MysqlPipeline
            {
                ConnectString = ""
            },
            Downloader = new HttpDownloader()
        };

        return new SpiderContextBuilder(context, typeof(Product));
    }

    /// <summary>
    /// One product (SKU) entry extracted from a listing page. Each property's
    /// <c>StoredAs</c> attribute names its storage column and each
    /// <c>PropertyExtractBy</c> attribute gives the expression used to fill it.
    /// </summary>
    [Schema("jd", "sku_v2", Suffix = TableSuffix.Monday)]
    [TargetUrl(new[] { @"page=[0-9]+" }, "//*[@id=\"J_bottomPage\"]")]
    [TypeExtractBy(Expression = "//div[contains(@class,'j-sku-item')]", Multi = true)]
    [Indexes(Primary = "sku")]
    public class Product : ISpiderEntity
    {
        /// <summary>Category display name, read from the start URL's "name" value.</summary>
        [StoredAs("category", DataType.String, 20)]
        [PropertyExtractBy(Expression = "name", Type = ExtractType.Enviroment)]
        public string CategoryName { get; set; }

        /// <summary>Category id, read from the start URL's "cat3" value.</summary>
        // NOTE(review): declared as int but stored as String(20), and the start
        // URL supplies the string "9987" - presumably the framework converts; confirm.
        [StoredAs("cat3", DataType.String, 20)]
        [PropertyExtractBy(Expression = "cat3", Type = ExtractType.Enviroment)]
        public int CategoryId { get; set; }

        /// <summary>Link taken from the href of the anchor in the item's first div.</summary>
        [StoredAs("url", DataType.Text)]
        [PropertyExtractBy(Expression = "./div[1]/a/@href")]
        public string Url { get; set; }

        /// <summary>SKU taken from the item's data-sku attribute; primary index of the table.</summary>
        [StoredAs("sku", DataType.String, 25)]
        [PropertyExtractBy(Expression = "./@data-sku")]
        public string Sku { get; set; }

        /// <summary>Comment count taken from the link inside the "p-commit" div.</summary>
        // NOTE(review): declared as long but stored as String(20); confirm the
        // extracted text is always numeric.
        [StoredAs("commentscount", DataType.String, 20)]
        [PropertyExtractBy(Expression = "./div[@class='p-commit']/strong/a")]
        public long CommentsCount { get; set; }

        /// <summary>Shop name taken from the first link in the "p-shop hide" div.</summary>
        [StoredAs("shopname", DataType.String, 100)]
        [PropertyExtractBy(Expression = "./div[@class='p-shop hide']/span[1]/a[1]")]
        public string ShopName { get; set; }

        /// <summary>Product name taken from the em element inside the "p-name" div.</summary>
        [StoredAs("name", DataType.String, 50)]
        [PropertyExtractBy(Expression = "./div[@class='p-name']/a/em")]
        public string Name { get; set; }

        /// <summary>Shop id column.</summary>
        // NOTE(review): no PropertyExtractBy is declared, so nothing visible here
        // populates this value - confirm whether that is intentional.
        [StoredAs("shopid", DataType.String, 25)]
        public string ShopId { get; set; }

        /// <summary>Vendor id taken from the item's venderid attribute.</summary>
        [StoredAs("venderid", DataType.String, 25)]
        [PropertyExtractBy(Expression = "./@venderid")]
        public string VenderId { get; set; }

        /// <summary>Value of the item's jdzy_shop_id attribute.</summary>
        [StoredAs("jdzy_shop_id", DataType.String, 25)]
        [PropertyExtractBy(Expression = "./@jdzy_shop_id")]
        public string JdzyShopId { get; set; }

        /// <summary>Timestamp column; evaluates to the local time at the moment it is read.</summary>
        // NOTE(review): getter-only, so the "now" extraction rule cannot assign
        // to it; whether the attribute has any effect depends on the framework - confirm.
        [StoredAs("cdate", DataType.Time)]
        [PropertyExtractBy(Expression = "now", Type = ExtractType.Enviroment)]
        public DateTime CDate => DateTime.Now;
    }
}