Python爬虫(入门+进阶)学习笔记 1-8 使用自动化神器Selenium爬取动态网页(案例三:爬取淘宝商品)
selenium 是一套完整的web应用程序测试系统,包含了测试的录制(selenium IDE),编写及运行(Selenium Remote Control)和测试的并行处理(Selenium Grid)。Selenium的核心Sel
权限没有,则隐藏 function forum_list_access_filter($forumlist, $gid, $allow = 'allowread') { global $grouplist; if (empty($forumlist)) return array(); if (1 == $gid) return $forumlist; $forumlist_filter = $forumlist; $group = $grouplist[$gid]; foreach ($forumlist_filter as $fid => $forum) { if (empty($forum['accesson']) && empty($group[$allow]) || !empty($forum['accesson']) && empty($forum['accesslist'][$gid][$allow])) { unset($forumlist_filter[$fid]); } unset($forumlist_filter[$fid]['accesslist']); } return $forumlist_filter; } function forum_filter_moduid($moduids) { $moduids = trim($moduids); if (empty($moduids)) return ''; $arr = explode(',', $moduids); $r = array(); foreach ($arr as $_uid) { $_uid = intval($_uid); $_user = user_read($_uid); if (empty($_user)) continue; if ($_user['gid'] > 4) continue; $r[] = $_uid; } return implode(',', $r); } function forum_safe_info($forum) { //unset($forum['moduids']); return $forum; } function forum_filter($forumlist) { foreach ($forumlist as &$val) { unset($val['brief'], $val['announcement'], $val['seo_title'], $val['seo_keywords'], $val['create_date_fmt'], $val['icon_url'], $val['modlist']); } return $forumlist; } function forum_format_url($forum) { global $conf; if (0 == $forum['category']) { // 列表URL $url = url('list-' . $forum['fid'], '', FALSE); } elseif (1 == $forum['category']) { // 频道 $url = url('category-' . $forum['fid'], '', FALSE); } elseif (2 == $forum['category']) { // 单页 $url = url('read-' . trim($forum['brief']), '', FALSE); } if ($conf['url_rewrite_on'] > 1 && $forum['well_alias']) { if (0 == $forum['category'] || 1 == $forum['category']) { $url = url($forum['well_alias'], '', FALSE); } elseif (2 == $forum['category']) { // 单页 $url = ($forum['threads'] && $forum['brief']) ? url($forum['well_alias'] . '-' . trim($forum['brief']), '', FALSE) : url($forum['well_alias'], '', FALSE); } } return $url; } function well_forum_alias() { $forumlist = forum_list_cache(); if (empty($forumlist)) return ''; $key = 'forum-alias'; static $cache = array(); if (isset($cache[$key])) return $cache[$key]; $cache[$key] = array(); foreach ($forumlist as $val) { if ($val['well_alias']) $cache[$key][$val['fid']] = $val['well_alias']; } return array_flip($cache[$key]); } function well_forum_alias_cache() { global $conf; $key = 'forum-alias-cache'; static $cache = array(); // 用静态变量只能在当前 request 生命周期缓存,跨进程需要再加一层缓存:redis/memcached/xcache/apc if (isset($cache[$key])) return $cache[$key]; if ('mysql' == $conf['cache']['type']) { $arr = well_forum_alias(); } else { $arr = cache_get($key); if (NULL === $arr) { $arr = well_forum_alias(); !empty($arr) AND cache_set($key, $arr); } } $cache[$key] = empty($arr) ? '' : $arr; return $cache[$key]; } ?> $v = implode(",", $v); $temp[] = $v; } // 去掉重复的字符串,也就是重复的一维数组 $temp = array_unique($temp); // 再将拆开的数组重新组装 $output = array(); foreach ($temp as $k => $v) { if ($stkeep) $k = $starr[$k]; if ($ndformat) { $temparr = explode(",", $v); foreach ($temparr as $ndkey => $ndval) $output[$k][$ndarr[$ndkey]] = $ndval; } else $output[$k] = explode(",", $v); } return $output; } // 合并二维数组 如重复 值以第一个数组值为准 function array2_merge($array1, $array2, $key = '') { if (empty($array1) || empty($array2)) return NULL; $arr = array(); foreach ($array1 as $k => $v) { isset($v[$key]) ? $arr[$v[$key]] = array_merge($v, $array2[$k]) : $arr[] = array_merge($v, $array2[$k]); } return $arr; } /* * 对二维数组排序 两个数组必须有一个相同的键值 * $array1 需要排序数组 * $array2 按照该数组key排序 * */ function array2_sort_key($array1, $array2, $key = '') { if (empty($array1) || empty($array2)) return NULL; $arr = array(); foreach ($array2 as $k => $v) { if (isset($v[$key]) && $v[$key] == $array1[$v[$key]][$key]) { $arr[$v[$key]] = $array1[$v[$key]]; } else { $arr[] = $v; } } return $arr; } ?>