package com.ruoyi.web.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.proxy.SimpleProxyProvider;

/**
 * Proxy helper for WebMagic crawlers.
 *
 * <p>Loads a list of proxy nodes from a YAML file (top-level key {@code proxies}, each entry
 * carrying at least {@code name}, {@code server} and {@code port}), keeps track of the
 * currently selected node, and builds {@link HttpClientDownloader} instances routed through it.
 *
 * <p>All state is held in plain static fields without synchronization.
 *
 * @author ruoyi
 */
public class WebMagicProxyUtil {

    private static final Logger logger = LoggerFactory.getLogger(WebMagicProxyUtil.class);

    /** Shared random source used by {@link #getRandomSleepTime(int, int)}. */
    private static final Random random = new Random();

    // Fallback proxy used when no configuration is available.
    private static final String DEFAULT_PROXY_HOST = "127.0.0.1";
    private static final String DEFAULT_PROXY_PORT = "7890";
    private static final String DEFAULT_PROXY_NAME = "默认代理";

    // Candidate locations of the proxy configuration, tried in this order.
    private static final String PRIMARY_CONFIG_PATH = "/www/java_mall/erp/config/test_proxy.yml";
    private static final String FALLBACK_CONFIG_PATH = "config/test_proxy.yml";

    /** Raw proxy node entries as read from the YAML file. */
    private static List<Map<String, Object>> proxyNodes = new ArrayList<>();

    // Currently selected proxy.
    private static String currentProxyName = "";
    private static String currentProxyHost = DEFAULT_PROXY_HOST;
    private static int currentProxyPort = Integer.parseInt(DEFAULT_PROXY_PORT);

    static {
        loadProxyConfig();
    }

    /**
     * Loads the proxy node list from the configuration file.
     *
     * <p>The absolute deployment path is tried first, then a path relative to the process
     * working directory (this is a file-system lookup, not a classpath lookup). Entries that
     * are not maps, or that lack any of {@code name}, {@code server}, {@code port}, are
     * dropped. Any failure is logged and leaves the previously loaded list untouched.
     */
    private static void loadProxyConfig() {
        try {
            File configFile = new File(PRIMARY_CONFIG_PATH);
            if (!configFile.exists()) {
                configFile = new File(FALLBACK_CONFIG_PATH);
            }
            if (!configFile.exists()) {
                logger.warn("未找到代理配置文件,将使用默认代理设置");
                return;
            }

            try (InputStream inputStream = new FileInputStream(configFile)) {
                // NOTE(review): Yaml.load() with the default constructor may instantiate
                // arbitrary types on older SnakeYAML versions. That is tolerable only while
                // this file is operator-controlled; switch to a safe constructor otherwise.
                Object root = new Yaml().load(inputStream);
                if (!(root instanceof Map) || !((Map<?, ?>) root).containsKey("proxies")) {
                    return;
                }

                Object proxies = ((Map<?, ?>) root).get("proxies");
                if (!(proxies instanceof List)) {
                    logger.warn("代理配置文件中的proxies不是列表, 已忽略");
                    return;
                }

                List<Map<String, Object>> loaded = new ArrayList<>();
                for (Object entry : (List<?>) proxies) {
                    if (!(entry instanceof Map)) {
                        continue;
                    }
                    // YAML mappings parsed by SnakeYAML are exposed as Map; keys are read as strings here.
                    @SuppressWarnings("unchecked")
                    Map<String, Object> node = (Map<String, Object>) entry;
                    if (node.containsKey("name") && node.containsKey("server") && node.containsKey("port")) {
                        loaded.add(node);
                    }
                }
                proxyNodes = loaded;
                logger.info("成功加载{}个代理节点配置", proxyNodes.size());
            }
        } catch (Exception e) {
            logger.error("加载代理配置失败", e);
        }
    }

    /**
     * Returns every available proxy node as a string-valued map with the keys
     * {@code name}, {@code server}, {@code port} and {@code type}.
     *
     * <p>If no nodes are loaded the configuration is re-read once; if the list is still empty
     * a single entry describing the built-in default proxy is returned, so the result is never
     * empty.
     *
     * @return a freshly built list of proxy descriptions
     */
    public static List<Map<String, String>> getAllProxies() {
        if (proxyNodes.isEmpty()) {
            loadProxyConfig();
        }

        List<Map<String, Object>> nodes = proxyNodes;
        if (nodes.isEmpty()) {
            Map<String, String> defaultProxy = new HashMap<>();
            defaultProxy.put("name", DEFAULT_PROXY_NAME);
            defaultProxy.put("server", DEFAULT_PROXY_HOST);
            defaultProxy.put("port", DEFAULT_PROXY_PORT);
            defaultProxy.put("type", "http");

            List<Map<String, String>> defaultProxyList = new ArrayList<>();
            defaultProxyList.add(defaultProxy);
            return defaultProxyList;
        }

        return nodes.stream()
                .map(WebMagicProxyUtil::toProxyInfo)
                .collect(Collectors.toList());
    }

    /**
     * Converts one raw YAML node into the string-valued view returned by
     * {@link #getAllProxies()}.
     */
    private static Map<String, String> toProxyInfo(Map<String, Object> node) {
        Map<String, String> proxyInfo = new HashMap<>();
        // Objects.toString(v, null) tolerates non-string scalars (e.g. a numeric name) while
        // still mapping an absent value to null instead of the text "null".
        proxyInfo.put("name", Objects.toString(node.get("name"), null));
        proxyInfo.put("server", Objects.toString(node.get("server"), null));
        proxyInfo.put("port", String.valueOf(node.get("port")));
        proxyInfo.put("type", Objects.toString(node.get("type"), null));
        return proxyInfo;
    }

    /**
     * Parses a YAML port value that may be a number or a numeric string.
     *
     * @return the port, or {@code -1} when the value is missing, malformed or outside 1-65535
     */
    private static int parsePort(Object raw) {
        int port = -1;
        if (raw instanceof Number) {
            port = ((Number) raw).intValue();
        } else if (raw instanceof String) {
            try {
                port = Integer.parseInt(((String) raw).trim());
            } catch (NumberFormatException ignored) {
                // Reported to the caller through the -1 sentinel below.
            }
        }
        return (port >= 1 && port <= 65535) ? port : -1;
    }

    /**
     * Selects the proxy node to use for subsequently created downloaders.
     *
     * @param proxyName name of a loaded node, or the default proxy's name
     * @return {@code true} if the selection changed to the requested proxy; {@code false} if
     *         the name is {@code null}, unknown, or the node has no usable server/port, in
     *         which case the current selection is left as it was
     */
    public static boolean setCurrentProxy(String proxyName) {
        Map<String, Object> targetProxy = null;
        if (proxyName != null) {
            for (Map<String, Object> node : proxyNodes) {
                if (proxyName.equals(Objects.toString(node.get("name"), null))) {
                    targetProxy = node;
                    break;
                }
            }
        }

        if (targetProxy != null) {
            Object server = targetProxy.get("server");
            int port = parsePort(targetProxy.get("port"));
            if (server == null || port < 0) {
                logger.warn("代理节点配置无效: {}, server={}, port={}",
                        proxyName, server, targetProxy.get("port"));
                return false;
            }

            currentProxyName = proxyName;
            currentProxyHost = server.toString();
            currentProxyPort = port;
            logger.info("已设置代理节点: {}, 地址: {}:{}", currentProxyName, currentProxyHost, currentProxyPort);
            return true;
        }

        if (DEFAULT_PROXY_NAME.equals(proxyName)) {
            currentProxyName = DEFAULT_PROXY_NAME;
            currentProxyHost = DEFAULT_PROXY_HOST;
            currentProxyPort = Integer.parseInt(DEFAULT_PROXY_PORT);
            logger.info("已设置默认代理: {}:{}", DEFAULT_PROXY_HOST, DEFAULT_PROXY_PORT);
            return true;
        }

        logger.warn("未找到指定的代理节点: {}, 将使用默认代理", proxyName);
        return false;
    }

    /**
     * Creates a downloader routed through the currently selected proxy.
     *
     * @return a new downloader; see {@link #getProxyDownloader(String, int)} for side effects
     */
    public static HttpClientDownloader getProxyDownloader() {
        return getProxyDownloader(currentProxyHost, currentProxyPort);
    }

    /**
     * Creates a downloader routed through the given proxy.
     *
     * <p>Side effect: besides configuring the returned downloader, this clears and then sets
     * the {@code http(s).proxyHost}/{@code proxyPort} and {@code http.nonProxyHosts} system
     * properties, which are JVM-wide settings.
     *
     * <p>If configuration fails the error is logged and the downloader is still returned,
     * possibly without a proxy provider attached.
     *
     * @param host proxy host name or address
     * @param port proxy port
     * @return a new downloader instance
     */
    public static HttpClientDownloader getProxyDownloader(String host, int port) {
        HttpClientDownloader httpClientDownloader = new HttpClientDownloader();
        clearSystemProxy();

        try {
            String portText = String.valueOf(port);
            System.setProperty("http.proxyHost", host);
            System.setProperty("http.proxyPort", portText);
            System.setProperty("https.proxyHost", host);
            System.setProperty("https.proxyPort", portText);
            System.setProperty("http.nonProxyHosts", "localhost|127.0.0.1");

            httpClientDownloader.setProxyProvider(SimpleProxyProvider.from(new Proxy(host, port)));

            logger.info("已配置代理: {}:{}", host, port);
        } catch (Exception e) {
            logger.error("设置代理失败", e);
        }

        return httpClientDownloader;
    }

    /**
     * Creates a downloader routed through the built-in default proxy.
     *
     * @return a new downloader; see {@link #getProxyDownloader(String, int)} for side effects
     */
    public static HttpClientDownloader getDefaultProxyDownloader() {
        return getProxyDownloader(DEFAULT_PROXY_HOST, Integer.parseInt(DEFAULT_PROXY_PORT));
    }

    /**
     * Removes the HTTP, HTTPS and SOCKS proxy system properties.
     */
    public static void clearSystemProxy() {
        System.clearProperty("http.proxyHost");
        System.clearProperty("http.proxyPort");
        System.clearProperty("https.proxyHost");
        System.clearProperty("https.proxyPort");
        System.clearProperty("socksProxyHost");
        System.clearProperty("socksProxyPort");
    }

    /**
     * Picks a random sleep duration in the half-open range {@code [min, max)}.
     *
     * @param min inclusive lower bound
     * @param max exclusive upper bound; when equal to {@code min}, {@code min} is returned
     * @return a value {@code v} with {@code min <= v < max}, or {@code min} if the range is empty
     * @throws IllegalArgumentException if {@code max < min}
     */
    public static int getRandomSleepTime(int min, int max) {
        if (max < min) {
            throw new IllegalArgumentException("max (" + max + ") must not be less than min (" + min + ")");
        }
        if (max == min) {
            // Random.nextInt(0) would throw; a zero-width range has exactly one sensible answer.
            return min;
        }
        return min + random.nextInt(max - min);
    }
}