123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- package J20250805.demo01;
- import java.io.*;
- import java.util.*;
- import java.util.concurrent.*;
/**
 * Concurrent de-duplication of URLs spread across several log files.
 *
 * <p>Requirements:
 * <ul>
 *   <li>Read several log files ({@code log1.txt}, {@code log2.txt}, ...) on multiple threads
 *       and collect every URL (one URL per line is assumed).</li>
 *   <li>Store the distinct URLs in a thread-safe collection.</li>
 *   <li>Once all threads have finished, write the de-duplicated URLs to
 *       {@code unique_urls.txt} in alphabetical order.</li>
 * </ul>
 *
 * @author WanJl
 * @version 1.0
 * @title Demo04
 * @create 2025/8/5
 */
public class Demo04 {

    /**
     * Thread-safe set of distinct URLs shared by all reader tasks.
     * {@link Collections#newSetFromMap(Map)} exposes the (unique) key set of a
     * {@link ConcurrentHashMap} as a {@link Set}, so concurrent {@code add} calls are safe.
     */
    private static final Set<String> set = Collections.newSetFromMap(new ConcurrentHashMap<>());

    /**
     * Reads one log file line by line and adds every non-blank, trimmed line to the shared set.
     * An I/O failure (including a missing file) is reported on standard error and does not
     * propagate; lines read before the failure stay in the set.
     *
     * @param fileName path of the log file to read
     */
    private static void processLogFile(String fileName) {
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            String url;
            while ((url = br.readLine()) != null) {
                url = url.trim(); // drop surrounding whitespace
                if (!url.isEmpty()) {
                    set.add(url);
                }
            }
            // Only announce completion when the file was actually read to the end.
            System.out.println("完成处理文件" + fileName);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a single handler covers both cases.
            e.printStackTrace();
        }
    }

    /**
     * Writes the collected URLs, sorted in natural (alphabetical) order, one per line.
     *
     * @param outputFileName path of the file to create or overwrite
     */
    private static void writeUniqueUrlsToFile(String outputFileName) {
        // Snapshot the set into a list so it can be sorted.
        List<String> list = new ArrayList<>(set);
        Collections.sort(list);
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputFileName))) {
            for (String url : list) {
                bw.write(url);
                bw.newLine();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Processes {@code log1.txt}, {@code log2.txt} and {@code log3.txt} in parallel, waits for
     * every reader task to finish, and then writes the sorted distinct URLs to
     * {@code unique_urls.txt}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        List<String> logFiles = Arrays.asList("log1.txt", "log2.txt", "log3.txt");
        ExecutorService executorService = Executors.newFixedThreadPool(5);
        List<Future<?>> pending = new ArrayList<>(logFiles.size());
        try {
            for (String logFile : logFiles) {
                pending.add(executorService.submit(() -> processLogFile(logFile)));
            }
        } finally {
            // Accept no further tasks; without this the pool's non-daemon threads
            // would keep the JVM alive after main returns.
            executorService.shutdown();
        }

        // The output must not be written until every reader has finished,
        // otherwise the file may be empty or incomplete.
        try {
            for (Future<?> task : pending) {
                try {
                    task.get();
                } catch (ExecutionException e) {
                    // Best effort: report the failed task and still write what was collected.
                    e.getCause().printStackTrace();
                }
            }
        } catch (InterruptedException e) {
            executorService.shutdownNow();
            Thread.currentThread().interrupt();
            return;
        }

        writeUniqueUrlsToFile("unique_urls.txt");
    }
}
|