WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / IT CANNOT LOGIN OR REGISTER ACCOUNTS / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FOUND SOMETHING MAY NOT GOOD FOR EVERYONE, CONTACT ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit e5ff4c6

Browse files
author
Diljit
authored
fix: eliminate concurrency bug in file deletion logic by removing side effects from parallel stream (#40744)
## Description Refactored the `updateEntitiesInRepo` method to avoid mutating a non-thread-safe `HashSet` (`filesInRepo`) inside a parallel stream. The previous implementation added file paths to `filesInRepo` as a side effect within a `.parallelStream()`, which could lead to race conditions and data corruption. The new approach uses a pure mapping operation to build the map and then derives the set of file paths, ensuring thread safety and improving code clarity. Fixes #`Issue Number` _or_ Fixes `Issue URL` > [!WARNING] > _If no issue exists, please create an issue first, and check with the maintainers if the issue is valid._ ## Automation /ok-to-test tags="@tag.All" ### 🔍 Cypress test results <!-- This is an auto-generated comment: Cypress test results --> > [!IMPORTANT] > 🟣 🟣 🟣 Your tests are running. > Tests running at: <https://github.com/appsmithorg/appsmith/actions/runs/15209038828> > Commit: 3e3b283 > Workflow: `PR Automation test suite` > Tags: `@tag.All` > Spec: `` > <hr>Fri, 23 May 2025 11:19:05 UTC <!-- end of auto-generated comment: Cypress test results --> ## Communication Should the DevRel and Marketing teams inform users about this change? - [ ] Yes - [ ] No <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **Bug Fixes** - Improved error recovery when saving updates to the Git repository, ensuring changes are still applied even if an error occurs during the update process. - **Refactor** - Enhanced the process for detecting and handling updated or deleted files in the Git repository, leading to more reliable synchronization and cleaner repository state. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 1552b1d commit e5ff4c6

File tree

1 file changed

+93
-24
lines changed

1 file changed

+93
-24
lines changed

app/server/appsmith-git/src/main/java/com/appsmith/git/files/FileUtilsCEImpl.java

Lines changed: 93 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -265,14 +265,27 @@ public Mono<Path> saveArtifactToGitRepo(
265265
baseRepoSuffix, branchName)
266266
.name("constructGitResourceMapFromGitRepo");
267267

268-
return gitResourceMapFromFSMono.flatMap(gitResourceMapFromFS -> {
269-
try {
270-
updateEntitiesInRepo(gitResourceMapFromDB, baseRepo, gitResourceMapFromFS);
271-
} catch (IOException e) {
272-
return Mono.error(e);
273-
}
274-
return Mono.just(baseRepo);
275-
});
268+
return gitResourceMapFromFSMono
269+
.flatMap(gitResourceMapFromFS -> {
270+
try {
271+
updateEntitiesInRepo(gitResourceMapFromDB, baseRepo, gitResourceMapFromFS);
272+
} catch (IOException e) {
273+
return Mono.error(e);
274+
}
275+
return Mono.just(baseRepo);
276+
})
277+
.onErrorResume(error -> {
278+
return Mono.defer(() -> {
279+
return Mono.just(baseRepo).flatMap(baseRepo1 -> {
280+
try {
281+
updateEntitiesInRepoFallback(gitResourceMapFromDB, baseRepo);
282+
return Mono.just(baseRepo1);
283+
} catch (IOException e) {
284+
return Mono.error(e);
285+
}
286+
});
287+
});
288+
});
276289
})
277290
.subscribeOn(scheduler);
278291
}
@@ -336,23 +349,79 @@ protected Set<String> updateEntitiesInRepo(
336349
throws IOException {
337350
Map<GitResourceIdentity, Object> resourceMapFromDB = gitResourceMapFromDB.getGitResourceMap();
338351

339-
Set<String> filesInRepo = new HashSet<>();
340-
Set<String> updatedFiles = new HashSet<>();
341-
Set<String> filesInDB = resourceMapFromDB.keySet().parallelStream()
352+
Set<String> filesPathsFromDB = resourceMapFromDB.keySet().parallelStream()
342353
.map(gitResourceIdentity -> gitResourceIdentity.getFilePath())
343354
.collect(Collectors.toSet());
344355

345-
Map<String, Object> filesInFS = gitResourceMapFromFS.getGitResourceMap().entrySet().parallelStream()
356+
Map<String, Object> filePathToObjectsFromFS =
357+
gitResourceMapFromFS.getGitResourceMap().entrySet().parallelStream()
358+
.collect(Collectors.toMap(entry -> entry.getKey().getFilePath(), entry -> entry.getValue()));
359+
360+
Set<String> filePathsFromFS = new HashSet<>(filePathToObjectsFromFS.keySet());
361+
362+
// Readme files shouldn't be modified/deleted/or updated.
363+
filePathsFromFS.remove(README_FILE_NAME);
364+
filePathsFromFS.removeAll(filesPathsFromDB);
365+
366+
// Delete all the files because they are no longer needed
367+
// This covers both older structures of storing files and,
368+
// legitimate changes in the artifact that might cause deletions
369+
filePathsFromFS.stream().parallel().forEach(filePath -> {
370+
try {
371+
Files.deleteIfExists(baseRepo.resolve(filePath));
372+
} catch (IOException e) {
373+
// We ignore files that could not be deleted and expect to come back to this at a later point
374+
// Just log the path for now
375+
log.error("Unable to delete file at path: {}", filePath);
376+
}
377+
});
378+
379+
// Now go through the resource map and based on resource type, check if the resource is modified before
380+
// serialization
381+
Set<String> newAndUpdatedFilePaths = resourceMapFromDB.entrySet().parallelStream()
346382
.map(entry -> {
347-
filesInRepo.add(entry.getKey().getFilePath());
348-
return entry;
383+
GitResourceIdentity key = entry.getKey();
384+
boolean resourceUpdated = true;
385+
try {
386+
resourceUpdated = fileOperations.hasFileChanged(
387+
entry.getValue(), filePathToObjectsFromFS.get(key.getFilePath()));
388+
} catch (IOException e) {
389+
log.error("Error while checking if file has changed", e);
390+
}
391+
392+
if (resourceUpdated) {
393+
log.info("Resource updated: {}", key.getFilePath());
394+
String filePath = key.getFilePath();
395+
saveResourceCommon(entry.getValue(), baseRepo.resolve(filePath));
396+
397+
return filePath;
398+
}
399+
400+
return null;
349401
})
350-
.collect(Collectors.toMap(entry -> entry.getKey().getFilePath(), entry -> entry.getValue()));
402+
.filter(Objects::nonNull)
403+
.collect(Collectors.toSet());
404+
405+
Set<String> allFileChanges = new HashSet<>();
406+
allFileChanges.addAll(newAndUpdatedFilePaths);
407+
allFileChanges.addAll(filePathsFromFS);
408+
return allFileChanges;
409+
}
410+
411+
protected Set<String> updateEntitiesInRepoFallback(GitResourceMap gitResourceMap, Path baseRepo)
412+
throws IOException {
413+
ModifiedResources modifiedResources = gitResourceMap.getModifiedResources();
414+
Map<GitResourceIdentity, Object> resourceMap = gitResourceMap.getGitResourceMap();
415+
416+
Set<String> filesInRepo = getExistingFilesInRepo(baseRepo);
417+
418+
Set<String> updatedFilesToBeSerialized = resourceMap.keySet().parallelStream()
419+
.map(gitResourceIdentity -> gitResourceIdentity.getFilePath())
420+
.collect(Collectors.toSet());
351421

352422
// Remove all files that need to be serialized from the existing files list, as well as the README file
353423
// What we are left with are all the files to be deleted
354-
355-
filesInRepo.removeAll(filesInDB);
424+
filesInRepo.removeAll(updatedFilesToBeSerialized);
356425
filesInRepo.remove(README_FILE_NAME);
357426

358427
// Delete all the files because they are no longer needed
@@ -370,19 +439,20 @@ protected Set<String> updateEntitiesInRepo(
370439

371440
// Now go through the resource map and based on resource type, check if the resource is modified before
372441
// serialization
373-
resourceMapFromDB.entrySet().parallelStream()
442+
// Or simply choose the mechanism for serialization
443+
Map<GitResourceType, GitResourceType> modifiedResourcesTypes = getModifiedResourcesTypes();
444+
return resourceMap.entrySet().parallelStream()
374445
.map(entry -> {
375446
GitResourceIdentity key = entry.getKey();
376447
boolean resourceUpdated = true;
377-
try {
448+
if (modifiedResourcesTypes.containsKey(key.getResourceType()) && modifiedResources != null) {
449+
GitResourceType comparisonType = modifiedResourcesTypes.get(key.getResourceType());
450+
378451
resourceUpdated =
379-
fileOperations.hasFileChanged(entry.getValue(), filesInFS.get(key.getFilePath()));
380-
} catch (IOException e) {
381-
log.error("Error while checking if file has changed", e);
452+
modifiedResources.isResourceUpdatedNew(comparisonType, key.getResourceIdentifier());
382453
}
383454

384455
if (resourceUpdated) {
385-
log.info("Resource updated: {}", key.getFilePath());
386456
String filePath = key.getFilePath();
387457
saveResourceCommon(entry.getValue(), baseRepo.resolve(filePath));
388458

@@ -392,7 +462,6 @@ protected Set<String> updateEntitiesInRepo(
392462
})
393463
.filter(Objects::nonNull)
394464
.collect(Collectors.toSet());
395-
return updatedFiles;
396465
}
397466

398467
protected Set<String> updateEntitiesInRepo(ApplicationGitReference applicationGitReference, Path baseRepo) {

0 commit comments

Comments
 (0)