WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / IT CANNOT LOGIN OR REGISTER ACCOUNTS / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FOUND SOMETHING MAY NOT GOOD FOR EVERYONE, CONTACT ADMIN AT ilovescratch@foxmail.com
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions projects/jupyter-server-ydoc/jupyter_server_ydoc/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ async def open(self, room_id: str) -> None: # type:ignore[override]
"""
On connection open.
"""
self.create_task(self._websocket_server.serve(self))

if isinstance(self.room, DocumentRoom):
# Close the connection if the document session expired
session_id = self.get_query_argument("sessionId", "")
Expand All @@ -237,16 +235,20 @@ async def open(self, room_id: str) -> None: # type:ignore[override]
1003,
f"Document session {session_id} expired. You need to reload this browser tab.",
)
return

# cancel the deletion of the room if it was scheduled
if self.room.cleaner is not None:
self.room.cleaner.cancel()

try:
# Initialize the room
# FIX: Initialize the room BEFORE starting to serve clients.
# This prevents a race condition where SYNC_STEP1 is sent before
# the document is loaded, causing cell duplication when the client
# responds with SYNC_STEP2.
# See: https://github.com/jupyterlab/jupyterlab/issues/14031
async with self._room_lock(self._room_id):
await self.room.initialize()
self._emit_awareness_event(self.current_user.username, "join")
except Exception as e:
_, _, file_id = decode_file_path(self._room_id)
file = self._file_loaders[file_id]
Expand All @@ -267,9 +269,15 @@ async def open(self, room_id: str) -> None: # type:ignore[override]
self._message_queue.put_nowait(b"")
self._cleanup_delay = 0
await self._clean_room()
return # Don't start serving if initialization failed

# NOW start serving - after successful initialization
self.create_task(self._websocket_server.serve(self))
self._emit(LogLevel.INFO, "initialize", "New client connected.")
self._emit_awareness_event(self.current_user.username, "join")
else:
# TransientRoom (e.g., awareness) - no initialization needed
self.create_task(self._websocket_server.serve(self))
if self._room_id != "JupyterLab:globalAwareness":
self._emit_awareness_event(self.current_user.username, "join")

Expand Down
31 changes: 30 additions & 1 deletion projects/jupyter-server-ydoc/jupyter_server_ydoc/rooms.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ async def initialize(self) -> None:
if not read_from_source:
# if YStore updates and source file are out-of-sync, resync updates with source
if self._document.source != model["content"]:
# TODO: Delete document from the store.
self._emit(
LogLevel.INFO,
"initialize",
Expand All @@ -151,6 +150,14 @@ async def initialize(self) -> None:
self._file.path,
self.ystore.__class__.__name__,
)
# FIX: Clear the document content BEFORE loading from file to prevent
# cell duplication. When out-of-sync is detected, the YDoc contains
# content from the YStore that may have corrupted state (e.g., from
# previous duplications). If we don't clear it, the set() method will
# try to MERGE the YStore content with the file content instead of
# REPLACING it, which causes cell duplication.
# See: https://github.com/jupyterlab/jupyterlab/issues/14031
self._clear_document()
Comment on lines +153 to +160
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing it here would undo the benefits of work done to address #509 and jupyterlab/jupyterlab#18100, e.g.

set() method will try to MERGE the YStore content with the file content instead of REPLACING it

This should be:

  • tested (there should be a test showing the problem, failing now, passing after a test)
  • addressed in jupyter-ydoc set() method for notebook, not in this package
  • hard reload should be only performed IF required to prevent UI side-effects

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be easier to test this by adding a UI test here, even if the fix ultimately goes to the jupyter-ydoc package because this repo has UI tests setup.

read_from_source = True

if read_from_source:
Expand Down Expand Up @@ -178,6 +185,28 @@ def _emit(self, level: LogLevel, action: str | None = None, msg: str | None = No

self._logger.emit(schema_id=JUPYTER_COLLABORATION_EVENTS_URI, data=data)

def _clear_document(self) -> None:
"""
Clears the document content to allow a clean reload from file.

This is necessary when an out-of-sync condition is detected between the
YStore and the file. Without clearing, the set() method would try to
merge the existing content with the new content, potentially causing
cell duplication in notebooks.

For notebooks (YNotebook), this clears the cells array. The metadata
and state will be properly updated by the subsequent set() call.
"""
# Check if the document has a ycells attribute (notebooks)
if hasattr(self._document, "ycells"):
# Use a transaction to ensure atomic operation
with self._document.ydoc.transaction():
self._document.ycells.clear()
self.log.info(
"Cleared document content in room %s to prevent duplication",
self._room_id,
)

async def stop(self) -> None:
"""
Stop the room.
Expand Down
Loading