← 返回首页
perf: add TTL-based stale lease cleanup, reduce workspace lookups · Sibyl-Research-Team/AutoResearch-SibylSystem@cce8e3a · GitHub
Skip to content

Navigation Menu

Toggle navigation
Sign in
Appearance settings
Search or jump to...

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Include my email address so I can be contacted

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Resetting focus

Commit cce8e3a

Browse files
perf: add TTL-based stale lease cleanup, reduce workspace lookups
1 parent 86f62d3 commit cce8e3a

2 files changed

Lines changed: 75 additions & 0 deletions

File tree

‎sibyl/gpu_scheduler.py‎

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,15 +147,29 @@ def _lease_entry_matches_running(gpu_id: int, entry: dict) -> bool:
147147
return False
148148

149149

150+
# Unconfirmed leases older than this many seconds (1 hour) are dropped.
_LEASE_TTL_SEC = 3600
# Leases younger than this many seconds are kept without a workspace check.
_LEASE_FRESH_SEC = 60


def _clean_global_gpu_leases_unlocked(leases: dict[str, dict]) -> dict[str, dict]:
    """Return a copy of *leases* with malformed and stale entries removed.

    An entry is kept when any of the following holds:

    * it was claimed less than ``_LEASE_FRESH_SEC`` seconds ago (fast path,
      no workspace check is made);
    * ``_lease_entry_matches_running`` confirms it;
    * it is not confirmed but was claimed less than ``_LEASE_TTL_SEC``
      seconds ago (the owner may be in transition).

    Entries whose key is not an integer, or whose value is not a dict, are
    dropped. A missing or non-numeric ``claimed_at`` is treated as ``0``,
    i.e. as an arbitrarily old lease, so such an entry survives only if the
    workspace check confirms it.

    Args:
        leases: Mapping of GPU id (as a string) to the lease record.

    Returns:
        A new dict keyed by the normalised GPU id string. The lease records
        themselves are reused, not copied.
    """
    cleaned: dict[str, dict] = {}
    now = time.time()
    for gpu_key, entry in leases.items():
        try:
            gpu_id = int(gpu_key)
        except (TypeError, ValueError):
            continue
        # A record that is not a dict cannot be aged; calling .get() on it
        # would abort the whole cleanup pass.
        if not isinstance(entry, dict):
            continue

        raw_claimed_at = entry.get("claimed_at", 0)
        is_number = isinstance(raw_claimed_at, (int, float)) and not isinstance(
            raw_claimed_at, bool
        )
        # None / strings would make the subtraction below raise TypeError.
        claimed_at = raw_claimed_at if is_number else 0
        age = now - claimed_at
        # NOTE(review): a claimed_at in the future gives a negative age and is
        # therefore always kept by the fast path — confirm this is acceptable.

        # Fast path: very recent leases are kept without the workspace check.
        if age < _LEASE_FRESH_SEC:
            cleaned[str(gpu_id)] = entry
            continue

        # The workspace check is evaluated first, exactly as before; only when
        # it does not confirm the lease does the TTL decide.
        # NOTE(review): any lease younger than the TTL is kept regardless of
        # the check's result, so the check could be skipped for those too if
        # _lease_entry_matches_running has no side effects — confirm.
        if _lease_entry_matches_running(gpu_id, entry) or age < _LEASE_TTL_SEC:
            cleaned[str(gpu_id)] = entry
        # else: not confirmed AND older than the TTL -> drop
    return cleaned
160174

161175

‎tests/test_gpu_scheduler.py‎

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,67 @@ def test_script_includes_dispatch_logic(self):
11831183
assert "DISPATCH" in script
11841184

11851185

1186+
# ══════════════════════════════════════════════
1187+
# TTL-based stale lease cleanup
1188+
# ══════════════════════════════════════════════
1189+
1190+
def test_stale_leases_cleaned_by_ttl(tmp_path, monkeypatch):
    """An unconfirmed lease claimed longer ago than the TTL is dropped."""
    import time as clock
    from sibyl import gpu_scheduler as sched

    # Point the scheduler at a throwaway lease file for the duration of the test.
    leases_file = tmp_path / "gpu_leases.json"
    monkeypatch.setattr(sched, "_global_gpu_leases_path", lambda: leases_file)

    two_hours_ago = clock.time() - 7200
    leases = {
        "0": {
            "workspace_root": "/nonexistent/path",
            "task_ids": ["old_task"],
            "claimed_at": two_hours_ago,
        },
    }

    survivors = sched._clean_global_gpu_leases_unlocked(leases)

    # The workspace path does not exist and the lease is past the TTL,
    # so the entry is expected to be removed.
    assert "0" not in survivors
1207+
1208+
1209+
def test_recent_leases_kept_without_workspace_check(tmp_path, monkeypatch):
    """Leases less than 60s old are kept and the workspace check is never run."""
    import time as time_mod
    from sibyl import gpu_scheduler

    monkeypatch.setattr(gpu_scheduler, "_global_gpu_leases_path",
                        lambda: tmp_path / "gpu_leases.json")

    def _fail_if_called(gpu_id, entry):
        raise AssertionError(
            "workspace check must be skipped for leases younger than 60s"
        )

    # Without this stub the test would also pass via the TTL branch, so it
    # would not prove that the fast path is taken.
    monkeypatch.setattr(gpu_scheduler, "_lease_entry_matches_running",
                        _fail_if_called)

    recent_lease = {
        "0": {
            "workspace_root": "/nonexistent/path",
            "task_ids": ["new_task"],
            "claimed_at": time_mod.time() - 10,  # 10 seconds ago
        }
    }
    cleaned = gpu_scheduler._clean_global_gpu_leases_unlocked(recent_lease)
    assert "0" in cleaned  # Should be kept — very recent
    assert cleaned["0"] == recent_lease["0"]  # and kept unmodified
1226+
1227+
1228+
def test_mid_age_lease_kept_when_workspace_gone(tmp_path, monkeypatch):
    """A lease older than 60s but inside the TTL survives without confirmation."""
    import time as clock
    from sibyl import gpu_scheduler as sched

    # Point the scheduler at a throwaway lease file for the duration of the test.
    leases_file = tmp_path / "gpu_leases.json"
    monkeypatch.setattr(sched, "_global_gpu_leases_path", lambda: leases_file)

    ten_minutes_ago = clock.time() - 600  # well inside the 1-hour TTL
    leases = {
        "0": {
            "workspace_root": "/nonexistent/path",
            "task_ids": ["mid_task"],
            "claimed_at": ten_minutes_ago,
        },
    }

    survivors = sched._clean_global_gpu_leases_unlocked(leases)

    # The workspace path does not exist, but the lease is still within the
    # TTL, so the entry is expected to stay.
    assert "0" in survivors
1245+
1246+
11861247
# ══════════════════════════════════════════════
11871248
# Failed tasks don't block pipeline
11881249
# ══════════════════════════════════════════════

0 commit comments

Comments
 (0)

Footer

© 2026 GitHub, Inc.