← 返回首页
refactor: optimize reflection+evolution — async hook, effectiveness t… · Sibyl-Research-Team/AutoResearch-SibylSystem@ba1f398 · GitHub
Skip to content

Navigation Menu

Toggle navigation
Sign in
Appearance settings
Search or jump to...

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Include my email address so I can be contacted

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Resetting focus

Commit ba1f398

Browse files
refactor: optimize reflection+evolution — async hook, effectiveness tracking, synonym keys
- Make _post_reflection_hook steps 4-6 async (evolution recording in daemon thread)
- Add effectiveness tracking: lessons marked effective/ineffective based on issue recurrence
- Add ISSUE_SYNONYMS table (40+ entries) for issue_key normalization across languages
- Merge IterationLogger from reflection.py into reflection_postprocess.py (re-export stub kept)
1 parent 5624332 commit ba1f398

4 files changed

Lines changed: 385 additions & 93 deletions

File tree

‎sibyl/evolution.py‎

Lines changed: 139 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,62 @@ def normalize_quality_trajectory(trajectory: object) -> str:
209209
return "stagnant"
210210

211211

212+
# Synonym table for issue_key normalization — maps semantically equivalent
213+
# terms to a single canonical form so that "ablation study 缺失" and
214+
# "缺少 ablation" produce the same hash.
215+
ISSUE_SYNONYMS: dict[str, str] = {
216+
# Chinese → English canonical forms
217+
"缺失": "missing",
218+
"缺少": "missing",
219+
"缺乏": "missing",
220+
"没有": "missing",
221+
"不足": "insufficient",
222+
"薄弱": "weak",
223+
"较弱": "weak",
224+
"不够": "insufficient",
225+
"消融实验": "ablation",
226+
"消融研究": "ablation",
227+
"ablation study": "ablation",
228+
"ablation studies": "ablation",
229+
"ablation experiment": "ablation",
230+
"复现": "reproducibility",
231+
"可复现": "reproducibility",
232+
"可复现性": "reproducibility",
233+
"reproducible": "reproducibility",
234+
"基线": "baseline",
235+
"基准": "baseline",
236+
"baseline comparison": "baseline",
237+
"对比实验": "comparison",
238+
"对比分析": "comparison",
239+
"比较": "comparison",
240+
"文献综述": "literature review",
241+
"相关工作": "related work",
242+
"related works": "related work",
243+
"实验设计": "experiment design",
244+
"一致性": "consistency",
245+
"不一致": "inconsistency",
246+
"冗余": "redundant",
247+
"可读性": "readability",
248+
"清晰度": "clarity",
249+
"不清晰": "unclear",
250+
"显著性": "significance",
251+
"统计显著": "statistical significance",
252+
"过拟合": "overfitting",
253+
"欠拟合": "underfitting",
254+
}
255+
256+
# Pre-sort by descending key length so longer phrases match first.
257+
_SORTED_SYNONYM_KEYS: list[str] = sorted(ISSUE_SYNONYMS, key=len, reverse=True)
258+
259+
260+
def _apply_synonym_normalization(text: str) -> str:
261+
"""Replace synonymous terms in *text* with their canonical form."""
262+
for key in _SORTED_SYNONYM_KEYS:
263+
if key in text:
264+
text = text.replace(key, ISSUE_SYNONYMS[key])
265+
return text
266+
267+
212268
def build_issue_key(description: str, category: str = "") -> str:
213269
category_value = normalize_issue_category(category, description=description)
214270
normalized = _normalize_text(description).lower()
@@ -217,8 +273,15 @@ def build_issue_key(description: str, category: str = "") -> str:
217273
normalized = re.sub(r"\b\d+(?:\.\d+)?(?:pp|%|x|h|min|hours?)?\b", " ", normalized)
218274
normalized = re.sub(r"[^\w\u4e00-\u9fff\s]", " ", normalized)
219275
normalized = re.sub(r"\s+", " ", normalized).strip()
220-
preview = "-".join(normalized.split()[:8])[:72] or "issue"
221-
digest = hashlib.sha1(normalized.encode("utf-8")).hexdigest()[:12] if normalized else "empty"
276+
# Apply synonym normalization before hashing
277+
normalized = _apply_synonym_normalization(normalized)
278+
normalized = re.sub(r"\s+", " ", normalized).strip()
279+
# Sort tokens so word-order differences do not affect the key
280+
# (e.g., "missing ablation" == "ablation missing").
281+
tokens = sorted(normalized.split())
282+
sorted_text = " ".join(tokens)
283+
preview = "-".join(tokens[:8])[:72] or "issue"
284+
digest = hashlib.sha1(sorted_text.encode("utf-8")).hexdigest()[:12] if sorted_text else "empty"
222285
return f"{category_value}:{preview}:{digest}"
223286

224287

@@ -1086,3 +1149,77 @@ def reset_overlays(self):
10861149
def _save_insights(self, insights: list[EvolutionInsight]):
    """Serialize *insights* to plain dicts and write them to ``self.insights_path``."""
    _write_json_atomic(
        self.insights_path,
        [asdict(insight) for insight in insights],
    )
1152+
1153+
# ------------------------------------------------------------------
1154+
# Effectiveness tracking (optimization #8)
1155+
# ------------------------------------------------------------------
1156+
1157+
def update_effectiveness(
    self,
    classified_issues: list[dict],
    previous_overlay_keys: list[str] | None = None,
) -> dict[str, str]:
    """Compare current issues against digest lessons and update effectiveness.

    Logic, per digest entry whose issue key counts as an active lesson:
    - If the key still appears in *classified_issues* → ``ineffective``
    - If the key is absent from *classified_issues* AND the entry has
      ``total_occurrences >= 2`` → ``effective``
    - Otherwise the entry is left untouched

    Args:
        classified_issues: Issue dicts for the current iteration. Each one
            contributes its ``"issue_key"`` if truthy, otherwise a key built
            from its ``"description"`` and ``"category"`` fields.
        previous_overlay_keys: Optional pre-computed list of issue keys that
            were present in the overlay at the start of this iteration.
            When ``None``, every digest entry is treated as an active lesson.

    Returns:
        A mapping ``{issue_key: new_effectiveness}`` for entries whose
        effectiveness changed. The digest cache and insights are rewritten
        only when this mapping is non-empty.
    """
    # NOTE(review): the whole read-modify-write is kept under the evolution
    # lock — confirm this matches the intended lock scope.
    with _evolution_lock(self.EVOLUTION_DIR):
        outcomes = self._load_outcomes()
        digest = self._build_digest_from_outcomes(outcomes)

        # Build set of issue keys present in the current iteration
        current_keys: set[str] = set()
        for issue in classified_issues:
            key = issue.get("issue_key") or build_issue_key(
                issue.get("description", ""),
                issue.get("category", ""),
            )
            if key:
                current_keys.add(key)

        # Key every digest entry exactly once; the pairs are reused both for
        # deriving the lesson set and for the update pass below.
        keyed_entries = [
            (build_issue_key(entry.pattern_summary, entry.category), entry)
            for entry in digest
        ]

        # Determine which digest keys were "active lessons" before this iteration
        if previous_overlay_keys is not None:
            lesson_keys = set(previous_overlay_keys)
        else:
            lesson_keys = {key for key, _ in keyed_entries if key}

        changed: dict[str, str] = {}
        for entry_key, entry in keyed_entries:
            if not entry_key or entry_key not in lesson_keys:
                continue

            if entry_key in current_keys:
                # Issue still present → lesson was ineffective
                verdict = "ineffective"
            elif entry.total_occurrences >= 2:
                # Issue disappeared AND lesson existed for >=2 occurrences → effective
                verdict = "effective"
            else:
                continue

            if entry.effectiveness != verdict:
                entry.effectiveness = verdict
                changed[entry_key] = verdict

        if changed:
            self._write_digest_cache(digest)
            insights = self._analyze_patterns_from_digest(digest)
            self._save_insights(insights)

        return changed

0 commit comments

Comments
 (0)

Footer

© 2026 GitHub, Inc.