4 files changed
@@ -209,6 +209,62 @@ def normalize_quality_trajectory(trajectory: object) -> str: | |||
| 209 | 209 | return "stagnant" | |
| 210 | 210 | ||
| 211 | 211 | ||
| 212 | + # Synonym table for issue_key normalization — maps semantically equivalent | ||
| 213 | + # terms to a single canonical form so that "ablation study 缺失" and | ||
| 214 | + # "缺少 ablation" produce the same hash. | ||
| 215 | + ISSUE_SYNONYMS: dict[str, str] = { | ||
| 216 | + # Chinese → English canonical forms | ||
| 217 | + "缺失": "missing", | ||
| 218 | + "缺少": "missing", | ||
| 219 | + "缺乏": "missing", | ||
| 220 | + "没有": "missing", | ||
| 221 | + "不足": "insufficient", | ||
| 222 | + "薄弱": "weak", | ||
| 223 | + "较弱": "weak", | ||
| 224 | + "不够": "insufficient", | ||
| 225 | + "消融实验": "ablation", | ||
| 226 | + "消融研究": "ablation", | ||
| 227 | + "ablation study": "ablation", | ||
| 228 | + "ablation studies": "ablation", | ||
| 229 | + "ablation experiment": "ablation", | ||
| 230 | + "复现": "reproducibility", | ||
| 231 | + "可复现": "reproducibility", | ||
| 232 | + "可复现性": "reproducibility", | ||
| 233 | + "reproducible": "reproducibility", | ||
| 234 | + "基线": "baseline", | ||
| 235 | + "基准": "baseline", | ||
| 236 | + "baseline comparison": "baseline", | ||
| 237 | + "对比实验": "comparison", | ||
| 238 | + "对比分析": "comparison", | ||
| 239 | + "比较": "comparison", | ||
| 240 | + "文献综述": "literature review", | ||
| 241 | + "相关工作": "related work", | ||
| 242 | + "related works": "related work", | ||
| 243 | + "实验设计": "experiment design", | ||
| 244 | + "一致性": "consistency", | ||
| 245 | + "不一致": "inconsistency", | ||
| 246 | + "冗余": "redundant", | ||
| 247 | + "可读性": "readability", | ||
| 248 | + "清晰度": "clarity", | ||
| 249 | + "不清晰": "unclear", | ||
| 250 | + "显著性": "significance", | ||
| 251 | + "统计显著": "statistical significance", | ||
| 252 | + "过拟合": "overfitting", | ||
| 253 | + "欠拟合": "underfitting", | ||
| 254 | + } | ||
| 255 | + | ||
| 256 | + # Pre-sort by descending key length so longer phrases match first. | ||
| 257 | + _SORTED_SYNONYM_KEYS: list[str] = sorted(ISSUE_SYNONYMS, key=len, reverse=True) | ||
| 258 | + | ||
| 259 | + | ||
| 260 | + def _apply_synonym_normalization(text: str) -> str: | ||
| 261 | + """Replace synonymous terms in *text* with their canonical form.""" | ||
| 262 | + for key in _SORTED_SYNONYM_KEYS: | ||
| 263 | + if key in text: | ||
| 264 | + text = text.replace(key, ISSUE_SYNONYMS[key]) | ||
| 265 | + return text | ||
| 266 | + | ||
| 267 | + | ||
| 212 | 268 | def build_issue_key(description: str, category: str = "") -> str: | |
| 213 | 269 | category_value = normalize_issue_category(category, description=description) | |
| 214 | 270 | normalized = _normalize_text(description).lower() | |
@@ -217,8 +273,15 @@ def build_issue_key(description: str, category: str = "") -> str: | |||
| 217 | 273 | normalized = re.sub(r"\b\d+(?:\.\d+)?(?:pp|%|x|h|min|hours?)?\b", " ", normalized) | |
| 218 | 274 | normalized = re.sub(r"[^\w\u4e00-\u9fff\s]", " ", normalized) | |
| 219 | 275 | normalized = re.sub(r"\s+", " ", normalized).strip() | |
| 220 | - preview = "-".join(normalized.split()[:8])[:72] or "issue" | ||
| 221 | - digest = hashlib.sha1(normalized.encode("utf-8")).hexdigest()[:12] if normalized else "empty" | ||
| 276 | + # Apply synonym normalization before hashing | ||
| 277 | + normalized = _apply_synonym_normalization(normalized) | ||
| 278 | + normalized = re.sub(r"\s+", " ", normalized).strip() | ||
| 279 | + # Sort tokens so word-order differences do not affect the key | ||
| 280 | + # (e.g., "missing ablation" == "ablation missing"). | ||
| 281 | + tokens = sorted(normalized.split()) | ||
| 282 | + sorted_text = " ".join(tokens) | ||
| 283 | + preview = "-".join(tokens[:8])[:72] or "issue" | ||
| 284 | + digest = hashlib.sha1(sorted_text.encode("utf-8")).hexdigest()[:12] if sorted_text else "empty" | ||
| 222 | 285 | return f"{category_value}:{preview}:{digest}" | |
| 223 | 286 | ||
| 224 | 287 | ||
@@ -1086,3 +1149,77 @@ def reset_overlays(self): | |||
def _save_insights(self, insights: list[EvolutionInsight]):
    """Convert each insight to a plain dict and write the list to ``self.insights_path``."""
    serialized = list(map(asdict, insights))
    _write_json_atomic(self.insights_path, serialized)
| 1152 | + | ||
| 1153 | + # ------------------------------------------------------------------ | ||
| 1154 | + # Effectiveness tracking (optimization #8) | ||
| 1155 | + # ------------------------------------------------------------------ | ||
| 1156 | + | ||
def update_effectiveness(
    self,
    classified_issues: list[dict],
    previous_overlay_keys: list[str] | None = None,
) -> dict[str, str]:
    """Re-score digest lessons against the issues seen in this iteration.

    A digest entry is identified by
    ``build_issue_key(entry.pattern_summary, entry.category)``.  For every
    entry whose key belongs to the set of active lessons:

    - the key is among the current issues → marked ``ineffective``;
    - the key is not among the current issues and
      ``entry.total_occurrences >= 2`` → marked ``effective``;
    - otherwise the entry's effectiveness is left untouched.

    Args:
        classified_issues: Issue dicts for the current iteration.  Each
            contributes its ``"issue_key"`` value, or, when that is missing
            or falsy, a key built from its ``"description"`` and
            ``"category"`` values.
        previous_overlay_keys: Optional list of issue keys treated as the
            active lessons.  When ``None``, the keys of all entries in the
            current digest are used instead.

    Returns:
        A mapping ``{issue_key: new_effectiveness}`` holding only the keys
        whose effectiveness was changed by this call.  When it is non-empty,
        the digest cache is rewritten and the insights are regenerated and
        saved.
    """
    with _evolution_lock(self.EVOLUTION_DIR):
        digest = self._build_digest_from_outcomes(self._load_outcomes())

        # Keys of the issues reported for the current iteration.
        seen_now: set[str] = set()
        for issue in classified_issues:
            issue_key = issue.get("issue_key") or build_issue_key(
                issue.get("description", ""),
                issue.get("category", ""),
            )
            if issue_key:
                seen_now.add(issue_key)

        # Keys regarded as lessons that were active before this iteration.
        if previous_overlay_keys is None:
            candidate_keys = (
                build_issue_key(lesson.pattern_summary, lesson.category)
                for lesson in digest
            )
            active_keys = {candidate for candidate in candidate_keys if candidate}
        else:
            active_keys = set(previous_overlay_keys)

        updates: dict[str, str] = {}
        for lesson in digest:
            lesson_key = build_issue_key(lesson.pattern_summary, lesson.category)
            if not (lesson_key and lesson_key in active_keys):
                continue

            if lesson_key in seen_now:
                # The issue is still being reported.
                verdict = "ineffective"
            elif lesson.total_occurrences >= 2:
                # The issue is gone and the lesson has at least two occurrences.
                verdict = "effective"
            else:
                continue

            if lesson.effectiveness != verdict:
                lesson.effectiveness = verdict
                updates[lesson_key] = verdict

        if updates:
            self._write_digest_cache(digest)
            self._save_insights(self._analyze_patterns_from_digest(digest))

    return updates
0 commit comments