Modifications
This commit is contained in:
@@ -389,7 +389,7 @@ def plot_single_json_error_analysis_with_log(
|
|||||||
]
|
]
|
||||||
plt.legend(handles=legend_elements, loc="upper right", frameon=True, shadow=True, title="Legend")
|
plt.legend(handles=legend_elements, loc="upper right", frameon=True, shadow=True, title="Legend")
|
||||||
|
|
||||||
plt.title("Validation: Confidence vs. Error Magnitude (Single JSON)", fontsize=15, pad=30)
|
# plt.title("Validation: Confidence vs. Error Magnitude (Single JSON)", fontsize=15, pad=30)
|
||||||
plt.ylabel("Mean Absolute Error (EDSS Points)", fontsize=12)
|
plt.ylabel("Mean Absolute Error (EDSS Points)", fontsize=12)
|
||||||
plt.xlabel("LLM Confidence Bracket", fontsize=12)
|
plt.xlabel("LLM Confidence Bracket", fontsize=12)
|
||||||
plt.grid(axis="y", linestyle=":", alpha=0.5)
|
plt.grid(axis="y", linestyle=":", alpha=0.5)
|
||||||
@@ -414,6 +414,317 @@ plot_single_json_error_analysis_with_log(json_path, gt_path)
|
|||||||
|
|
||||||
##
|
##
|
||||||
|
|
||||||
|
# %% 1json (rewritten with robust parsing + detailed data log + Pearson r in plot)
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
from matplotlib.lines import Line2D
|
||||||
|
from scipy.stats import pearsonr
|
||||||
|
|
||||||
|
def plot_single_json_error_analysis_with_log(
    json_file_path,
    ground_truth_path,
    edss_gt_col="EDSS",
    min_bin_count=5,
):
    """Plot mean absolute EDSS error per confidence bracket for one result JSON.

    Loads the ``;``-separated ground-truth CSV and one prediction JSON, joins
    them on a normalised ``unique_id`` + ``MedDatum`` key, prints a detailed
    log of every stage (row counts, duplicates, missing values, unmatched
    keys) and draws a bar chart of the mean absolute error per confidence
    bin with SEM error bars, a linear trend line over the bin means and the
    row-level Pearson correlation between confidence and absolute error.

    Args:
        json_file_path: Path to a JSON file containing a list of entries.
            Entries with a truthy ``"success"`` are read from
            ``entry["result"]`` using the keys ``unique_id``, ``MedDatum``,
            ``EDSS`` and ``certainty_percent``.
        ground_truth_path: Path to the ground-truth CSV. It must contain the
            columns ``unique_id``, ``MedDatum`` and ``edss_gt_col``.
        edss_gt_col: Name of the ground-truth EDSS column.
        min_bin_count: Bins holding fewer rows than this are reported with a
            warning in the log (they are still plotted).

    Returns:
        None. If no usable prediction rows or no complete rows remain, an
        error is printed and the function returns before plotting.

    Raises:
        ValueError: If the ground truth lacks a required column or the JSON
            top level is not a list.
        pandas.errors.MergeError: If the ground truth holds several rows for
            one key with *different* EDSS values.
    """

    def norm_str(x):
        # normalize identifiers and dates consistently
        return str(x).strip().lower()

    def parse_edss(x):
        # robust numeric parse: handles "3,5" as 3.5, blanks, "nan", etc.
        if x is None:
            return np.nan
        s = str(x).strip()
        if s == "" or s.lower() in {"nan", "none", "null"}:
            return np.nan
        return pd.to_numeric(s.replace(",", "."), errors="coerce")

    print("\n" + "="*80)
    print("SINGLE-JSON ERROR ANALYSIS (WITH LOG)")
    print("="*80)
    print(f"JSON: {json_file_path}")
    print(f"GT: {ground_truth_path}")

    # ------------------------------------------------------------------
    # 1) Load Ground Truth
    # ------------------------------------------------------------------
    df_gt = pd.read_csv(ground_truth_path, sep=";")

    required_gt_cols = {"unique_id", "MedDatum", edss_gt_col}
    missing_cols = required_gt_cols - set(df_gt.columns)
    if missing_cols:
        raise ValueError(f"GT is missing required columns: {missing_cols}. Available: {df_gt.columns.tolist()}")

    df_gt["unique_id"] = df_gt["unique_id"].map(norm_str)
    df_gt["MedDatum"] = df_gt["MedDatum"].map(norm_str)
    df_gt["key"] = df_gt["unique_id"] + "_" + df_gt["MedDatum"]

    # Robust EDSS parsing
    df_gt["EDSS_gt"] = df_gt[edss_gt_col].map(parse_edss)

    # GT logs
    print("\n--- GT LOG ---")
    print(f"GT rows: {len(df_gt)}")
    print(f"GT unique keys: {df_gt['key'].nunique()}")
    gt_dup_mask = df_gt["key"].duplicated(keep=False)
    gt_dup = gt_dup_mask.sum()
    print(f"GT duplicate-key rows: {gt_dup}")
    print(f"GT missing EDSS (numeric): {df_gt['EDSS_gt'].isna().sum()}")
    print(f"GT missing EDSS unique keys: {df_gt.loc[df_gt['EDSS_gt'].isna(), 'key'].nunique()}")

    if gt_dup > 0:
        print(
            "\n[WARNING] GT has duplicate keys. Identical duplicates are collapsed before the merge; "
            "conflicting EDSS values will abort it. Example duplicate keys:"
        )
        print(df_gt.loc[gt_dup_mask, "key"].value_counts().head(10))

    # ------------------------------------------------------------------
    # 2) Load Predictions from the specific JSON
    # ------------------------------------------------------------------
    with open(json_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        # Iterating a dict would walk its string keys and fail on `.get`.
        raise ValueError(
            f"Expected a JSON list of result entries, got {type(data).__name__}."
        )

    total_entries = len(data)
    success_entries = sum(1 for e in data if e.get("success"))

    all_preds = []
    skipped = {
        "not_success": 0,
        "missing_uid_or_date": 0,
        "missing_edss": 0,
        "missing_conf": 0,
    }

    for entry in data:
        if not entry.get("success"):
            skipped["not_success"] += 1
            continue

        # `"result": null` yields None (the .get default only covers an
        # absent key), so fall back to an empty dict for anything non-dict.
        res = entry.get("result")
        if not isinstance(res, dict):
            res = {}
        uid = res.get("unique_id")
        md = res.get("MedDatum")

        if uid is None or md is None or str(uid).strip() == "" or str(md).strip() == "":
            skipped["missing_uid_or_date"] += 1
            continue

        edss_pred = parse_edss(res.get("EDSS"))
        # Same comma-tolerant parser as EDSS so "85,5" is read as 85.5.
        conf = parse_edss(res.get("certainty_percent"))

        # These two counters are informational only: the row is still loaded
        # and is removed later by the complete-rows filter.
        if pd.isna(edss_pred):
            skipped["missing_edss"] += 1
        if pd.isna(conf):
            skipped["missing_conf"] += 1

        uid_norm = norm_str(uid)
        md_norm = norm_str(md)
        all_preds.append({
            "unique_id": uid_norm,
            "MedDatum": md_norm,
            "key": uid_norm + "_" + md_norm,
            "EDSS_pred": edss_pred,
            "confidence": conf,
        })

    df_pred = pd.DataFrame(all_preds)

    # Pred logs
    print("\n--- PRED LOG ---")
    print(f"JSON total entries: {total_entries}")
    print(f"JSON success entries: {success_entries}")
    print(f"Pred rows loaded (success + has keys): {len(df_pred)}")
    if len(df_pred) == 0:
        print("[ERROR] No usable prediction rows found. Nothing to plot.")
        return

    print(f"Pred unique keys: {df_pred['key'].nunique()}")
    print(f"Pred missing EDSS (numeric): {df_pred['EDSS_pred'].isna().sum()}")
    print(f"Pred missing confidence: {df_pred['confidence'].isna().sum()}")
    print("Skipped counts:", skipped)

    key_counts = df_pred["key"].value_counts()
    dup_pred_rows = (key_counts > 1).sum()
    max_rep = int(key_counts.max())
    print(f"Keys with >1 prediction in this JSON: {dup_pred_rows}")
    print(f"Max repetitions of a single key in this JSON: {max_rep}")
    if max_rep > 1:
        print("Top repeated keys in this JSON:")
        print(key_counts.head(10))

    # ------------------------------------------------------------------
    # 3) Merge
    # ------------------------------------------------------------------
    gt_key_set = set(df_gt["key"])
    df_pred["key_in_gt"] = df_pred["key"].isin(gt_key_set)
    not_in_gt = df_pred.loc[~df_pred["key_in_gt"]]

    print("\n--- KEY MATCH LOG ---")
    print(f"Pred rows with key found in GT: {df_pred['key_in_gt'].sum()} / {len(df_pred)}")
    print(f"Pred rows with key NOT found in GT: {len(not_in_gt)}")
    if len(not_in_gt) > 0:
        print("[WARNING] Some prediction keys are not present in GT. First 10:")
        print(not_in_gt[["unique_id", "MedDatum", "key"]].head(10))

    # Collapse byte-identical (key, EDSS) duplicates: they carry no extra
    # information. `validate` still rejects keys with conflicting EDSS values.
    gt_lookup = df_gt[["key", "EDSS_gt"]].drop_duplicates()
    df_merged = df_pred.merge(
        gt_lookup,
        on="key",
        how="inner",
        validate="many_to_one",
    )

    print("\n--- MERGE LOG ---")
    print(f"Merged rows (inner join): {len(df_merged)}")
    print(f"Merged unique keys: {df_merged['key'].nunique()}")
    print(f"Merged missing GT EDSS: {df_merged['EDSS_gt'].isna().sum()}")
    print(f"Merged missing pred EDSS: {df_merged['EDSS_pred'].isna().sum()}")
    print(f"Merged missing confidence:{df_merged['confidence'].isna().sum()}")

    rows_complete = df_merged.dropna(subset=["EDSS_gt", "EDSS_pred", "confidence"])
    print("\n--- FILTER LOG (what will be used for stats/plot) ---")
    print(f"Rows with all required fields (EDSS_gt, EDSS_pred, confidence): {len(rows_complete)}")
    if len(rows_complete) == 0:
        print("[ERROR] No complete rows after filtering. Nothing to plot.")
        return

    rows_complete = rows_complete.copy()
    rows_complete["abs_error"] = (rows_complete["EDSS_pred"] - rows_complete["EDSS_gt"]).abs()

    # ------------------------------------------------------------------
    # 4) Pearson correlation on row-level data
    # ------------------------------------------------------------------
    corr_df = rows_complete.dropna(subset=["confidence", "abs_error"]).copy()

    # pearsonr is undefined for fewer than two points or a constant input.
    if len(corr_df) >= 2 and corr_df["confidence"].nunique() > 1 and corr_df["abs_error"].nunique() > 1:
        r_value, p_value = pearsonr(corr_df["confidence"], corr_df["abs_error"])
        corr_text = f"Pearson r = {r_value:.3f}\np = {p_value:.3g}\nn = {len(corr_df)}"
    else:
        r_value, p_value = np.nan, np.nan
        corr_text = f"Pearson r = NA\np = NA\nn = {len(corr_df)}"

    print("\n--- CORRELATION LOG ---")
    print(corr_text.replace("\n", " | "))

    # ------------------------------------------------------------------
    # 5) Binning + stats
    # ------------------------------------------------------------------
    # NOTE: pd.cut intervals are right-closed by default, so a confidence of
    # exactly 70 falls into the first bin, 80 into the second, and so on.
    bins = [0, 70, 80, 90, 100]
    labels = ["Low (<70%)", "Moderate (70-80%)", "High (80-90%)", "Very High (90-100%)"]

    rows_complete["conf_bin"] = pd.cut(rows_complete["confidence"], bins=bins, labels=labels, include_lowest=True)
    outside_mask = rows_complete["conf_bin"].isna()
    conf_outside = outside_mask.sum()
    print(f"Rows with confidence outside [0,100] or outside bin edges: {conf_outside}")
    if conf_outside > 0:
        print("Example confidences outside bins:")
        print(rows_complete.loc[outside_mask, "confidence"].head(20).to_list())

    df_plot = rows_complete.dropna(subset=["conf_bin"])
    stats = (
        df_plot.groupby("conf_bin", observed=True)["abs_error"]
        .agg(mean="mean", std="std", count="count")
        .reindex(labels)  # keep empty bins as NaN rows so the x-axis is stable
        .reset_index()
    )

    print("\n--- BIN STATS ---")
    print(stats)

    low_bins = stats.loc[stats["count"].fillna(0) < min_bin_count, ["conf_bin", "count"]]
    if not low_bins.empty:
        print(f"\n[WARNING] Some bins have < {min_bin_count} rows; error bars/trend may be unstable:")
        print(low_bins)

    # ------------------------------------------------------------------
    # 6) Plot
    # ------------------------------------------------------------------
    plt.figure(figsize=(13, 8))
    colors = sns.color_palette("Blues", n_colors=len(labels))

    means = stats["mean"].to_numpy()
    counts = stats["count"].fillna(0).astype(int).to_numpy()
    stds = stats["std"].to_numpy()
    means_plot = np.nan_to_num(means, nan=0.0)  # empty bins are drawn as zero-height bars

    bars = plt.bar(labels, means_plot, color=colors, edgecolor="black", alpha=0.85)

    # SEM is only defined for bins with at least two rows.
    sem = np.where((counts > 1) & (~np.isnan(stds)), stds / np.sqrt(counts), np.nan)
    plt.errorbar(labels, means_plot, yerr=sem, fmt="none", c="black", capsize=8, elinewidth=1.5)

    # Linear trend over the non-empty bin means (x = bin position).
    valid_idx = np.where(~np.isnan(means))[0]
    if len(valid_idx) >= 2:
        x_idx = np.arange(len(labels))
        trend = np.poly1d(np.polyfit(valid_idx, means[valid_idx], 1))
        plt.plot(x_idx, trend(x_idx), color="#e74c3c", linestyle="--", linewidth=3, zorder=5)
        trend_label = "Trend Line"
    else:
        trend_label = "Trend Line (insufficient bins)"
        print("\n[INFO] Not enough non-empty bins to fit a trend line.")

    # Data labels
    for bar, mae_val, n_count in zip(bars, means, counts):
        n_count = int(n_count)
        if np.isnan(mae_val) or n_count == 0:
            txt = "empty"
            y = 0.02
        else:
            txt = f"MAE: {mae_val:.2f}\nn={n_count}"
            y = bar.get_height() + 0.04
        plt.text(
            bar.get_x() + bar.get_width()/2,
            y,
            txt,
            ha="center",
            va="bottom",
            fontweight="bold",
            fontsize=10,
        )

    # Pearson correlation text box inside plot
    ax = plt.gca()
    ax.text(
        0.02, 0.98,
        corr_text,
        transform=ax.transAxes,
        ha="left",
        va="top",
        fontsize=11,
        zorder=10,
        bbox=dict(boxstyle="round,pad=0.4", facecolor="white", edgecolor="gray", alpha=0.95),
    )

    # Legend
    legend_elements = [
        Patch(facecolor=color, edgecolor="black", label=f"Bin {pos}: {label}")
        for pos, (color, label) in enumerate(zip(colors, labels), start=1)
    ]
    legend_elements += [
        Line2D([0], [0], color="#e74c3c", linestyle="--", lw=3, label=trend_label),
        Line2D([0], [0], color="black", marker="_", linestyle="None", markersize=10, label="Std Error (SEM)"),
        Patch(color="none", label="Metric: Mean Absolute Error (MAE)"),
    ]
    plt.legend(handles=legend_elements, loc="upper right", frameon=True, shadow=True, title="Legend")

    # plt.title("Validation: Confidence vs. Error Magnitude (Single JSON)", fontsize=15, pad=30)
    plt.ylabel("Mean Absolute Error (EDSS Points)", fontsize=12)
    plt.xlabel("LLM Confidence Bracket", fontsize=12)
    plt.grid(axis="y", linestyle=":", alpha=0.5)

    # Head-room above the tallest bar for the value labels.
    ymax = np.nanmax(means) if np.any(~np.isnan(means)) else 0.0
    plt.ylim(0, max(0.5, float(ymax) + 0.6))
    plt.tight_layout()
    plt.show()

    print("\n" + "="*80)
    print("DONE")
    print("="*80)
|
||||||
|
|
||||||
|
|
||||||
|
# --- RUN ---
# Analyse one iteration's result file against the ground-truth table.
# Both paths are kept as module-level names because later cells reuse them.
json_path = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique_results_iter_1_20260212_020628.json"
gt_path = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv"

plot_single_json_error_analysis_with_log(
    json_file_path=json_path,
    ground_truth_path=gt_path,
)
|
||||||
|
##
|
||||||
|
|
||||||
# %% Certainty vs Delta (rewritten with robust parsing + detailed data loss logs)
|
# %% Certainty vs Delta (rewritten with robust parsing + detailed data loss logs)
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|||||||
+1
-1
@@ -1535,6 +1535,6 @@ plot_single_json_error_analysis(json_path, gt_path)
|
|||||||
#plot_error_distribution_by_confidence('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
#plot_error_distribution_by_confidence('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
#plot_confidence_vs_abs_error_refined('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
#plot_confidence_vs_abs_error_refined('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
#plot_confidence_vs_abs_error_with_counts('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
#plot_confidence_vs_abs_error_with_counts('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
#plot_final_thesis_error_chart('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
plot_final_thesis_error_chart('/home/shahin/Lab/Doktorarbeit/Barcelona/Data/iteration', '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv')
|
||||||
|
|
||||||
##
|
##
|
||||||
|
|||||||
+133
-96
@@ -401,7 +401,7 @@ import seaborn as sns
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# Load your data from TSV file
|
# Load your data from TSV file
|
||||||
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_results_unique.tsv'
|
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/results/join_results_unique.tsv'
|
||||||
df = pd.read_csv(file_path, sep='\t')
|
df = pd.read_csv(file_path, sep='\t')
|
||||||
|
|
||||||
# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
|
# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
|
||||||
@@ -745,7 +745,7 @@ df = df.rename(columns=column_mapping)
|
|||||||
df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
|
df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
|
||||||
|
|
||||||
# Patient
|
# Patient
|
||||||
patient_id = '6389d658'
|
patient_id = 'd13e4aa3'
|
||||||
patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
|
patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
|
||||||
if patient_data.empty:
|
if patient_data.empty:
|
||||||
raise ValueError(f"No data found for patient: {patient_id}")
|
raise ValueError(f"No data found for patient: {patient_id}")
|
||||||
@@ -1764,100 +1764,7 @@ plt.show()
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %% Difference Gemini easy
|
# %% Difference Plot Gemini
|
||||||
|
|
||||||
|
|
||||||
# --- 1. Process Error Data ---
# For every functional system, count how often the extracted score matches,
# undershoots or overshoots the ground truth.
system_names = [name.split('.')[1] for name, _ in functional_systems_to_plot]
plot_list = []

for gt_col, res_col in functional_systems_to_plot:
    sys_name = gt_col.split('.')[1]

    # Robust parsing
    # NOTE(review): presumably safe_parse yields NaN for unparsable cells - confirm.
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)
    error = res - gt

    # Calculate counts (comparisons with NaN are False, so rows without a
    # numeric error land in none of the three buckets)
    matches = (error == 0).sum()
    under = (error < 0).sum()
    over = (error > 0).sum()
    total = error.count()

    # Calculate Percentages
    # Using max(total, 1) to avoid division by zero
    divisor = max(total, 1)

    plot_list.append({
        'System': sys_name.replace('_', ' ').title(),
        'Matches': matches,
        'MatchPct': (matches / divisor) * 100,
        'Under': under,
        'UnderPct': (under / divisor) * 100,
        'Over': over,
        'OverPct': (over / divisor) * 100,
    })

stats_df = pd.DataFrame(plot_list)

# --- 2. Plotting ---
fig, ax = plt.subplots(figsize=(12, 8))  # Slightly taller for multi-line labels

color_under = '#E74C3C'
color_over = '#3498DB'
bar_height = 0.6

y_pos = np.arange(len(stats_df))

# Under-scored counts grow to the left of zero, over-scored to the right.
ax.barh(y_pos, -stats_df['Under'], bar_height, label='Under-scored', color=color_under, edgecolor='white', alpha=0.8)
ax.barh(y_pos, stats_df['Over'], bar_height, label='Over-scored', color=color_over, edgecolor='white', alpha=0.8)

# --- 3. Aesthetics & Labels ---

# Labels sit just left of the current left axis limit; read it once.
label_x = ax.get_xlim()[0] - 0.5

for y, (_, row) in zip(y_pos, stats_df.iterrows()):
    # mathtext ignores plain spaces, so escape them to keep multi-word
    # system names readable inside \mathbf{...}.
    bold_name = str(row['System']).replace(' ', r'\ ')

    # Constructing a detailed label for the left side
    # Matches (Bold) | Under % | Over %
    label_text = (
        f"$\\mathbf{{{bold_name}}}$\n"
        f"Matches: {int(row['Matches'])} ({row['MatchPct']:.1f}%)\n"
        f"Under: {int(row['Under'])} ({row['UnderPct']:.1f}%) | Over: {int(row['Over'])} ({row['OverPct']:.1f}%)"
    )

    # Position text to the left of the x=0 line
    ax.text(label_x, y, label_text, va='center', ha='right', fontsize=9, color='#333333', linespacing=1.3)

# Zero line
ax.axvline(0, color='black', linewidth=1.2, alpha=0.7)

# Clean up axes
ax.set_yticks([])
ax.set_xlabel('Number of Patients with Error', fontsize=11, fontweight='bold', labelpad=10)
#ax.set_title('Directional Error Analysis by Functional System', fontsize=14, pad=30)

# Make X-axis labels absolute. A formatter (instead of fixed tick labels)
# stays correct when the ticks are recomputed and does not truncate
# fractional tick values.
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda value, _pos: f"{abs(value):g}"))

# Remove spines
for spine in ['top', 'right', 'left']:
    ax.spines[spine].set_visible(False)

# Legend
ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1))

# Grid
ax.xaxis.grid(True, linestyle='--', alpha=0.3)

plt.tight_layout()
plt.show()
|
|
||||||
##
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %% name
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import os
|
import os
|
||||||
@@ -1946,6 +1853,136 @@ ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1), ncol=2)
|
|||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
plt.show()
|
||||||
##
|
##
|
||||||
|
|
||||||
|
|
||||||
|
# %% Functional System Error Boxplots
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
from matplotlib.patches import Patch
|
||||||
|
from matplotlib.lines import Line2D
|
||||||
|
|
||||||
|
# --- Configuration & Theme ---
plt.rcParams['font.family'] = 'Arial'
figure_save_path = 'project/visuals/functional_systems_boxplot.svg'

# --- 1. Build error data for boxplots ---
# One array of non-zero signed errors (result - ground truth) per system.
boxplot_data = []
system_labels = []
sample_sizes = []

for gt_col, res_col in functional_systems_to_plot:
    sys_name = gt_col.split('.')[1]

    # Robust parsing
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)

    # Error = result - ground truth
    error = (res - gt).dropna()

    # Ignore all 0 errors: the plot shows the distribution of mistakes only
    error = error[error != 0]

    # Keep only systems that actually have non-zero data
    if len(error) > 0:
        boxplot_data.append(error.values)
        system_labels.append(sys_name.replace('_', ' ').title())
        sample_sizes.append(len(error))

# Safety check
if not boxplot_data:
    raise ValueError("No valid non-zero error data available for any functional system.")

# Put n into x-axis labels so it doesn't overlap the plot
xtick_labels = [f"{label}\n(n={n})" for label, n in zip(system_labels, sample_sizes)]

# --- 2. Plotting ---
fig, ax = plt.subplots(figsize=(14, 8))

# Vertical boxes are the default. The tick labels are applied afterwards
# because the `labels` keyword of boxplot is deprecated in recent matplotlib.
bp = ax.boxplot(
    boxplot_data,
    patch_artist=True,
    showmeans=True,
    meanline=False,
)

# --- 3. Styling ---
box_face = '#D6EAF8'
box_edge = '#2980B9'
whisker_col = '#7F8C8D'
median_col = '#C0392B'
mean_col = '#1ABC9C'
flier_face = '#95A5A6'
flier_edge = '#7F8C8D'

for box in bp['boxes']:
    box.set(facecolor=box_face, edgecolor=box_edge, linewidth=1.5)

# Whiskers and caps share one style.
for line in (*bp['whiskers'], *bp['caps']):
    line.set(color=whisker_col, linewidth=1.2)

for median in bp['medians']:
    median.set(color=median_col, linewidth=2)

for mean in bp['means']:
    mean.set(marker='o', markerfacecolor=mean_col, markeredgecolor='black', markersize=6)

for flier in bp['fliers']:
    flier.set(marker='o', markerfacecolor=flier_face, markeredgecolor=flier_edge, alpha=0.6, markersize=4)

# Reference line at zero error
ax.axhline(0, color='black', linewidth=1.2, linestyle='--')

# Labels and formatting
ax.set_xlabel('Functional System', fontsize=11, fontweight='bold')
ax.set_ylabel('Error (Result - Ground Truth)', fontsize=11, fontweight='bold')

# System names (with n) on the x-axis, rotated for readability
ax.set_xticklabels(xtick_labels, rotation=45, ha='right')

# Grid and spines
ax.yaxis.grid(True, linestyle='--', alpha=0.3)
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# --- 4. Legend above the plot, outside the axes ---
legend_handles = [
    Patch(facecolor=box_face, edgecolor=box_edge, label='IQR (25th-75th percentile)'),
    Line2D([0], [0], color=median_col, lw=2, label='Median'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=mean_col,
           markeredgecolor='black', markersize=7, label='Mean'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=flier_face,
           markeredgecolor=flier_edge, alpha=0.8, markersize=6, label='Outlier'),
    Line2D([0], [0], color='black', lw=1.2, linestyle='--', label='Zero error reference'),
]

ax.legend(
    handles=legend_handles,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.02),
    ncol=3,
    frameon=False,
)

# Leave room at the top for the legend
plt.tight_layout(rect=[0, 0, 1, 0.90])

# Optional save
# os.makedirs('') raises, so only create a directory when the path has one.
save_dir = os.path.dirname(figure_save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
plt.savefig(figure_save_path, format='svg', bbox_inches='tight')

plt.show()
|
||||||
|
##
|
||||||
|
|
||||||
|
|
||||||
# %% test
|
# %% test
|
||||||
# Diagnose: what are the actual differences?
|
# Diagnose: what are the actual differences?
|
||||||
print("\n🔍 Raw differences (first 5 rows per system):")
|
print("\n🔍 Raw differences (first 5 rows per system):")
|
||||||
|
|||||||
Reference in New Issue
Block a user