diff --git a/audit.py b/audit.py index 7fa3039..00dd9bf 100644 --- a/audit.py +++ b/audit.py @@ -2326,12 +2326,12 @@ def correlation_scatter_raw_certainty_json1_reference( xs = np.linspace(np.nanmin(x), np.nanmax(x), 200) ax.plot(xs, a * xs + b, linestyle="--", linewidth=2.5, color=trend_color) - ax.set_xlabel("certainty_percent (from JSON 1, per key)") - ax.set_ylabel("Absolute Error |EDSS_pred − EDSS_gt|" if y_mode == "abs" else "Signed Error (EDSS_pred − EDSS_gt)") - ax.set_title( - f"Correlation: JSON1 certainty_percent vs {y_col} (All iterations)\n" - f"Pearson r={pearson:.3f} | Spearman ρ={spearman:.3f}" - ) + ax.set_xlabel("certainty percent") + ax.set_ylabel("Absolute Error" if y_mode == "abs" else "Signed Error (EDSS_pred − EDSS_gt)") +# ax.set_title( +# f"Correlation: JSON1 certainty_percent vs {y_col} (All iterations)\n" +# f"Pearson r={pearson:.3f} | Spearman ρ={spearman:.3f}" +# ) ax.grid(linestyle=":", alpha=0.5) # Colorbar @@ -2362,7 +2362,7 @@ correlation_scatter_raw_certainty_json1_reference( json1_file_path=json1_path, ground_truth_path="/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv", y_mode="abs", - save_svg_path="/home/shahin/Lab/Doktorarbeit/Barcelona/results/corr_json1_abs_error.svg" + # save_svg_path="/home/shahin/Lab/Doktorarbeit/Barcelona/results/corr_json1_abs_error.svg" ) ## diff --git a/show_plots.py b/show_plots.py index 49184ac..36c3d2a 100644 --- a/show_plots.py +++ b/show_plots.py @@ -718,128 +718,155 @@ plt.show() ## - - - - - -# %% Dashboard +# %% Dashboard import pandas as pd import matplotlib.pyplot as plt -import seaborn as sns -from datetime import datetime +import matplotlib.dates as mdates import numpy as np +from matplotlib.gridspec import GridSpec + +def to_numeric_comma(s: pd.Series) -> pd.Series: + # accepts 1.5 and 1,5 + return pd.to_numeric(s.astype(str).str.replace(",", ".", regex=False), errors="coerce") # Load the data file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv' df = pd.read_csv(file_path, sep='\t') -# Rename columns to remove 'result.' prefix and handle spaces +# Rename columns to remove 'result.' prefix and replace spaces column_mapping = {} for col in df.columns: if col.startswith('result.'): - new_name = col.replace('result.', '') - # Handle spaces in column names (replace with underscores if needed) - new_name = new_name.replace(' ', '_') + new_name = col.replace('result.', '').replace(' ', '_') column_mapping[col] = new_name df = df.rename(columns=column_mapping) -# Convert MedDatum to datetime -df['MedDatum'] = pd.to_datetime(df['MedDatum']) +# Parse MedDatum safely +df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce') -# Check what columns actually exist in the dataset -print("Available columns:") -print(df.columns.tolist()) -print("\nFirst few rows:") -print(df.head()) +# Patient +patient_id = '6389d658' +patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy() +if patient_data.empty: + raise ValueError(f"No data found for patient: {patient_id}") -# Hardcode specific patient names -patient_names = ['2bf8486d'] +# Functional systems + EDSS +edss_col, edss_title = ('GT.EDSS', 'EDSS') -# Define the functional systems (columns to plot) - adjust based on actual column names -functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual'] +functional_systems = [ + ('GT.VISUAL_OPTIC_FUNCTIONS', 'Visual / Optic'), + ('GT.CEREBELLAR_FUNCTIONS', 'Cerebellar'), + ('GT.BRAINSTEM_FUNCTIONS', 'Brainstem'), + ('GT.SENSORY_FUNCTIONS', 'Sensory'), + ('GT.PYRAMIDAL_FUNCTIONS', 'Pyramidal (Motor)'), + ('GT.AMBULATION', 'Ambulation'), + ('GT.CEREBRAL_FUNCTIONS', 'Cerebral'), + ('GT.BOWEL_AND_BLADDER_FUNCTIONS', 'Bowel & Bladder'), +] -# Create subplots horizontally (2 columns, adjust rows as needed) -num_plots = len(functional_systems) -num_cols = 2 -num_rows = (num_plots + num_cols - 1) // num_cols # Ceiling division +# y-axis max rules +ymax_by_col = { + 'GT.PYRAMIDAL_FUNCTIONS': 6, + 'GT.SENSORY_FUNCTIONS': 6, + 'GT.BOWEL_AND_BLADDER_FUNCTIONS': 6, + 'GT.VISUAL_OPTIC_FUNCTIONS': 6, + 'GT.CEREBELLAR_FUNCTIONS': 5, + 'GT.CEREBRAL_FUNCTIONS': 5, + 'GT.BRAINSTEM_FUNCTIONS': 5, + 'GT.EDSS': 10, +} +default_ymax = 6 -fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 4*num_rows), sharex=False) # Changed sharex=False -if num_plots == 1: - axes = [axes] -elif num_rows == 1: - axes = axes -else: - axes = axes.flatten() +# ---------- Build shared "event dates" ticks ---------- +cols_for_dates = [edss_col] + [c for c, _ in functional_systems] +event_dates = [] -# Plot for the hardcoded patient -for i, system in enumerate(functional_systems): - # Filter data for this specific patient - patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum') +for c in cols_for_dates: + if c in patient_data.columns: + y = to_numeric_comma(patient_data[c]) # <-- changed + x = patient_data['MedDatum'] + tmp = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]) + event_dates.extend(tmp["x"].tolist()) - # Check if patient data exists - if patient_data.empty: - print(f"No data found for patient: {patient_names[0]}") - continue +event_dates = sorted(pd.Series(event_dates).drop_duplicates().tolist()) - # Check if the system column exists in the data - if system in patient_data.columns: - # Plot the specific functional system - if not patient_data[system].isna().all(): - axes[i].plot(patient_data['MedDatum'], patient_data[system], marker='o', linewidth=2, label=system) - axes[i].set_ylabel('Score') - axes[i].set_title(f'Functional System: {system}') - axes[i].grid(True, alpha=0.3) - axes[i].legend() - else: - axes[i].set_title(f'Functional System: {system} (No data)') - axes[i].set_ylabel('Score') - axes[i].grid(True, alpha=0.3) +max_ticks = 8 +if len(event_dates) > max_ticks: + idx = np.linspace(0, len(event_dates) - 1, max_ticks, dtype=int) + event_dates = [event_dates[i] for i in idx] + +# ---------- A4 figure ---------- +fig = plt.figure(figsize=(11.69, 8.27)) +gs = GridSpec(nrows=3, ncols=4, figure=fig, height_ratios=[2.0, 1.0, 1.0], hspace=0.5, wspace=0.35) + +def style_time_axis(ax, show_labels=True): + ax.set_xticks(event_dates) + ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) + ax.tick_params(axis='x', rotation=30, labelsize=8, pad=2) + if not show_labels: + ax.tick_params(labelbottom=False) + +# ---------- EDSS main plot ---------- +ax_main = fig.add_subplot(gs[0, :]) + +if edss_col in patient_data.columns: + y = to_numeric_comma(patient_data[edss_col]) # <-- changed + x = patient_data['MedDatum'] + plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x") + + ax_main.set_title(edss_title, fontsize=14, fontweight='bold') + ax_main.set_ylabel("Score") + ax_main.set_ylim(0, ymax_by_col.get(edss_col, default_ymax)) + ax_main.grid(True, alpha=0.3) + + if not plot_df.empty: + ax_main.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=3, color='tab:red') else: - # Try to find column with similar name (case insensitive) - found_column = None - for col in df.columns: - if system.lower() in col.lower(): - found_column = col - break + ax_main.set_title("EDSS (no numeric data)", fontsize=14, fontweight='bold') +else: + ax_main.set_title("EDSS (missing column GT.EDSS)", fontsize=14, fontweight='bold') + ax_main.set_ylim(0, ymax_by_col.get(edss_col, 10)) + ax_main.grid(True, alpha=0.3) - if found_column: - print(f"Found similar column: {found_column}") - if not patient_data[found_column].isna().all(): - axes[i].plot(patient_data['MedDatum'], patient_data[found_column], marker='o', linewidth=2, label=found_column) - axes[i].set_ylabel('Score') - axes[i].set_title(f'Functional System: {system} (found as: {found_column})') - axes[i].grid(True, alpha=0.3) - axes[i].legend() +style_time_axis(ax_main) + +# ---------- Small aligned plots ---------- +small_axes = [] +for k, (col, title) in enumerate(functional_systems): + r = 1 + (k // 4) + c = (k % 4) + ax = fig.add_subplot(gs[r, c], sharex=ax_main) + small_axes.append(ax) + + ymax = ymax_by_col.get(col, default_ymax) + ax.set_title(title, fontsize=10) + ax.set_ylabel("Score") + ax.set_ylim(0, ymax) + ax.grid(True, alpha=0.3) + + if col in patient_data.columns: + y = to_numeric_comma(patient_data[col]) # <-- changed + x = patient_data['MedDatum'] + plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x") + + if not plot_df.empty: + ax.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=2, color='tab:blue') else: - axes[i].set_title(f'Functional System: {system} (Column not found)') - axes[i].set_ylabel('Score') - axes[i].grid(True, alpha=0.3) + ax.set_title(f"{title} (no data)", fontsize=10) + else: + ax.set_title(f"{title} (missing)", fontsize=10) -# Hide empty subplots -for i in range(len(functional_systems), len(axes)): - axes[i].set_visible(False) + style_time_axis(ax) -# Set x-axis label for the last row only -for i in range(len(functional_systems)): - if i >= len(axes) - num_cols: # Last row - axes[i].set_xlabel('Date') - -# Force date formatting on all axes -for ax in axes: - ax.tick_params(axis='x', rotation=45) - ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d')) - ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator()) - -# Automatically format x-axis dates -plt.gcf().autofmt_xdate() +# Hide x tick labels on first row of small plots +for ax in small_axes[:4]: + ax.tick_params(labelbottom=False) plt.tight_layout() +fig.subplots_adjust(hspace=0.7) plt.show() - ## - # %% Table import pandas as pd import matplotlib.pyplot as plt