Config Dashboard

This commit is contained in:
2026-02-23 18:19:50 +01:00
parent 118e3e63b3
commit 816c50e467
2 changed files with 124 additions and 97 deletions

View File

@@ -2326,12 +2326,12 @@ def correlation_scatter_raw_certainty_json1_reference(
xs = np.linspace(np.nanmin(x), np.nanmax(x), 200) xs = np.linspace(np.nanmin(x), np.nanmax(x), 200)
ax.plot(xs, a * xs + b, linestyle="--", linewidth=2.5, color=trend_color) ax.plot(xs, a * xs + b, linestyle="--", linewidth=2.5, color=trend_color)
ax.set_xlabel("certainty_percent (from JSON 1, per key)") ax.set_xlabel("certainty percent")
ax.set_ylabel("Absolute Error |EDSS_pred EDSS_gt|" if y_mode == "abs" else "Signed Error (EDSS_pred EDSS_gt)") ax.set_ylabel("Absolute Error" if y_mode == "abs" else "Signed Error (EDSS_pred EDSS_gt)")
ax.set_title( # ax.set_title(
f"Correlation: JSON1 certainty_percent vs {y_col} (All iterations)\n" # f"Correlation: JSON1 certainty_percent vs {y_col} (All iterations)\n"
f"Pearson r={pearson:.3f} | Spearman ρ={spearman:.3f}" # f"Pearson r={pearson:.3f} | Spearman ρ={spearman:.3f}"
) # )
ax.grid(linestyle=":", alpha=0.5) ax.grid(linestyle=":", alpha=0.5)
# Colorbar # Colorbar
@@ -2362,7 +2362,7 @@ correlation_scatter_raw_certainty_json1_reference(
json1_file_path=json1_path, json1_file_path=json1_path,
ground_truth_path="/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv", ground_truth_path="/home/shahin/Lab/Doktorarbeit/Barcelona/Data/GT_Numbers.csv",
y_mode="abs", y_mode="abs",
save_svg_path="/home/shahin/Lab/Doktorarbeit/Barcelona/results/corr_json1_abs_error.svg" # save_svg_path="/home/shahin/Lab/Doktorarbeit/Barcelona/results/corr_json1_abs_error.svg"
) )
## ##

View File

@@ -718,128 +718,155 @@ plt.show()
## ##
# %% Dashboard
# %% Dashboard
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import matplotlib.dates as mdates
from datetime import datetime
import numpy as np import numpy as np
from matplotlib.gridspec import GridSpec
def to_numeric_comma(s: pd.Series) -> pd.Series:
    """Convert a Series to numeric, accepting "." or "," as the decimal mark.

    Each value is cast to ``str``, every "," is replaced literally with ".",
    and the result is parsed with ``pd.to_numeric``. So both ``1.5`` and
    ``1,5`` yield ``1.5``.

    Args:
        s: Series whose values should be interpreted as numbers.

    Returns:
        A numeric Series with the same index; values that cannot be parsed
        become NaN (``errors="coerce"``) instead of raising.
    """
    # regex=False: the comma is a literal character, not a pattern.
    as_text = s.astype(str).str.replace(",", ".", regex=False)
    return pd.to_numeric(as_text, errors="coerce")
# Load the data # Load the data
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv' file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t') df = pd.read_csv(file_path, sep='\t')
# Rename columns to remove 'result.' prefix and handle spaces # Rename columns to remove 'result.' prefix and replace spaces
column_mapping = {} column_mapping = {}
for col in df.columns: for col in df.columns:
if col.startswith('result.'): if col.startswith('result.'):
new_name = col.replace('result.', '') new_name = col.replace('result.', '').replace(' ', '_')
# Handle spaces in column names (replace with underscores if needed)
new_name = new_name.replace(' ', '_')
column_mapping[col] = new_name column_mapping[col] = new_name
df = df.rename(columns=column_mapping) df = df.rename(columns=column_mapping)
# Convert MedDatum to datetime # Parse MedDatum safely
df['MedDatum'] = pd.to_datetime(df['MedDatum']) df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
# Check what columns actually exist in the dataset # Patient
print("Available columns:") patient_id = '6389d658'
print(df.columns.tolist()) patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
print("\nFirst few rows:") if patient_data.empty:
print(df.head()) raise ValueError(f"No data found for patient: {patient_id}")
# Hardcode specific patient names # Functional systems + EDSS
patient_names = ['2bf8486d'] edss_col, edss_title = ('GT.EDSS', 'EDSS')
# Define the functional systems (columns to plot) - adjust based on actual column names functional_systems = [
functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual'] ('GT.VISUAL_OPTIC_FUNCTIONS', 'Visual / Optic'),
('GT.CEREBELLAR_FUNCTIONS', 'Cerebellar'),
('GT.BRAINSTEM_FUNCTIONS', 'Brainstem'),
('GT.SENSORY_FUNCTIONS', 'Sensory'),
('GT.PYRAMIDAL_FUNCTIONS', 'Pyramidal (Motor)'),
('GT.AMBULATION', 'Ambulation'),
('GT.CEREBRAL_FUNCTIONS', 'Cerebral'),
('GT.BOWEL_AND_BLADDER_FUNCTIONS', 'Bowel & Bladder'),
]
# Create subplots horizontally (2 columns, adjust rows as needed) # y-axis max rules
num_plots = len(functional_systems) ymax_by_col = {
num_cols = 2 'GT.PYRAMIDAL_FUNCTIONS': 6,
num_rows = (num_plots + num_cols - 1) // num_cols # Ceiling division 'GT.SENSORY_FUNCTIONS': 6,
'GT.BOWEL_AND_BLADDER_FUNCTIONS': 6,
'GT.VISUAL_OPTIC_FUNCTIONS': 6,
'GT.CEREBELLAR_FUNCTIONS': 5,
'GT.CEREBRAL_FUNCTIONS': 5,
'GT.BRAINSTEM_FUNCTIONS': 5,
'GT.EDSS': 10,
}
default_ymax = 6
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 4*num_rows), sharex=False) # Changed sharex=False # ---------- Build shared "event dates" ticks ----------
if num_plots == 1: cols_for_dates = [edss_col] + [c for c, _ in functional_systems]
axes = [axes] event_dates = []
elif num_rows == 1:
axes = axes
else:
axes = axes.flatten()
# Plot for the hardcoded patient for c in cols_for_dates:
for i, system in enumerate(functional_systems): if c in patient_data.columns:
# Filter data for this specific patient y = to_numeric_comma(patient_data[c]) # <-- changed
patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum') x = patient_data['MedDatum']
tmp = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"])
event_dates.extend(tmp["x"].tolist())
# Check if patient data exists event_dates = sorted(pd.Series(event_dates).drop_duplicates().tolist())
if patient_data.empty:
print(f"No data found for patient: {patient_names[0]}")
continue
# Check if the system column exists in the data max_ticks = 8
if system in patient_data.columns: if len(event_dates) > max_ticks:
# Plot the specific functional system idx = np.linspace(0, len(event_dates) - 1, max_ticks, dtype=int)
if not patient_data[system].isna().all(): event_dates = [event_dates[i] for i in idx]
axes[i].plot(patient_data['MedDatum'], patient_data[system], marker='o', linewidth=2, label=system)
axes[i].set_ylabel('Score') # ---------- A4 figure ----------
axes[i].set_title(f'Functional System: {system}') fig = plt.figure(figsize=(11.69, 8.27))
axes[i].grid(True, alpha=0.3) gs = GridSpec(nrows=3, ncols=4, figure=fig, height_ratios=[2.0, 1.0, 1.0], hspace=0.5, wspace=0.35)
axes[i].legend()
else: def style_time_axis(ax, show_labels=True):
axes[i].set_title(f'Functional System: {system} (No data)') ax.set_xticks(event_dates)
axes[i].set_ylabel('Score') ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
axes[i].grid(True, alpha=0.3) ax.tick_params(axis='x', rotation=30, labelsize=8, pad=2)
if not show_labels:
ax.tick_params(labelbottom=False)
# ---------- EDSS main plot ----------
ax_main = fig.add_subplot(gs[0, :])
if edss_col in patient_data.columns:
y = to_numeric_comma(patient_data[edss_col]) # <-- changed
x = patient_data['MedDatum']
plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x")
ax_main.set_title(edss_title, fontsize=14, fontweight='bold')
ax_main.set_ylabel("Score")
ax_main.set_ylim(0, ymax_by_col.get(edss_col, default_ymax))
ax_main.grid(True, alpha=0.3)
if not plot_df.empty:
ax_main.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=3, color='tab:red')
else: else:
# Try to find column with similar name (case insensitive) ax_main.set_title("EDSS (no numeric data)", fontsize=14, fontweight='bold')
found_column = None else:
for col in df.columns: ax_main.set_title("EDSS (missing column GT.EDSS)", fontsize=14, fontweight='bold')
if system.lower() in col.lower(): ax_main.set_ylim(0, ymax_by_col.get(edss_col, 10))
found_column = col ax_main.grid(True, alpha=0.3)
break
if found_column: style_time_axis(ax_main)
print(f"Found similar column: {found_column}")
if not patient_data[found_column].isna().all(): # ---------- Small aligned plots ----------
axes[i].plot(patient_data['MedDatum'], patient_data[found_column], marker='o', linewidth=2, label=found_column) small_axes = []
axes[i].set_ylabel('Score') for k, (col, title) in enumerate(functional_systems):
axes[i].set_title(f'Functional System: {system} (found as: {found_column})') r = 1 + (k // 4)
axes[i].grid(True, alpha=0.3) c = (k % 4)
axes[i].legend() ax = fig.add_subplot(gs[r, c], sharex=ax_main)
small_axes.append(ax)
ymax = ymax_by_col.get(col, default_ymax)
ax.set_title(title, fontsize=10)
ax.set_ylabel("Score")
ax.set_ylim(0, ymax)
ax.grid(True, alpha=0.3)
if col in patient_data.columns:
y = to_numeric_comma(patient_data[col]) # <-- changed
x = patient_data['MedDatum']
plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x")
if not plot_df.empty:
ax.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=2, color='tab:blue')
else: else:
axes[i].set_title(f'Functional System: {system} (Column not found)') ax.set_title(f"{title} (no data)", fontsize=10)
axes[i].set_ylabel('Score') else:
axes[i].grid(True, alpha=0.3) ax.set_title(f"{title} (missing)", fontsize=10)
# Hide empty subplots style_time_axis(ax)
for i in range(len(functional_systems), len(axes)):
axes[i].set_visible(False)
# Set x-axis label for the last row only # Hide x tick labels on first row of small plots
for i in range(len(functional_systems)): for ax in small_axes[:4]:
if i >= len(axes) - num_cols: # Last row ax.tick_params(labelbottom=False)
axes[i].set_xlabel('Date')
# Force date formatting on all axes
for ax in axes:
ax.tick_params(axis='x', rotation=45)
ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator())
# Automatically format x-axis dates
plt.gcf().autofmt_xdate()
plt.tight_layout() plt.tight_layout()
fig.subplots_adjust(hspace=0.7)
plt.show() plt.show()
## ##
# %% Table # %% Table
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt