Modifications

This commit is contained in:
2026-04-27 11:52:53 +02:00
parent 816c50e467
commit 90d411f086
3 changed files with 446 additions and 98 deletions
+133 -96
View File
@@ -401,7 +401,7 @@ import seaborn as sns
import numpy as np
# Load your data from TSV file
# The table is read as tab-separated text into the module-level DataFrame `df`.
# NOTE(review): `file_path` is assigned twice in a row, so only the second
# value (the `results/` directory) reaches `read_csv`. The first assignment
# looks like the pre-change side of this diff hunk — confirm and drop it.
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_results_unique.tsv'
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/results/join_results_unique.tsv'
df = pd.read_csv(file_path, sep='\t')
# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
@@ -745,7 +745,7 @@ df = df.rename(columns=column_mapping)
# Convert the date column to datetimes; errors='coerce' turns values that
# cannot be parsed into NaT instead of raising.
df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
# Patient
# NOTE(review): `patient_id` is assigned twice in a row, so only 'd13e4aa3'
# is used below. The first assignment looks like the pre-change side of this
# diff hunk — confirm and drop it.
patient_id = '6389d658'
patient_id = 'd13e4aa3'
# Keep only this patient's rows, ordered by date, as an independent copy.
patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
# Stop early when the ID matches no rows.
# NOTE(review): the `raise` has lost its indentation in this view; it belongs
# inside the `if` body.
if patient_data.empty:
raise ValueError(f"No data found for patient: {patient_id}")
@@ -1764,100 +1764,7 @@ plt.show()
# %% Difference Gemini easy
# Diverging horizontal bar chart of scoring errors per functional system:
# under-scored counts extend to the left of zero, over-scored counts to the
# right, and a text block next to each bar lists matches and percentages.
# Uses `df`, `functional_systems_to_plot` and `safe_parse` from earlier cells.
from matplotlib.ticker import FuncFormatter

# --- 1. Process Error Data ---
system_names = [name.split('.')[1] for name, _ in functional_systems_to_plot]
plot_list = []
for gt_col, res_col in functional_systems_to_plot:
    sys_name = gt_col.split('.')[1]
    # Robust parsing
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)
    error = res - gt
    # Calculate counts (comparisons against NaN are False, so rows with a
    # NaN error land in none of the three buckets)
    matches = (error == 0).sum()
    under = (error < 0).sum()
    over = (error > 0).sum()
    total = error.dropna().count()
    # Calculate percentages
    # Using max(total, 1) to avoid division by zero
    divisor = max(total, 1)
    plot_list.append({
        'System': sys_name.replace('_', ' ').title(),
        'Matches': matches,
        'MatchPct': (matches / divisor) * 100,
        'Under': under,
        'UnderPct': (under / divisor) * 100,
        'Over': over,
        'OverPct': (over / divisor) * 100,
    })
stats_df = pd.DataFrame(plot_list)

# --- 2. Plotting ---
fig, ax = plt.subplots(figsize=(12, 8))  # Slightly taller for multi-line labels
color_under = '#E74C3C'
color_over = '#3498DB'
bar_height = 0.6
y_pos = np.arange(len(stats_df))
ax.barh(y_pos, -stats_df['Under'], bar_height, label='Under-scored',
        color=color_under, edgecolor='white', alpha=0.8)
ax.barh(y_pos, stats_df['Over'], bar_height, label='Over-scored',
        color=color_over, edgecolor='white', alpha=0.8)

# --- 3. Aesthetics & Labels ---
# Text is anchored just left of the current x-axis minimum; adding text does
# not change the limits, so the anchor is computed once.
label_x = ax.get_xlim()[0] - 0.5
for i, row in stats_df.iterrows():
    # Mathtext ignores plain spaces, so escape them to keep multi-word
    # system names readable inside \mathbf{...}.
    bold_name = row['System'].replace(' ', r'\ ')
    # Detailed label: system name (bold) / matches / under % | over %
    # The backslash is doubled so the f-string has no invalid "\m" escape.
    label_text = (
        f"$\\mathbf{{{bold_name}}}$\n"
        f"Matches: {int(row['Matches'])} ({row['MatchPct']:.1f}%)\n"
        f"Under: {int(row['Under'])} ({row['UnderPct']:.1f}%) | Over: {int(row['Over'])} ({row['OverPct']:.1f}%)"
    )
    ax.text(label_x, i, label_text, va='center', ha='right', fontsize=9,
            color='#333333', linespacing=1.3)

# Zero line
ax.axvline(0, color='black', linewidth=1.2, alpha=0.7)

# Clean up axes
ax.set_yticks([])
ax.set_xlabel('Number of Patients with Error', fontsize=11, fontweight='bold', labelpad=10)
#ax.set_title('Directional Error Analysis by Functional System', fontsize=14, pad=30)

# Make X-axis labels absolute. A formatter (rather than a fixed list of label
# strings) keeps the labels in sync with whatever ticks the locator picks
# after tight_layout() resizes the axes.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{abs(value):g}"))

# Remove spines
for spine in ['top', 'right', 'left']:
    ax.spines[spine].set_visible(False)

# Legend
ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1))

# Grid
ax.xaxis.grid(True, linestyle='--', alpha=0.3)
plt.tight_layout()
plt.show()
##
# %% name
# %% Difference Plot Gemini
import pandas as pd
import matplotlib.pyplot as plt
import os
@@ -1946,6 +1853,136 @@ ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1), ncol=2)
plt.tight_layout()
plt.show()
##
# %% Functional System Error Boxplots
# One box per functional system showing the distribution of non-zero scoring
# errors (result - ground truth); the figure is saved as SVG and then shown.
# Uses `df`, `functional_systems_to_plot` and `safe_parse` from earlier cells.
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
from matplotlib.patches import Patch
from matplotlib.lines import Line2D

# --- Configuration & Theme ---
plt.rcParams['font.family'] = 'Arial'
figure_save_path = 'project/visuals/functional_systems_boxplot.svg'

# --- 1. Build error data for boxplots ---
boxplot_data = []
system_labels = []
sample_sizes = []
for gt_col, res_col in functional_systems_to_plot:
    sys_name = gt_col.split('.')[1]
    # Robust parsing
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)
    # Error = result - ground truth
    error = (res - gt).dropna()
    # Ignore all 0 errors
    error = error[error != 0]
    # Keep only systems that actually have non-zero data
    if len(error) > 0:
        clean_name = sys_name.replace('_', ' ').title()
        boxplot_data.append(error.values)
        system_labels.append(clean_name)
        sample_sizes.append(len(error))

# Safety check
if not boxplot_data:
    raise ValueError("No valid non-zero error data available for any functional system.")

# Put n into x-axis labels so it doesn't overlap the plot
xtick_labels = [f"{label}\n(n={n})" for label, n in zip(system_labels, sample_sizes)]

# --- 2. Plotting ---
fig, ax = plt.subplots(figsize=(14, 8))
# Vertical boxes are the default orientation. Tick labels are applied further
# down with set_xticklabels() rather than through boxplot's `labels=` keyword,
# which Matplotlib 3.9 deprecated in favour of `tick_labels`.
bp = ax.boxplot(
    boxplot_data,
    patch_artist=True,
    showmeans=True,
    meanline=False,
)

# --- 3. Styling ---
box_face = '#D6EAF8'
box_edge = '#2980B9'
whisker_col = '#7F8C8D'
median_col = '#C0392B'
mean_col = '#1ABC9C'
flier_face = '#95A5A6'
flier_edge = '#7F8C8D'
for box in bp['boxes']:
    box.set(facecolor=box_face, edgecolor=box_edge, linewidth=1.5)
for whisker in bp['whiskers']:
    whisker.set(color=whisker_col, linewidth=1.2)
for cap in bp['caps']:
    cap.set(color=whisker_col, linewidth=1.2)
for median in bp['medians']:
    median.set(color=median_col, linewidth=2)
for mean in bp['means']:
    mean.set(marker='o', markerfacecolor=mean_col, markeredgecolor='black', markersize=6)
for flier in bp['fliers']:
    flier.set(marker='o', markerfacecolor=flier_face, markeredgecolor=flier_edge,
              alpha=0.6, markersize=4)

# Reference line at zero error
ax.axhline(0, color='black', linewidth=1.2, linestyle='--')

# Labels and formatting
ax.set_xlabel('Functional System', fontsize=11, fontweight='bold')
ax.set_ylabel('Error (Result - Ground Truth)', fontsize=11, fontweight='bold')

# boxplot() fixes one tick per box, so the labels map one-to-one; rotate them
# for readability.
ax.set_xticklabels(xtick_labels, rotation=45, ha='right')

# Grid and spines
ax.yaxis.grid(True, linestyle='--', alpha=0.3)
for spine in ['top', 'right']:
    ax.spines[spine].set_visible(False)

# --- 4. Legend above the plot, outside the axes ---
legend_handles = [
    Patch(facecolor=box_face, edgecolor=box_edge, label='IQR (25th-75th percentile)'),
    Line2D([0], [0], color=median_col, lw=2, label='Median'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=mean_col,
           markeredgecolor='black', markersize=7, label='Mean'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=flier_face,
           markeredgecolor=flier_edge, alpha=0.8, markersize=6, label='Outlier'),
    Line2D([0], [0], color='black', lw=1.2, linestyle='--', label='Zero error reference'),
]
ax.legend(
    handles=legend_handles,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.02),
    ncol=3,
    frameon=False,
)

# Leave room at the top for the legend
plt.tight_layout(rect=[0, 0, 1, 0.90])

# Optional save. os.makedirs('') raises, so only create the directory when the
# save path actually has a directory component.
save_dir = os.path.dirname(figure_save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
plt.savefig(figure_save_path, format='svg', bbox_inches='tight')
plt.show()
##
# %% test
# Diagnose: what are the actual differences?
print("\n🔍 Raw differences (first 5 rows per system):")