From 2f507bcf20685bc486da2089b8e4dcdb42249ae2 Mon Sep 17 00:00:00 2001 From: Shahin Ramezanzadeh Date: Sun, 8 Feb 2026 01:59:38 +0100 Subject: [PATCH] Adjsuting and cleaning --- Data/show_plots.py | 91 ++++++++++++++++++++++++++++ Data/style2.py | 135 ------------------------------------------ Data/styled_tables.py | 74 ----------------------- figure1.py | 57 ++++++++++++++++++ 4 files changed, 148 insertions(+), 209 deletions(-) delete mode 100644 Data/style2.py delete mode 100644 Data/styled_tables.py diff --git a/Data/show_plots.py b/Data/show_plots.py index ab94934..724f46a 100644 --- a/Data/show_plots.py +++ b/Data/show_plots.py @@ -1828,6 +1828,97 @@ plt.tight_layout() plt.show() ## + + +# %% name +import pandas as pd +import matplotlib.pyplot as plt +import os +import numpy as np + +# --- Configuration & Theme --- +plt.rcParams['font.family'] = 'Arial' +figure_save_path = 'project/visuals/functional_systems_magnitude_focus.svg' + +# --- 1. Process Error Data with Magnitude Breakdown --- +system_names = [name.split('.')[1] for name, _ in functional_systems_to_plot] +plot_list = [] + +for gt_col, res_col in functional_systems_to_plot: + sys_name = gt_col.split('.')[1] + + # Robust parsing + gt = df[gt_col].apply(safe_parse) + res = df[res_col].apply(safe_parse) + error = res - gt + + # Granular Counts + matches = (error == 0).sum() + u_1 = (error == -1).sum() + u_2plus = (error <= -2).sum() + o_1 = (error == 1).sum() + o_2plus = (error >= 2).sum() + + total = error.dropna().count() + divisor = max(total, 1) + + plot_list.append({ + 'System': sys_name.replace('_', ' ').title(), + 'Matches': matches, 'MatchPct': (matches / divisor) * 100, + 'U1': u_1, 'U2': u_2plus, 'UnderTotal': u_1 + u_2plus, + 'UnderPct': ((u_1 + u_2plus) / divisor) * 100, + 'O1': o_1, 'O2': o_2plus, 'OverTotal': o_1 + o_2plus, + 'OverPct': ((o_1 + o_2plus) / divisor) * 100 + }) + +stats_df = pd.DataFrame(plot_list) + +# --- 2. Plotting --- +fig, ax = plt.subplots(figsize=(13, 8)) + +# Define Magnitude Colors +c_under_dark, c_under_light = '#C0392B', '#E74C3C' # Dark Red (-2+), Soft Red (-1) +c_over_dark, c_over_light = '#2980B9', '#3498DB' # Dark Blue (+2+), Soft Blue (+1) +bar_height = 0.6 +y_pos = np.arange(len(stats_df)) + +# Plot Under-scored (Stacked: -2+ then -1) +ax.barh(y_pos, -stats_df['U2'], bar_height, color=c_under_dark, label='Under -2+', edgecolor='white') +ax.barh(y_pos, -stats_df['U1'], bar_height, left=-stats_df['U2'], color=c_under_light, label='Under -1', edgecolor='white') + +# Plot Over-scored (Stacked: +1 then +2+) +ax.barh(y_pos, stats_df['O1'], bar_height, color=c_over_light, label='Over +1', edgecolor='white') +ax.barh(y_pos, stats_df['O2'], bar_height, left=stats_df['O1'], color=c_over_dark, label='Over +2+', edgecolor='white') + +# --- 3. Aesthetics & Table Labels --- +for i, row in stats_df.iterrows(): + label_text = ( + f"$\\mathbf{{{row['System']}}}$\n" + f"Match: {int(row['Matches'])} ({row['MatchPct']:.1f}%)\n" + f"Under: {int(row['UnderTotal'])} ({row['UnderPct']:.1f}%) | Over: {int(row['OverTotal'])} ({row['OverPct']:.1f}%)" + ) + # Position table text to the left + ax.text(ax.get_xlim()[0] - 0.5, i, label_text, va='center', ha='right', fontsize=9, color='#333333', linespacing=1.4) + +# Formatting +ax.axvline(0, color='black', linewidth=1.2) +ax.set_yticks([]) +ax.set_xlabel('Number of Patients with Error', fontsize=11, fontweight='bold') +#ax.set_title('Directional Error Magnitude (Under vs. Over Scoring)', fontsize=14, pad=35) + +# Absolute X-axis labels +ax.set_xticklabels([int(abs(tick)) for tick in ax.get_xticks()]) + +# Remove spines and add grid +for spine in ['top', 'right', 'left']: ax.spines[spine].set_visible(False) +ax.xaxis.grid(True, linestyle='--', alpha=0.3) + +# Legend with magnitude info +ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1), ncol=2) + +plt.tight_layout() +plt.show() +## # %% test # Diagnose: what are the actual differences? print("\nšŸ” Raw differences (first 5 rows per system):") diff --git a/Data/style2.py b/Data/style2.py deleted file mode 100644 index ef26d04..0000000 --- a/Data/style2.py +++ /dev/null @@ -1,135 +0,0 @@ -import pandas as pd -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt -import dataframe_image as dfi -# Load data -df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t') - -# 1. Identify all GT and result columns -gt_columns = [col for col in df.columns if col.startswith('GT.')] -result_columns = [col for col in df.columns if col.startswith('result.')] - -print("GT Columns found:", gt_columns) -print("Result Columns found:", result_columns) - -# 2. Create proper mapping between GT and result columns -# Handle various naming conventions (spaces, underscores, etc.) -column_mapping = {} - -for gt_col in gt_columns: - base_name = gt_col.replace('GT.', '') - - # Clean the base name for matching - remove spaces, underscores, etc. - # Try different matching approaches - candidates = [ - f'result.{base_name}', # Exact match - f'result.{base_name.replace(" ", "_")}', # With underscores - f'result.{base_name.replace("_", " ")}', # With spaces - f'result.{base_name.replace(" ", "")}', # No spaces - f'result.{base_name.replace("_", "")}' # No underscores - ] - - # Also try case-insensitive matching - candidates.append(f'result.{base_name.lower()}') - candidates.append(f'result.{base_name.upper()}') - - # Try to find matching result column - matched = False - for candidate in candidates: - if candidate in result_columns: - column_mapping[gt_col] = candidate - matched = True - break - - # If no exact match found, try partial matching - if not matched: - # Try to match by removing special characters and comparing - base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' ']) - for result_col in result_columns: - result_base = result_col.replace('result.', '') - result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' ']) - if base_clean.lower() == result_clean.lower(): - column_mapping[gt_col] = result_col - matched = True - break - -print("Column mapping:", column_mapping) - -# 3. Faster, vectorized computation using the corrected mapping -data_list = [] - -for gt_col, result_col in column_mapping.items(): - print(f"Processing {gt_col} vs {result_col}") - - # Convert to numeric, forcing errors to NaN - s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float) - s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float) - - # Calculate matches (abs difference <= 0.5) - diff = np.abs(s1 - s2) - matches = (diff <= 0.5).sum() - - # Determine the denominator (total valid comparisons) - valid_count = diff.notna().sum() - - if valid_count > 0: - percentage = (matches / valid_count) * 100 - else: - percentage = 0 - - # Extract clean base name for display - base_name = gt_col.replace('GT.', '') - - data_list.append({ - 'GT': base_name, - 'Match %': round(percentage, 1) - }) - - - -# 4. Prepare Data for Plotting -match_df = pd.DataFrame(data_list) -match_df = match_df.sort_values('Match %', ascending=False) # Sort for better visual flow - -# 5. Create the Styled Gradient Table -def style_agreement_table(df): - return (df.style - .format({'Match %': '{:.1f}%'}) # Add % sign - .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient - .set_properties(**{ - 'text-align': 'center', - 'font-size': '12pt', - 'border-collapse': 'collapse', - 'border': '1px solid #D3D3D3' - }) - .set_table_styles([ - # Style the header - {'selector': 'th', 'props': [ - ('background-color', '#404040'), - ('color', 'white'), - ('font-weight', 'bold'), - ('text-transform', 'uppercase'), - ('padding', '10px') - ]}, - # Add hover effect - {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]} - ]) - .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)") - ) - -# To display in a Jupyter Notebook: -styled_table = style_agreement_table(match_df) -styled_table - -dfi.export(styled_table, "styled_table.png") -#styled_table.to_html("agreement_report.html") -# 6. Save as SVG - -#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight') -#print("Successfully saved agreement_table.svg") - -# Show plot if running in a GUI environment -plt.show() - - diff --git a/Data/styled_tables.py b/Data/styled_tables.py deleted file mode 100644 index 4c3ebc5..0000000 --- a/Data/styled_tables.py +++ /dev/null @@ -1,74 +0,0 @@ -import pandas as pd -import numpy as np -import seaborn as sns - -# Sample data (replace with your actual df) -df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t') - -# Identify GT and Result columns -gt_columns = [col for col in df.columns if col.startswith('GT.')] -result_columns = [col for col in df.columns if col.startswith('result.')] - -# Create mapping -column_mapping = {} -for gt_col in gt_columns: - base_name = gt_col.replace('GT.', '') - result_col = f'result.{base_name}' - if result_col in result_columns: - column_mapping[gt_col] = result_col - -# Function to compute match percentage for each GT-Result pair -def compute_match_percentages(df, column_mapping): - percentages = [] - for gt_col, result_col in column_mapping.items(): - count = 0 - total = len(df) - - for _, row in df.iterrows(): - gt_val = row[gt_col] - result_val = row[result_col] - - # Handle NaN values - if pd.isna(gt_val) or pd.isna(result_val): - continue - - # Handle non-numeric values - try: - gt_float = float(gt_val) - result_float = float(result_val) - except (ValueError, TypeError): - # Skip rows with non-numeric values - continue - - # Check if values are within 0.5 tolerance - if abs(gt_float - result_float) <= 0.5: - count += 1 - - percentage = (count / total) * 100 - percentages.append({ - 'GT_Column': gt_col, - 'Result_Column': result_col, - 'Match_Percentage': round(percentage, 1) - }) - - return pd.DataFrame(percentages) - -# Compute match percentages -match_df = compute_match_percentages(df, column_mapping) - -# Create a pivot table for gradient display (optional but helpful) -pivot_table = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage'].unstack(fill_value=0) - -# Apply gradient background -cm = sns.light_palette("green", as_cmap=True) -styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None) - -# Display result -print("Agreement Percentage Table (with gradient):") -styled_table - - - -# Save the styled table to a file -styled_table.to_html("agreement_report.html") -print("Report saved to agreement_report.html") diff --git a/figure1.py b/figure1.py index ee51e62..e9f1ee7 100644 --- a/figure1.py +++ b/figure1.py @@ -263,3 +263,60 @@ plt.legend(frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.05)) plt.tight_layout() plt.show() ## + + + + +# %% name +import matplotlib.pyplot as plt + +# Data +data = { + 'Visit': [9, 8, 7, 6, 5, 4, 3, 2, 1], + 'patient_count': [2, 3, 3, 6, 13, 17, 28, 24, 32] +} + +# Create figure and axis +fig, ax = plt.subplots(figsize=(10, 6)) + +# Plot the bar chart +bars = ax.bar(data['Visit'], data['patient_count'], color='darkblue', label='Patients by Visit Count') + +# Add labels and title +ax.set_xlabel('Visit Number (from last to first)', fontsize=12) +ax.set_ylabel('Number of Patients', fontsize=12) +ax.set_title('Patient Visits by Visit Number', fontsize=14) + +# Invert x-axis to show Visit 9 on the left (descending order) if desired, but keep natural order (1–9 left to right) +# For descending order (9→1 from left to right), we'd need to reverse: +# Visit = data['Visit'][::-1], patient_count = data['patient_count'][::-1] +# But standard practice is ascending (1 to 9), so we'll sort accordingly: +# Let's sort by Visit to ensure left-to-right: 1,2,...,9 + +# Actually, your current Visit list is [9,8,...,1], which is descending. +# Let's sort by Visit for intuitive left-to-right increasing order: +sorted_indices = sorted(range(len(data['Visit'])), key=lambda i: data['Visit'][i]) +visit_sorted = [data['Visit'][i] for i in sorted_indices] +count_sorted = [data['patient_count'][i] for i in sorted_indices] + +# Re-plot with sorted x-axis: +ax.clear() +bars = ax.bar(visit_sorted, count_sorted, color='darkblue', label='Patients by Visit Count') + +# Re-apply labels, etc. +ax.set_xlabel('Number of Visits', fontsize=12) +ax.set_ylabel('Number of Unique Patients', fontsize=12) +#ax.set_title('Number of Patients by Visit Number', fontsize=14) + +# Add legend +ax.legend() + +# Improve layout and grid +ax.grid(axis='y', linestyle='--', alpha=0.7) +plt.xticks(visit_sorted) # Ensure all integer visit numbers are shown + +# Show the plot +plt.tight_layout() +plt.show() + +##