From a1a8abfb8e9447ce31394615dae9c80eeecbcc6a Mon Sep 17 00:00:00 2001 From: Shahin Ramezanzadeh Date: Mon, 19 Jan 2026 01:26:14 +0100 Subject: [PATCH] beautiful plot --- Data/show_plots.py | 84 +++++++++++++++++++++------- Data/style2.py | 135 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+), 20 deletions(-) create mode 100644 Data/style2.py diff --git a/Data/show_plots.py b/Data/show_plots.py index b24578e..bda9e3c 100644 --- a/Data/show_plots.py +++ b/Data/show_plots.py @@ -407,7 +407,7 @@ import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt - +import dataframe_image as dfi # Load data df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t') @@ -491,32 +491,76 @@ for gt_col, result_col in column_mapping.items(): 'Match %': round(percentage, 1) }) -# 4. Prepare Data for Plotting + + + +# 4. Prepare Data match_df = pd.DataFrame(data_list) +# Clean up labels: Replace underscores with spaces and capitalize +match_df['GT'] = match_df['GT'].str.replace('_', ' ').str.title() +match_df = match_df.sort_values('Match %', ascending=False) -# Handle case where no matches were found -if len(match_df) == 0: - print("No valid column pairs found for comparison") - exit() +# 5. Create a "Beautiful" Table using Seaborn Heatmap +def create_luxury_table(df, output_file="edss_agreement.png"): + # Set the aesthetic style + sns.set_theme(style="white", font="sans-serif") -# 5. Create the Plot -plt.figure(figsize=(10, 8)) -sns.set_theme(style="white") + # Prepare data for heatmap + plot_data = df.set_index('GT')[['Match %']] -# Create heatmap -ax = sns.heatmap( - match_df.set_index('GT')[['Match %']], # Just the percentage column - annot=True, # Show the numbers in the boxes - fmt=".1f", # Format to 1 decimal place - cmap="YlGnBu", # Yellow-Green-Blue color palette - cbar_kws={'label': 'Agreement (%)'}, - linewidths=.5 -) + # Initialize the figure + # Height is dynamic based on number of rows + fig, ax = plt.subplots(figsize=(8, len(df) * 0.6)) -plt.title('Agreement Percentage (Tolerance ±0.5)', pad=20) -plt.tight_layout() + # Create a custom diverging color map (Deep Red -> Mustard -> Emerald) + # This looks more professional than standard 'RdYlGn' + cmap = sns.diverging_palette(15, 135, s=80, l=55, as_cmap=True) + # Draw the heatmap + sns.heatmap( + plot_data, + annot=True, + fmt=".1f", + cmap=cmap, + center=85, # Centers the color transition + vmin=50, vmax=100, # Range of the gradient + linewidths=2, + linecolor='white', + cbar=False, # Remove color bar for a "table" look + annot_kws={"size": 14, "weight": "bold", "family": "sans-serif"} + ) + + # Styling the Axes (Turning the heatmap into a table) + ax.set_xlabel("") + ax.set_ylabel("") + ax.xaxis.tick_top() # Move "Match %" label to top + ax.set_xticklabels(['Agreement (%)'], fontsize=14, fontweight='bold', color='#2c3e50') + ax.tick_params(axis='y', labelsize=12, labelcolor='#2c3e50', length=0) + + # Add a thin border around the plot + for _, spine in ax.spines.items(): + spine.set_visible(True) + spine.set_color('#ecf0f1') + + plt.title('EDSS Subcategory Consistency Analysis', fontsize=16, pad=40, fontweight='bold', color='#2c3e50') + + # Add a subtle footer + plt.figtext(0.5, 0.02, "Tolerance: ±0.5 points | N = [Total Samples]", + wrap=True, horizontalalignment='center', fontsize=10, color='gray', style='italic') + + # Save with high resolution + plt.tight_layout() + plt.savefig(output_file, dpi=300, bbox_inches='tight') + print(f"Beautiful table saved as {output_file}") + +# Execute +create_luxury_table(match_df) + + +# Run the function +save_styled_table(match_df) # 6. Save as SVG + #plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight') #print("Successfully saved agreement_table.svg") diff --git a/Data/style2.py b/Data/style2.py new file mode 100644 index 0000000..ef26d04 --- /dev/null +++ b/Data/style2.py @@ -0,0 +1,135 @@ +import pandas as pd +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +import dataframe_image as dfi +# Load data +df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t') + +# 1. Identify all GT and result columns +gt_columns = [col for col in df.columns if col.startswith('GT.')] +result_columns = [col for col in df.columns if col.startswith('result.')] + +print("GT Columns found:", gt_columns) +print("Result Columns found:", result_columns) + +# 2. Create proper mapping between GT and result columns +# Handle various naming conventions (spaces, underscores, etc.) +column_mapping = {} + +for gt_col in gt_columns: + base_name = gt_col.replace('GT.', '') + + # Clean the base name for matching - remove spaces, underscores, etc. + # Try different matching approaches + candidates = [ + f'result.{base_name}', # Exact match + f'result.{base_name.replace(" ", "_")}', # With underscores + f'result.{base_name.replace("_", " ")}', # With spaces + f'result.{base_name.replace(" ", "")}', # No spaces + f'result.{base_name.replace("_", "")}' # No underscores + ] + + # Also try case-insensitive matching + candidates.append(f'result.{base_name.lower()}') + candidates.append(f'result.{base_name.upper()}') + + # Try to find matching result column + matched = False + for candidate in candidates: + if candidate in result_columns: + column_mapping[gt_col] = candidate + matched = True + break + + # If no exact match found, try partial matching + if not matched: + # Try to match by removing special characters and comparing + base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' ']) + for result_col in result_columns: + result_base = result_col.replace('result.', '') + result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' ']) + if base_clean.lower() == result_clean.lower(): + column_mapping[gt_col] = result_col + matched = True + break + +print("Column mapping:", column_mapping) + +# 3. Faster, vectorized computation using the corrected mapping +data_list = [] + +for gt_col, result_col in column_mapping.items(): + print(f"Processing {gt_col} vs {result_col}") + + # Convert to numeric, forcing errors to NaN + s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float) + s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float) + + # Calculate matches (abs difference <= 0.5) + diff = np.abs(s1 - s2) + matches = (diff <= 0.5).sum() + + # Determine the denominator (total valid comparisons) + valid_count = diff.notna().sum() + + if valid_count > 0: + percentage = (matches / valid_count) * 100 + else: + percentage = 0 + + # Extract clean base name for display + base_name = gt_col.replace('GT.', '') + + data_list.append({ + 'GT': base_name, + 'Match %': round(percentage, 1) + }) + + + +# 4. Prepare Data for Plotting +match_df = pd.DataFrame(data_list) +match_df = match_df.sort_values('Match %', ascending=False) # Sort for better visual flow + +# 5. Create the Styled Gradient Table +def style_agreement_table(df): + return (df.style + .format({'Match %': '{:.1f}%'}) # Add % sign + .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient + .set_properties(**{ + 'text-align': 'center', + 'font-size': '12pt', + 'border-collapse': 'collapse', + 'border': '1px solid #D3D3D3' + }) + .set_table_styles([ + # Style the header + {'selector': 'th', 'props': [ + ('background-color', '#404040'), + ('color', 'white'), + ('font-weight', 'bold'), + ('text-transform', 'uppercase'), + ('padding', '10px') + ]}, + # Add hover effect + {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]} + ]) + .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)") + ) + +# To display in a Jupyter Notebook: +styled_table = style_agreement_table(match_df) +styled_table + +dfi.export(styled_table, "styled_table.png") +#styled_table.to_html("agreement_report.html") +# 6. Save as SVG + +#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight') +#print("Successfully saved agreement_table.svg") + +# Show plot if running in a GUI environment +plt.show() + +