beautiful plot
This commit is contained in:
@@ -407,7 +407,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import dataframe_image as dfi
|
||||
# Load data
|
||||
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
|
||||
|
||||
@@ -491,32 +491,76 @@ for gt_col, result_col in column_mapping.items():
|
||||
'Match %': round(percentage, 1)
|
||||
})
|
||||
|
||||
# 4. Prepare Data
match_df = pd.DataFrame(data_list)

# Handle the case where no matches were found. This has to run before any
# column access: a frame built from an empty list has no 'GT' / 'Match %'
# columns, so the label clean-up and sort below would raise KeyError.
if match_df.empty:
    print("No valid column pairs found for comparison")
    # Same exit status as the interactive-only exit() helper, without
    # depending on the site module having installed it.
    raise SystemExit

# Clean up labels: replace underscores with spaces and title-case them.
match_df['GT'] = match_df['GT'].str.replace('_', ' ', regex=False).str.title()
# Highest agreement first.
match_df = match_df.sort_values('Match %', ascending=False)
# 5. Create a "Beautiful" Table using Seaborn Heatmap
def create_luxury_table(df, output_file="edss_agreement.png", n_samples=None):
    """Render the agreement percentages as a table-like heatmap and save it.

    Args:
        df: DataFrame providing a 'GT' column (used as row labels) and a
            'Match %' column (the values drawn and annotated).
        output_file: Path handed to ``savefig`` for the rendered image.
        n_samples: Optional sample count appended to the footer. When None
            the footer only states the tolerance, instead of printing an
            unfilled placeholder.

    Returns:
        None. The figure is written to ``output_file`` and a confirmation
        line is printed.
    """
    # Set the aesthetic style
    sns.set_theme(style="white", font="sans-serif")

    # Prepare data for heatmap: one row per label, a single value column
    plot_data = df.set_index('GT')[['Match %']]

    # Initialize the figure; height is dynamic based on number of rows
    fig, ax = plt.subplots(figsize=(8, len(df) * 0.6))

    # Custom diverging colour map between the two anchor hues (15 and 135)
    cmap = sns.diverging_palette(15, 135, s=80, l=55, as_cmap=True)

    # Draw the heatmap on the axes created above
    sns.heatmap(
        plot_data,
        annot=True,
        fmt=".1f",
        cmap=cmap,
        center=85,           # Centers the color transition
        vmin=50, vmax=100,   # Range of the gradient
        linewidths=2,
        linecolor='white',
        cbar=False,          # Remove color bar for a "table" look
        annot_kws={"size": 14, "weight": "bold", "family": "sans-serif"},
        ax=ax,
    )

    # Styling the axes (turning the heatmap into a table)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.xaxis.tick_top()  # Move the column label to the top
    ax.set_xticklabels(['Agreement (%)'], fontsize=14, fontweight='bold', color='#2c3e50')
    ax.tick_params(axis='y', labelsize=12, labelcolor='#2c3e50', length=0)

    # Add a thin border around the plot
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_color('#ecf0f1')

    ax.set_title('EDSS Subcategory Consistency Analysis', fontsize=16, pad=40,
                 fontweight='bold', color='#2c3e50')

    # Add a subtle footer; the sample count is shown only when supplied
    footer = "Tolerance: ±0.5 points"
    if n_samples is not None:
        footer += f" | N = {n_samples}"
    fig.text(0.5, 0.02, footer,
             wrap=True, horizontalalignment='center', fontsize=10,
             color='gray', style='italic')

    # Save with high resolution
    fig.tight_layout()
    fig.savefig(output_file, dpi=300, bbox_inches='tight')
    print(f"Beautiful table saved as {output_file}")
|
||||
# Execute: render the agreement table and write it to disk.
create_luxury_table(match_df)

# Run the function
# NOTE(review): save_styled_table is not defined in the visible part of this
# file - confirm it exists elsewhere, otherwise this call raises NameError.
save_styled_table(match_df)

# 6. Save as SVG
#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
#print("Successfully saved agreement_table.svg")
|
||||
|
||||
135
Data/style2.py
Normal file
135
Data/style2.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
import dataframe_image as dfi
|
||||
# Load data
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# 1. Identify all GT and result columns
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]

print("GT Columns found:", gt_columns)
print("Result Columns found:", result_columns)


def _separator_key(name):
    """Lowercase *name*, keeping only alphanumerics, underscores and spaces."""
    return ''.join(ch for ch in name if ch.isalnum() or ch in ('_', ' ')).lower()


def _alnum_key(name):
    """Lowercase *name*, keeping only alphanumerics (separators ignored)."""
    return ''.join(ch for ch in name if ch.isalnum()).lower()


def _find_result_column(gt_col, result_columns):
    """Return the result column paired with *gt_col*, or None if none fits.

    Matching is tried from strictest to loosest so that a looser rule can
    never override a pairing a stricter rule would have produced.
    """
    base_name = gt_col.replace('GT.', '')

    # Pass 1: literal spellings (exact, separator swaps, case variants).
    candidates = [
        f'result.{base_name}',                    # Exact match
        f'result.{base_name.replace(" ", "_")}',  # With underscores
        f'result.{base_name.replace("_", " ")}',  # With spaces
        f'result.{base_name.replace(" ", "")}',   # No spaces
        f'result.{base_name.replace("_", "")}',   # No underscores
        f'result.{base_name.lower()}',            # Lower-cased
        f'result.{base_name.upper()}',            # Upper-cased
    ]
    for candidate in candidates:
        if candidate in result_columns:
            return candidate

    # Pass 2: case-insensitive, special characters dropped, separators kept.
    wanted = _separator_key(base_name)
    for result_col in result_columns:
        if _separator_key(result_col.replace('result.', '')) == wanted:
            return result_col

    # Pass 3: case-insensitive with separators dropped as well, so that
    # e.g. 'Visual Function' pairs with 'visual_function'. An empty key is
    # skipped because it would match any other all-punctuation name.
    wanted = _alnum_key(base_name)
    if wanted:
        for result_col in result_columns:
            if _alnum_key(result_col.replace('result.', '')) == wanted:
                return result_col

    return None


# 2. Create proper mapping between GT and result columns
# Handle various naming conventions (spaces, underscores, case)
column_mapping = {}

for gt_col in gt_columns:
    result_col = _find_result_column(gt_col, result_columns)
    if result_col is None:
        # Report the gap instead of dropping the column silently.
        print(f"No result column found for {gt_col}; skipping")
        continue
    column_mapping[gt_col] = result_col

print("Column mapping:", column_mapping)
|
||||
# 3. Faster, vectorized computation using the corrected mapping
data_list = []

for gt_col, result_col in column_mapping.items():
    print(f"Processing {gt_col} vs {result_col}")

    # Convert to numeric, forcing unparseable entries to NaN
    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)

    # A NaN on either side yields a NaN difference, which counts neither as
    # a match nor as a valid comparison.
    diff = np.abs(s1 - s2)

    # Calculate matches (abs difference <= 0.5)
    matches = (diff <= 0.5).sum()

    # Determine the denominator (total valid comparisons)
    valid_count = diff.notna().sum()

    # No valid comparison is reported as 0 rather than dividing by zero.
    percentage = (matches / valid_count) * 100 if valid_count > 0 else 0

    data_list.append({
        'GT': gt_col.replace('GT.', ''),  # Clean base name for display
        'Match %': round(percentage, 1),
    })

# 4. Prepare Data for Plotting
# With no column pairs the frame would have no 'Match %' column and the sort
# below would raise KeyError; stop with a readable message instead.
if not data_list:
    raise SystemExit("No valid column pairs found for comparison")

match_df = pd.DataFrame(data_list)
match_df = match_df.sort_values('Match %', ascending=False)  # Sort for better visual flow
|
||||
# 5. Create the Styled Gradient Table
|
||||
def style_agreement_table(df):
|
||||
return (df.style
|
||||
.format({'Match %': '{:.1f}%'}) # Add % sign
|
||||
.background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
|
||||
.set_properties(**{
|
||||
'text-align': 'center',
|
||||
'font-size': '12pt',
|
||||
'border-collapse': 'collapse',
|
||||
'border': '1px solid #D3D3D3'
|
||||
})
|
||||
.set_table_styles([
|
||||
# Style the header
|
||||
{'selector': 'th', 'props': [
|
||||
('background-color', '#404040'),
|
||||
('color', 'white'),
|
||||
('font-weight', 'bold'),
|
||||
('text-transform', 'uppercase'),
|
||||
('padding', '10px')
|
||||
]},
|
||||
# Add hover effect
|
||||
{'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
|
||||
])
|
||||
.set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
|
||||
)
|
||||
|
||||
# To display in a Jupyter Notebook: the bare name on the second line only
# renders as a cell result there; in a plain script it has no effect.
styled_table = style_agreement_table(match_df)
styled_table

# Write the styled table out as an image.
dfi.export(styled_table, "styled_table.png")
#styled_table.to_html("agreement_report.html")

# 6. Save as SVG
#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
#print("Successfully saved agreement_table.svg")

# Show plot if running in a GUI environment
plt.show()
|
||||
|
||||
Reference in New Issue
Block a user