updated git ignore and new files
This commit is contained in:
71
Data/styled_tables.py
Normal file
71
Data/styled_tables.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import seaborn as sns
|
||||
|
||||
# Load the tab-separated input table (replace the path with your own data file)
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# Prefixes that mark the ground-truth and the result columns
GT_PREFIX = 'GT.'
RESULT_PREFIX = 'result.'

# Identify GT and result columns
gt_columns = [col for col in df.columns if col.startswith(GT_PREFIX)]
result_columns = [col for col in df.columns if col.startswith(RESULT_PREFIX)]

# Pair every GT column with the result column that shares its base name.
# Only the leading prefix is stripped: str.replace() would also remove any
# later 'GT.' inside the name and pair the column with the wrong result.
column_mapping = {}
for gt_col in gt_columns:
    base_name = gt_col[len(GT_PREFIX):]
    result_col = RESULT_PREFIX + base_name
    # GT columns without a counterpart among the result columns are left out
    if result_col in result_columns:
        column_mapping[gt_col] = result_col
|
||||
|
||||
# Function to compute match percentage for each GT-Result pair
|
||||
def compute_match_percentages(df, column_mapping, tolerance=0.5):
    """Compute, per GT/result column pair, the share of rows that agree.

    Two values agree when both convert with ``float()`` and their absolute
    difference is at most ``tolerance``. Rows where either value is missing
    or non-numeric never count as a match, but they still count towards the
    denominator, which is the total number of rows in ``df``.

    Args:
        df: DataFrame holding every column named in ``column_mapping``.
        column_mapping: Mapping of ground-truth column name to the result
            column name it is compared against.
        tolerance: Largest absolute difference still treated as a match.

    Returns:
        DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal).
        The three columns are present even when ``column_mapping`` is empty.
    """
    total = len(df)
    records = []
    for gt_col, result_col in column_mapping.items():
        # Walk both columns in lockstep instead of materialising a Series
        # per row with iterrows().
        matches = sum(
            _values_agree(gt_val, result_val, tolerance)
            for gt_val, result_val in zip(df[gt_col], df[result_col])
        )
        # An empty frame has nothing to agree on: report 0 rather than
        # dividing by zero.
        percentage = (matches / total) * 100 if total else 0.0
        records.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1),
        })

    # Naming the columns keeps the frame's layout stable for callers that
    # index by column even when there were no pairs to compare.
    return pd.DataFrame(
        records,
        columns=['GT_Column', 'Result_Column', 'Match_Percentage'],
    )


def _values_agree(gt_val, result_val, tolerance):
    """Return True when both values are numeric and within *tolerance*."""
    # Missing values can never agree
    if pd.isna(gt_val) or pd.isna(result_val):
        return False
    try:
        difference = abs(float(gt_val) - float(result_val))
    except (ValueError, TypeError):
        # Non-numeric values are treated as a non-match
        return False
    return difference <= tolerance
|
||||
|
||||
# Score every mapped GT/result column pair
match_df = compute_match_percentages(df, column_mapping)

# Reshape the scores into a GT-by-result grid for the gradient display;
# combinations without a score are filled with 0
pair_scores = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage']
pivot_table = pair_scores.unstack(fill_value=0)

# Shade the cells with a light-to-green colormap; axis=None scales the
# gradient over the whole table instead of per row or per column
cm = sns.light_palette("green", as_cmap=True)
styled_table = pivot_table.style.background_gradient(axis=None, cmap=cm)

# Display result
print("Agreement Percentage Table (with gradient):")
# NOTE(review): a bare expression is only echoed by interactive front ends
# such as a notebook; run as a plain script this line shows nothing.
styled_table
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user