Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from pathlib import Path 

2from typing import Union, List 

3 

4from tqdm.auto import tqdm 

5import pandas as pd 

6from elfragmentador import constants 

7from elfragmentador.evaluate import polyfit 

8 

9 

def calculate_file_iRT(file: Union[Path, str]) -> pd.DataFrame:
    """Add a per-run "Calculated iRT" column to a peptide table read from CSV.

    The CSV has to provide the columns "File Name", "Peptide Modified
    Sequence", "Min Start Time" and "Max End Time".  For every "File Name"
    group with at least four rows whose sequence is a key of
    ``constants.IRT_PEPTIDES``, ``polyfit`` is called with the "RT" values and
    the reference "irt" values of those rows.  The first two entries of the
    fit's "polynomial" are then used as slope and intercept to convert the
    "RT" of every row of that group.

    Args:
        file: Location of the CSV file to read.

    Returns:
        The table with the added "RT" and "Calculated iRT" columns, without
        the rows that have a missing value in any column.  Rows of groups
        that got no fit have no calculated value and are therefore removed.
    """
    table = pd.read_csv(str(file))
    # NOTE(review): this is the sum of the two times, not their midpoint --
    # confirm that this is intended.
    table["RT"] = table["Min Start Time"] + table["Max End Time"]

    fits_by_run = {}
    for run_name, run_rows in table.groupby("File Name"):
        is_irt_peptide = [
            seq in constants.IRT_PEPTIDES
            for seq in run_rows["Peptide Modified Sequence"]
        ]
        irt_rows = run_rows[is_irt_peptide].copy()

        # Groups with fewer than four reference peptides are left without a
        # fit.
        if len(irt_rows) >= 4:
            irt_rows["iRT"] = [
                constants.IRT_PEPTIDES[seq]["irt"]
                for seq in irt_rows["Peptide Modified Sequence"]
            ]
            fits_by_run[run_name] = polyfit(irt_rows["RT"], irt_rows["iRT"])

    calculated = []
    for run_name, rt in zip(table["File Name"], table["RT"]):
        fit = fits_by_run.get(run_name)
        if fit is None:
            # No usable fit for this run: mark the row as missing so that it
            # is dropped below.
            calculated.append(None)
        else:
            calculated.append(rt * fit["polynomial"][0] + fit["polynomial"][1])
    table["Calculated iRT"] = calculated

    complete_rows = table.dropna()
    # NOTE(review): reindex() without arguments leaves the index as it is;
    # reset_index(drop=True) may have been intended -- confirm.
    return complete_rows.copy().reindex()

37 

38 

def calculate_multifile_iRT(filelist: List[Union[str, Path]]):
    """Summarise the calculated iRT of every peptide across several files.

    Every file is processed with ``calculate_file_iRT`` (wrapped in a tqdm
    progress bar), the resulting tables are concatenated and then grouped by
    all columns whose name contains "Sequence".

    Args:
        filelist: Locations of the CSV files to process.

    Returns:
        A DataFrame indexed by the sequence column(s) with the columns
        "Calculated iRT mean", "Calculated iRT std" and
        "Calculated iRT count", sorted by ascending "Calculated iRT std".
        Missing aggregates (for example the std of a single observation) are
        reported as 0.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``filelist`` is empty.
    """
    # Generator keeps only one per-file table pending until concat consumes it.
    per_file_tables = (calculate_file_iRT(path) for path in tqdm(filelist))

    out_df = pd.concat(per_file_tables)
    group_cols = [col for col in list(out_df) if "Sequence" in col]
    gdf = (
        out_df.groupby(group_cols)
        .aggregate({"Calculated iRT": ["mean", "std", "count"]})
        .fillna(0)
    )
    # Flatten the ("Calculated iRT", "<stat>") column MultiIndex into plain
    # "Calculated iRT <stat>" names.
    gdf.columns = [" ".join(col) for col in gdf.columns.values]

    # sort_values returns a new frame; the result has to be kept, otherwise
    # the sorting has no effect on what is returned.
    return gdf.sort_values("Calculated iRT std")

51 return gdf