WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content

Commit f435e91

Browse files
Merge pull request #6 from compomics/minor-changes
Minor changes
2 parents 1f9c335 + 6efcfbb commit f435e91

File tree

8 files changed

+721
-520
lines changed

8 files changed

+721
-520
lines changed

.github/workflows/test.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
name: Test and Lint Workflow
2+
3+
on:
4+
push:
5+
branches:
6+
- '*'
7+
pull_request:
8+
branches:
9+
- main
10+
11+
jobs:
12+
test:
13+
runs-on: ubuntu-latest
14+
strategy:
15+
matrix:
16+
python-version: ['3.10', '3.11', '3.12']
17+
18+
steps:
19+
# Checkout the code from the repository
20+
- name: Checkout code
21+
uses: actions/checkout@v3
22+
23+
# Set up Python environment for each version in the matrix
24+
- name: Set up Python ${{ matrix.python-version }}
25+
uses: actions/setup-python@v4
26+
with:
27+
python-version: ${{ matrix.python-version }}
28+
29+
# Install dependencies
30+
- name: Install dependencies
31+
run: |
32+
python -m pip install --upgrade pip
33+
pip install .[dev] # Install both runtime and dev dependencies
34+
35+
# Run linting with ruff (this will catch print statements)
36+
- name: Lint with ruff
37+
run: |
38+
pip install ruff # Ensure ruff is available if not installed through dev dependencies
39+
ruff check mumble tests # Lint your package and test directories
40+
41+
# Run formatting checks with black
42+
- name: Check formatting with black
43+
run: |
44+
pip install black # Ensure black is available if not installed through dev dependencies
45+
black --check . # Check if code is correctly formatted
46+
47+
# Run tests with pytest
48+
- name: Run tests with pytest
49+
run: |
50+
pip install pytest
51+
pytest --maxfail=1 --disable-warnings -q

mumble/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
__version__ = "0.1.2"
1+
__version__ = "0.2.0"
2+
__all__ = ["PSMHandler"]
23

34
from mumble.mumble import PSMHandler

mumble/__main__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,19 @@
11
import click
2+
import logging
3+
4+
from rich.logging import RichHandler
25

36
from mumble import PSMHandler
47

58

9+
# setup logging
10+
logging.basicConfig(
11+
level=logging.INFO, # Set the logging level
12+
format="%(message)s", # Simple format for logging
13+
datefmt="[%X]", # Time format
14+
handlers=[RichHandler(rich_tracebacks=True, show_path=False)],
15+
)
16+
617
# Define CLI options as a dictionary
718
CLI_OPTIONS = {
819
"psm_list": {

mumble/file_handler.py

Lines changed: 56 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class _SpectrumFileHandler:
3030
This class uses the pyteomics library for parsing MGF and mzML files.
3131
Parsed spectra are stored as `rustyms.RawSpectrum` objects.
3232
"""
33-
33+
3434
def __init__(self, spectrum_file: str):
3535
self.spectrum_file = spectrum_file
3636
self.spectra = {} # Initialize an empty dictionary to hold the spectra
@@ -46,7 +46,6 @@ def __init__(self, spectrum_file: str):
4646
else:
4747
raise ValueError("Unsupported file format. Only MGF and mzML are supported.")
4848

49-
5049
def _parse_mgf(self):
5150
"""
5251
Parse an MGF (Mascot Generic Format) file and store each spectrum as a RawSpectrum object.
@@ -61,31 +60,37 @@ def _parse_mgf(self):
6160
try:
6261
with mgf.MGF(self.spectrum_file) as spectra:
6362
for spectrum in spectra:
64-
spectrum_id = spectrum['params'].get('title', 'Unknown') # Extract spectrum ID from the MGF params
65-
precursor_mass = spectrum['params'].get('pepmass', [None])[0] # Extract precursor mass
66-
63+
spectrum_id = spectrum["params"].get(
64+
"title", "Unknown"
65+
) # Extract spectrum ID from the MGF params
66+
precursor_mass = spectrum["params"].get("pepmass", [None])[
67+
0
68+
] # Extract precursor mass
69+
6770
# Extract retention time
6871
rt = 0.0
69-
if 'rtinseconds' in spectrum['params']:
70-
rt = float(spectrum['params']['rtinseconds'])
71-
elif 'retention time' in spectrum['params']:
72-
rt = float(spectrum['params']['retention time'])
72+
if "rtinseconds" in spectrum["params"]:
73+
rt = float(spectrum["params"]["rtinseconds"])
74+
elif "retention time" in spectrum["params"]:
75+
rt = float(spectrum["params"]["retention time"])
7376

7477
# Extract precursor charge
7578
precursor_charge = 0
76-
if 'charge' in spectrum['params']:
77-
charge_str = spectrum['params']['charge']
78-
precursor_charge = int(charge_str.strip('+')) # Remove '+' and convert to int
79+
if "charge" in spectrum["params"]:
80+
charge_str = spectrum["params"]["charge"]
81+
precursor_charge = int(
82+
charge_str.strip("+")
83+
) # Remove '+' and convert to int
7984

8085
# Create a RawSpectrum object using required fields and additional attributes
8186
self.spectra[spectrum_id] = RawSpectrum(
82-
title=spectrum_id,
83-
num_scans=len(spectrum['m/z array']),
87+
title=spectrum_id,
88+
num_scans=len(spectrum["m/z array"]),
8489
rt=rt,
8590
precursor_charge=precursor_charge,
86-
mz_array=np.array(spectrum['m/z array']),
87-
intensity_array=np.array(spectrum['intensity array']),
88-
precursor_mass=precursor_mass
91+
mz_array=np.array(spectrum["m/z array"]),
92+
intensity_array=np.array(spectrum["intensity array"]),
93+
precursor_mass=precursor_mass,
8994
)
9095
logging.info(f"Parsed {len(self.spectra)} spectra from {self.spectrum_file}")
9196
except Exception as e:
@@ -105,36 +110,40 @@ def _parse_mzml(self):
105110
try:
106111
with mzml.MzML(self.spectrum_file) as spectra:
107112
for spectrum in spectra:
108-
spectrum_id = spectrum.get('id', None) # Get the spectrum ID from the mzML spectrum
113+
spectrum_id = spectrum.get(
114+
"id", None
115+
) # Get the spectrum ID from the mzML spectrum
109116
precursor_mass = 0.0
110117
precursor_charge = 0
111118
rt = 0.0
112119

113120
# Extract precursor mass and charge if available
114-
if 'precursorList' in spectrum and spectrum['precursorList']:
115-
precursor = spectrum['precursorList']['precursor'][0]
116-
if 'selectedIonList' in precursor:
117-
selected_ion = precursor['selectedIonList']['selectedIon'][0]
118-
precursor_mass = selected_ion.get('selected ion m/z', 0.0)
119-
precursor_charge = int(selected_ion.get('charge state', 0))
121+
if "precursorList" in spectrum and spectrum["precursorList"]:
122+
precursor = spectrum["precursorList"]["precursor"][0]
123+
if "selectedIonList" in precursor:
124+
selected_ion = precursor["selectedIonList"]["selectedIon"][0]
125+
precursor_mass = selected_ion.get("selected ion m/z", 0.0)
126+
precursor_charge = int(selected_ion.get("charge state", 0))
120127

121128
# Extract retention time
122-
if 'scanList' in spectrum and spectrum['scanList']:
123-
scan = spectrum['scanList']['scan'][0]
124-
for cv_param in scan.get('cvParam', []):
125-
if cv_param.get('accession') == 'MS:1000016': # accession for scan start time
126-
rt = float(cv_param.get('value', 0.0))
129+
if "scanList" in spectrum and spectrum["scanList"]:
130+
scan = spectrum["scanList"]["scan"][0]
131+
for cv_param in scan.get("cvParam", []):
132+
if (
133+
cv_param.get("accession") == "MS:1000016"
134+
): # accession for scan start time
135+
rt = float(cv_param.get("value", 0.0))
127136
break
128137

129138
# Create a RawSpectrum object using required fields and additional attributes
130139
self.spectra[spectrum_id] = RawSpectrum(
131140
title=spectrum_id,
132-
num_scans=len(spectrum['m/z array']),
141+
num_scans=len(spectrum["m/z array"]),
133142
rt=rt,
134143
precursor_charge=precursor_charge,
135-
mz_array=np.array(spectrum['m/z array']),
136-
intensity_array=np.array(spectrum['intensity array']),
137-
precursor_mass=precursor_mass
144+
mz_array=np.array(spectrum["m/z array"]),
145+
intensity_array=np.array(spectrum["intensity array"]),
146+
precursor_mass=precursor_mass,
138147
)
139148
logging.info(f"Parsed {len(self.spectra)} spectra from {self.spectrum_file}")
140149
except Exception as e:
@@ -143,10 +152,10 @@ def _parse_mzml(self):
143152
def get_spectrum_from_psm(self, psm: PSM):
144153
"""
145154
Retrieve a RawSpectrum for a PSM by its ID.
146-
155+
147156
Args:
148157
psm (PSM): psm object
149-
158+
150159
Returns:
151160
RawSpectrum: The retrieved spectrum or None if not found.
152161
"""
@@ -155,10 +164,10 @@ def get_spectrum_from_psm(self, psm: PSM):
155164
def get_spectra_from_psm_list(self, psmList: PSMList):
156165
"""
157166
Retrieve all spectra for a PSMList.
158-
167+
159168
Args:
160169
psmList (PSMList): A list of PSM objects.
161-
170+
162171
Returns:
163172
list: A list of RawSpectrum objects corresponding to the PSMs.
164173
None is included for any spectra not found.
@@ -168,7 +177,7 @@ def get_spectra_from_psm_list(self, psmList: PSMList):
168177
def get_all_spectra(self):
169178
"""
170179
Retrieve all parsed spectra.
171-
180+
172181
Returns:
173182
dict: A dictionary of all parsed spectra, where keys are spectrum IDs
174183
and values are RawSpectrum objects.
@@ -180,12 +189,12 @@ class _MetadataParser:
180189
"""
181190
Class to parse metadata files (CSV/TSV) containing PSM information.
182191
"""
183-
192+
184193
@staticmethod
185194
def parse_csv_file(file_name: str, delimiter: str = "\t") -> list:
186195
"""
187196
Parse a CSV or TSV file containing PSM information and create PSM objects.
188-
197+
189198
Args:
190199
file_name (str): Path to the CSV or TSV file.
191200
delimiter (str, optional): Delimiter used in the file. Defaults to "\t".
@@ -200,11 +209,11 @@ def parse_csv_file(file_name: str, delimiter: str = "\t") -> list:
200209
pd.errors.ParserError: If there's an error parsing the file.
201210
202211
Notes:
203-
The file must contain at least the following columns:
212+
The file must contain at least the following columns:
204213
'peptidoform', 'spectrum_id', and 'precursor_mz'.
205214
If any of these columns are missing, an error is logged and an empty list is returned.
206215
"""
207-
216+
208217
try:
209218
df = pd.read_csv(file_name, delimiter=delimiter)
210219
except FileNotFoundError as e:
@@ -228,7 +237,11 @@ def parse_csv_file(file_name: str, delimiter: str = "\t") -> list:
228237

229238
# Create a list of PSM objects from the DataFrame rows
230239
peptidoforms = [
231-
PSM(peptidoform=row["peptidoform"], spectrum_id=row["spectrum_id"], precursor_mz=row["precursor_mz"])
240+
PSM(
241+
peptidoform=row["peptidoform"],
242+
spectrum_id=row["spectrum_id"],
243+
precursor_mz=row["precursor_mz"],
244+
)
232245
for _, row in df.iterrows()
233246
]
234247

0 commit comments

Comments
 (0)