timbmg committed on
Commit
4caa453
·
unverified ·
1 Parent(s): 098f27a

initial commit

.dockerignore ADDED
@@ -0,0 +1,15 @@
+ __pycache__/
+ data/
+ .env
+ .gitignore
+ .gitattributes
+ .git
+ .github
+ .streamlit
+ .vscode
+ .idea
+ .pytest_cache/
+ .coverage
+ .tox/
+ .venv/
+ .cache/
.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__/
+ data/
+ .cache/
Dockerfile ADDED
@@ -0,0 +1,35 @@
+ # Dockerfile for ScicoQA Demo - HuggingFace Spaces
+ FROM python:3.11-slim
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements first for better caching
+ COPY requirements.txt requirements.txt
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . /app
+
+ # Create data directories
+ RUN mkdir -p /app/data/papers /app/data/repos-raw
+
+ # Set environment variables for Streamlit
+ ENV STREAMLIT_SERVER_PORT=7860
+ ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
+ ENV STREAMLIT_SERVER_HEADLESS=true
+ ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+
+ # Expose port for HuggingFace Spaces
+ EXPOSE 7860
+
+ # Run Streamlit app
+ CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0", "--server.headless", "true"]
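Note: the Space builds this image automatically; assuming Docker is available locally, it can presumably also be built and tested with `docker build -t scicoqa-demo .` and `docker run -p 7860:7860 scicoqa-demo` (image tag illustrative). Outside Docker, the app should start with `streamlit run app.py` once the requirements.txt dependencies are installed.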
README.md CHANGED
@@ -1,10 +1,8 @@
  ---
- title: Scicoqa
- emoji: 🌖
+ title: SciCoQA Discrepancy Detection
+ emoji: 🔬
  colorFrom: indigo
  colorTo: blue
  sdk: docker
  pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py ADDED
@@ -0,0 +1,981 @@
+ """Main Streamlit app for ScicoQA Discrepancy Detection Demo."""
+
+ import logging
+ import time
+ from pathlib import Path
+
+ import streamlit as st
+ from dotenv import load_dotenv
+
+ from core.arxiv2md_demo import Arxiv2MD
+ from core.code_loader_demo import CodeLoader
+ from core.llm_demo import LLM
+ from core.model_config import (
+     PROVIDER_PRESETS,
+     create_local_model_config,
+     create_provider_model_config,
+     get_api_key_env_name,
+     get_provider_from_model,
+ )
+ from core.ollama_models import fetch_ollama_models
+ from core.openrouter_models import fetch_free_models, get_model_config
+ from core.prompt_demo import Prompt
+ from core.token_counter_demo import TokenCounter
+ from parsing import parse_discrepancies
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ )
+ logger = logging.getLogger(__name__)
+
+ # Page configuration
+ st.set_page_config(
+     page_title="SciCoQA Paper-Code Discrepancy Detection",
+     page_icon="🔬",
+     layout="wide",
+     initial_sidebar_state="expanded",  # must be "auto", "expanded", or "collapsed"
+ )
+
+
+ # Constants
+ MAX_CONTEXT_SIZE = 131072  # Default max context
+ MAX_TOKENS_BUFFER = 0.9  # Use 90% of max tokens
+
+
+ def validate_urls(arxiv_url: str, github_url: str) -> tuple[bool, str]:
+     """Validate input URLs."""
+     if not arxiv_url:
+         return False, "Please provide an arXiv URL"
+     if not github_url:
+         return False, "Please provide a GitHub URL"
+
+     if "arxiv.org" not in arxiv_url and not arxiv_url.startswith("http"):
+         # Accept a bare arXiv ID (e.g., 2006.12834); Arxiv2MD handles bare IDs directly
+         if not arxiv_url.replace(".", "").replace("v", "").isdigit():
+             return False, "Invalid arXiv URL format"
+
+     if "github.com" not in github_url:
+         return False, "Please provide a valid GitHub URL"
+
+     return True, ""
+
+
+ def validate_files(paper_file, code_file) -> tuple[bool, str]:
+     """Validate uploaded files."""
+     if paper_file is None:
+         return False, "Please upload a paper markdown file"
+     if code_file is None:
+         return False, "Please upload a repository text file"
+
+     # Check file types
+     if paper_file.name and not paper_file.name.endswith((".md", ".markdown", ".txt")):
+         return False, "Paper file should be a markdown (.md) or text (.txt) file"
+     if code_file.name and not code_file.name.endswith(".txt"):
+         return False, "Repository file should be a text (.txt) file"
+
+     return True, ""
+
+
+ def process_discrepancy_detection(
+     paper_text: str | None = None,
+     code_text: str | None = None,
+     arxiv_url: str | None = None,
+     github_url: str | None = None,
+     model_config: dict | None = None,
+ ):
+     """Main processing pipeline for discrepancy detection."""
+     results = {
+         "paper_text": None,
+         "code_prompt": None,
+         "prompt": None,
+         "llm_response": None,
+         "discrepancies": None,
+         "error": None,
+         "step_timings": None,
+     }
+
+     # Use a single compact status container
+     step_timings = {}  # Store timings for each step
+
+     # Note: Uploaded files (paper_text, code_text) are only in memory and never saved.
+     # URL fetches (arxiv_url, github_url) use persistent cache directories for performance.
+
+     try:
+         with st.status("🔄 Processing...", expanded=False) as status:
+             try:
+                 # Step 1: Fetch/process paper
+                 step_start = time.time()
+                 if arxiv_url:
+                     # Fetch from arXiv - use persistent cache directory
+                     status.update(label="📄 Fetching paper from arXiv...", state="running")
+                     try:
+                         # Use persistent directory for caching (OK to save fetched papers)
+                         arxiv2md = Arxiv2MD(output_dir=Path("data/papers"))
+                         paper_text = arxiv2md(arxiv_url)
+                         results["paper_text"] = paper_text
+                         step_time = time.time() - step_start
+                         step_timings["Paper Fetch"] = step_time
+                         st.write(f"✅ Paper fetched: {step_time:.1f}s")
+                         status.update(
+                             label=f"✅ Paper fetched ({step_time:.1f}s)",
+                             state="running",
+                         )
+                     except Exception as e:
+                         error_msg = f"Error fetching paper: {str(e)}"
+                         logger.error(error_msg)
+                         results["error"] = error_msg
+                         status.update(label="❌ Error fetching paper", state="error")
+                         return results
+                 else:
+                     # Use provided paper text
+                     status.update(label="📄 Processing paper...", state="running")
+                     try:
+                         results["paper_text"] = paper_text
+                         step_time = time.time() - step_start
+                         step_timings["Paper Processing"] = step_time
+                         st.write(f"✅ Paper processed: {step_time:.1f}s")
+                         status.update(
+                             label=f"✅ Paper processed ({step_time:.1f}s)",
+                             state="running",
+                         )
+                     except Exception as e:
+                         error_msg = f"Error processing paper: {str(e)}"
+                         logger.error(error_msg)
+                         results["error"] = error_msg
+                         status.update(label="❌ Error processing paper", state="error")
+                         return results
+
+                 # Step 2: Fetch/process code
+                 step_start = time.time()
+                 code_loader = None
+                 if github_url:
+                     # Fetch from GitHub - use persistent cache directory
+                     status.update(label="📦 Fetching code from GitHub...", state="running")
+                     try:
+                         # Use persistent directory for caching (OK to save fetched repos)
+                         code_loader = CodeLoader(
+                             github_url=github_url,
+                             max_file_size_mb=1.0,
+                             raw_repo_dir=Path("data/repos-raw"),
+                         )
+                         step_time = time.time() - step_start
+                         step_timings["Repository Clone"] = step_time
+                         st.write(f"✅ Repository cloned: {step_time:.1f}s")
+                         status.update(
+                             label=f"✅ Repository cloned ({step_time:.1f}s)",
+                             state="running",
+                         )
+                     except Exception as e:
+                         error_msg = f"Error cloning repository: {str(e)}"
+                         logger.error(error_msg)
+                         results["error"] = error_msg
+                         status.update(label="❌ Error cloning repository", state="error")
+                         return results
+                 else:
+                     # Code text is already provided
+                     status.update(label="📦 Processing repository...", state="running")
+                     step_time = time.time() - step_start
+                     step_timings["Code Processing"] = step_time
+                     st.write(f"✅ Repository processed: {step_time:.1f}s")
+                     status.update(
+                         label=f"✅ Repository processed ({step_time:.1f}s)",
+                         state="running",
+                     )
+
+                 # Step 3: Calculate tokens and prepare prompt
+                 step_start = time.time()
+                 status.update(label="📝 Preparing prompt...", state="running")
+                 try:
+                     # Use provided model config
+                     tokenizer_name = model_config["tokenizer"]
+                     max_context = model_config["max_context"]
+
+                     token_counter = TokenCounter(model=tokenizer_name)
+
+                     # Calculate tokens for paper + prompt template
+                     prompt_template = Prompt("discrepancy_generation")
+                     intermediate_prompt = prompt_template(paper=paper_text, code="")
+                     tokens_intermediate_prompt = token_counter(intermediate_prompt)
+
+                     # Calculate remaining tokens for code
+                     max_total_tokens = int(max_context * MAX_TOKENS_BUFFER)
+                     remaining_code_tokens = max_total_tokens - tokens_intermediate_prompt
+
+                     logger.info(f"Tokens in intermediate prompt: {tokens_intermediate_prompt}")
+                     logger.info(f"Remaining tokens for code: {remaining_code_tokens}")
+
+                     # Get code prompt with token limit
+                     if code_loader:
+                         # Use CodeLoader for GitHub repos
+                         code_prompt = code_loader.get_code_prompt(
+                             token_counter=token_counter,
+                             max_tokens=remaining_code_tokens,
+                         )
+                     else:
+                         # Truncate code text to fit within token limit
+                         # Simple approach: count tokens as we add content
+                         code_prompt = ""
+                         code_tokens = 0
+                         code_lines = code_text.split("\n")
+
+                         for line in code_lines:
+                             line_with_newline = line + "\n"
+                             line_tokens = token_counter(line_with_newline)
+                             if code_tokens + line_tokens > remaining_code_tokens:
+                                 logger.warning(f"Truncating code at {code_tokens} tokens (limit: {remaining_code_tokens})")
+                                 break
+                             code_prompt += line_with_newline
+                             code_tokens += line_tokens
+
+                     results["code_prompt"] = code_prompt
+
+                     # Construct final prompt
+                     final_prompt = prompt_template(paper=paper_text, code=code_prompt)
+                     results["prompt"] = final_prompt
+
+                     final_tokens = token_counter(final_prompt)
+                     logger.info(f"Total tokens in final prompt: {final_tokens}")
+
+                     # Calculate max_tokens for completion (respecting the model's context
+                     # limit); leave some buffer for safety (use 95% of remaining context)
+                     remaining_for_completion = max_context - final_tokens
+
+                     if remaining_for_completion <= 0:
+                         error_msg = f"Prompt too long: {final_tokens} tokens exceeds model's context limit of {max_context} tokens"
+                         logger.error(error_msg)
+                         results["error"] = error_msg
+                         status.update(label="❌ Prompt too long", state="error")
+                         return results
+
+                     # Use 95% of remaining to be safe, but ensure at least some tokens
+                     max_tokens_for_completion = max(1, int(remaining_for_completion * 0.95))
+
+                     logger.info(f"Max context: {max_context}, Input tokens: {final_tokens}, Remaining: {remaining_for_completion}, Max completion tokens: {max_tokens_for_completion}")
+
+                     step_time = time.time() - step_start
+                     step_timings["Prompt Preparation"] = step_time
+                     st.write(f"✅ Prompt prepared: {step_time:.1f}s ({final_tokens:,} tokens, max output: {max_tokens_for_completion:,} tokens)")
+                     status.update(
+                         label=f"✅ Prompt prepared ({step_time:.1f}s, {final_tokens:,} tokens)",
+                         state="running",
+                     )
+                 except Exception as e:
+                     error_msg = f"Error preparing prompt: {str(e)}"
+                     logger.error(error_msg)
+                     results["error"] = error_msg
+                     status.update(label="❌ Error preparing prompt", state="error")
+                     return results
+
+                 # Step 4: Detect discrepancies with LLM
+                 step_start = time.time()
+                 status.update(label="🤖\uFE0F Detecting discrepancies (this may take a while)...", state="running")
+                 try:
+                     # Extract model configuration
+                     model = model_config["model"]
+                     api_key = model_config.get("api_key")
+                     api_base = model_config.get("api_base")
+                     max_context = model_config.get("max_context")
+
+                     llm = LLM(
+                         model=model,
+                         api_key=api_key,
+                         api_base=api_base,
+                         temperature=1.0,
+                         top_p=1.0,
+                         reasoning_effort="high",
+                         max_context=max_context,
+                         max_tokens=max_tokens_for_completion,  # Respect model's context limit
+                     )
+
+                     response = llm(final_prompt)
+                     results["llm_response"] = response
+
+                     # Extract content from response
+                     choices = response.get("choices", [])
+                     if not choices:
+                         raise ValueError("No choices in LLM response")
+
+                     content = (
+                         choices[0]
+                         .get("message", {})
+                         .get("content", "")
+                     )
+
+                     if not content:
+                         raise ValueError("Empty content in LLM response")
+
+                     # Parse discrepancies
+                     discrepancies = parse_discrepancies(content)
+                     results["discrepancies"] = discrepancies
+
+                     step_time = time.time() - step_start
+                     step_timings["LLM Inference"] = step_time
+                     total_time = sum(step_timings.values())
+
+                     st.write(f"✅ LLM inference: {step_time:.1f}s")
+                     st.write("---")
+                     st.write(f"**Total time: {total_time:.1f}s**")
+
+                     if discrepancies:
+                         count = len(discrepancies)
+                         discrepancy_text = "discrepancy" if count == 1 else "discrepancies"
+                         status.update(
+                             label=f"✅ Complete! Found {count} {discrepancy_text} ({total_time:.1f}s total)",
+                             state="complete",
+                         )
+                     else:
+                         status.update(
+                             label=f"✅ Complete! No discrepancies found ({total_time:.1f}s total)",
+                             state="complete",
+                         )
+
+                 except Exception as e:
+                     error_msg = f"Error during LLM inference: {str(e)}"
+                     logger.error(error_msg)
+                     results["error"] = error_msg
+                     status.update(label="❌ Error during inference", state="error")
+                     return results
+
+             except Exception as e:
+                 error_msg = f"Unexpected error: {str(e)}"
+                 logger.error(error_msg, exc_info=True)
+                 results["error"] = error_msg
+                 status.update(label="❌ Unexpected error", state="error")
+                 return results
+
+         results["step_timings"] = step_timings
+         return results
+
+     except Exception as e:
+         # Handle any errors that occur outside the status context
+         error_msg = f"Unexpected error: {str(e)}"
+         logger.error(error_msg, exc_info=True)
+         results["error"] = error_msg
+         return results
+
+
+ def main():
367
+ """Main Streamlit app."""
368
+ st.title("🔬 :rainbow[SciCoQA] Paper-Code Discrepancy Detection")
369
+ st.markdown(
370
+ """
371
+ _Detect discrepancies between scientific papers and their code implementations._
372
+ """
373
+ )
374
+
375
+ # About section in main area
376
+ with st.expander("ℹ️ About", expanded=False):
377
+ st.markdown(
378
+ """
379
+ This tool is a demo of our research paper on detecting discrepancies between scientific papers and their
380
+ code implementations. You can read our paper here: [arXiv:2601.XXXX](https://arxiv.org/pdf/2601.XXXX).
381
+
382
+ This tool helps researchers and developers identify inconsistencies between scientific papers and their
383
+ corresponding code implementations. Such discrepancies can lead to reproducibility issues, incorrect
384
+ implementations, or misunderstandings of the research. By using advanced LLMs to analyze both the paper
385
+ text and code, this app automatically detects mismatches in algorithms, parameters, data processing steps,
386
+ and other implementation details.
387
+
388
+ **⚠️ Important Limitations:**
389
+ Our research found that **recall is still low** - meaning the tool may miss some discrepancies.
390
+ **All outputs should be used with human verification** and should not be relied upon as the sole method
391
+ for discrepancy detection.
392
+
393
+ **LLM Provider Recommendations:**
394
+ - **Free Models (OpenRouter)**: Best for quick checks of already public paper+code combinations
395
+ - **Local Models (Ollama/vLLM)**: Best for privacy-sensitive content, e.g. for unpublished papers or code
396
+ - **Provider Models (OpenAI, Anthropic, etc.)**: Best for high precision and best recall
397
+
398
+ **Features:**
399
+ - Support for multiple LLM providers (free, local, or premium models)
400
+ - Automatic content fetching from arXiv and GitHub
401
+ - File upload support for custom papers and repositories
402
+ - Secure API key handling (keys never stored or logged)
403
+
404
+ **Resources:**
405
+ - 📦 **Code**: [GitHub Repository](https://github.com/UKPLab/scicoqa)
406
+ - 📊 **Dataset**: [Hugging Face Dataset](https://huggingface.co/datasets/ukplab/scicoqa)
407
+ - 🌐 **Project Website**: [ukplab.github.io/scicoqa](https://ukplab.github.io/scicoqa)
408
+
409
+ **Citation:**
410
+ If you find this tool useful, please cite our paper:
411
+ ```bibtex
412
+ @article{scicoqa2026,
413
+ title = {SciCoQA: Quality Assurance for Scientific Paper-Code Alignment},
414
+ author = {Baumgärtner, Tim and Gurevych, Iryna},
415
+ journal = {arXiv preprint arXiv:XXXX.XXXXX},
416
+ year = {2026},
417
+ url = {https://github.com/UKPLab/scicoqa}
418
+ }
419
+ ```
420
+ """
421
+ )
422
+
+     # ========== SIDEBAR: Model Configuration ==========
+     with st.sidebar:
+         st.header("🤖\uFE0F Model Configuration")
+
+         # Determine label based on current selection
+         model_config = None
+         model_name = None
+         display_model_name = None
+
+         # Check if we have a model config in session state
+         if "model_config" in st.session_state and st.session_state.model_config:
+             existing_config = st.session_state.model_config
+             display_model_name = existing_config.get("name") or existing_config.get("model", "Unknown")
+
+         if display_model_name:
+             st.caption(f"Current: {display_model_name}")
+
+         # Model type selection
+         model_type = st.radio(
+             "Model Type",
+             options=["Free Models (OpenRouter)", "Local Model (Ollama/vLLM)", "Provider (OpenAI, Anthropic, Gemini, etc.)"],
+             help="Select free models (no API key), local models (Ollama/vLLM), or provider models (requires API key)",
+             key="model_type_radio",
+             index=0,  # Default to Free Models
+         )
+         # Store in session state for access outside sidebar
+         st.session_state.model_type = model_type
+
+         st.divider()
+
+         # Model selection based on type
+         if model_type == "Free Models (OpenRouter)":
+             # Fetch free models from OpenRouter API (uses file-based cache, refreshes daily)
+             if "free_models_cache" not in st.session_state:
+                 with st.spinner("Loading free models from OpenRouter..."):
+                     free_models_raw = fetch_free_models()
+                     st.session_state.free_models_cache = free_models_raw
+
+             free_models_raw = st.session_state.free_models_cache
+
+             if not free_models_raw:
+                 st.error("⚠️ Could not fetch free models from OpenRouter. Please try again later or use a different model type.")
+                 model_config = None
+             else:
+                 # Show privacy warning
+                 st.warning(
+                     "⚠️ **Privacy Notice**: Free models are provided via [OpenRouter](https://openrouter.ai). "
+                     "The model provider may log your prompts and outputs. For enhanced privacy, consider using Local or Provider models with your own API keys."
+                 )
+                 # Create model options from fetched models
+                 model_options = {get_model_config(m)["name"]: get_model_config(m) for m in free_models_raw}
+
+                 if model_options:
+                     # Find the index of the default free model (Nemotron 3 Nano 30B)
+                     model_names = list(model_options.keys())
+                     default_index = 0
+                     for idx, name in enumerate(model_names):
+                         if "nemotron 3 nano 30b" in name.lower():
+                             default_index = idx
+                             break
+
+                     model_name = st.selectbox(
+                         "Select Free Model",
+                         options=model_names,
+                         help="Free models via OpenRouter (no API key required)",
+                         key="free_model_select",
+                         index=default_index,
+                     )
+                     model_config = model_options[model_name]
+
+                 else:
+                     st.error("⚠️ No free models available. Please try again later or use a different model type.")
+                     model_config = None
+
+         elif model_type == "Local Model (Ollama/vLLM)":
+             st.info("🖥️ **Local Model**: Use models running locally via Ollama or vLLM (OpenAI-compatible server).")
+
+             local_model_type = st.radio(
+                 "Local Server Type",
+                 options=["Ollama", "vLLM (OpenAI-compatible)"],
+                 help="Select the type of local server",
+                 key="local_server_type",
+             )
+
+             if local_model_type == "Ollama":
+                 # API Base URL comes first
+                 api_base = st.text_input(
+                     "API Base URL",
+                     value="http://localhost:11434",
+                     help="Ollama API base URL",
+                     key="ollama_api_base",
+                 )
+
+                 # Query Ollama for available models if API base is provided
+                 model_input = None
+                 if api_base and api_base.strip():
+                     try:
+                         with st.spinner("Fetching available models from Ollama..."):
+                             available_models = fetch_ollama_models(api_base.strip())
+
+                         if available_models:
+                             model_input = st.selectbox(
+                                 "Select Model",
+                                 options=available_models,
+                                 help="Select a model from your Ollama server",
+                                 key="ollama_model_select",
+                             )
+                         else:
+                             st.warning("⚠️ No models found or unable to connect to Ollama. You can still enter a model name manually.")
+                             model_input = st.text_input(
+                                 "Model Name (manual entry)",
+                                 placeholder="e.g., llama2, mistral, codellama",
+                                 help="Enter the Ollama model name manually (without 'ollama/' prefix)",
+                                 key="ollama_model_input_manual",
+                             )
+                     except Exception as e:
+                         logger.error(f"Error fetching Ollama models: {e}")
+                         st.warning(f"⚠️ Could not fetch models from Ollama: {str(e)}. You can still enter a model name manually.")
+                         model_input = st.text_input(
+                             "Model Name (manual entry)",
+                             placeholder="e.g., llama2, mistral, codellama",
+                             help="Enter the Ollama model name manually (without 'ollama/' prefix)",
+                             key="ollama_model_input_manual",
+                         )
+                 else:
+                     st.info("💡 Enter the API Base URL above to see available models, or enter a model name manually below.")
+                     model_input = st.text_input(
+                         "Model Name",
+                         placeholder="e.g., llama2, mistral, codellama",
+                         help="Enter the Ollama model name (without 'ollama/' prefix)",
+                         key="ollama_model_input",
+                     )
+
+                 max_context = st.number_input(
+                     "Max Context (tokens)",
+                     min_value=1000,
+                     max_value=1000000,
+                     value=131072,
+                     step=1000,
+                     help="Maximum context window size in tokens",
+                     key="ollama_max_context",
+                 )
+
+                 if model_input and api_base:
+                     model_name = f"ollama/{model_input}"
+                     model_config = create_local_model_config(
+                         model=model_name,
+                         api_base=api_base.strip(),
+                         max_context=max_context,
+                     )
+             else:  # vLLM
+                 model_input = st.text_input(
+                     "Model Name",
+                     placeholder="e.g., gpt-3.5-turbo, mistralai/Mistral-7B-Instruct-v0.1",
+                     help="Enter the model name for vLLM",
+                     key="vllm_model_input",
+                 )
+                 api_base = st.text_input(
+                     "API Base URL",
+                     value="http://localhost:8000/v1",
+                     help="vLLM API base URL (OpenAI-compatible endpoint)",
+                     key="vllm_api_base",
+                 )
+                 max_context = st.number_input(
+                     "Max Context (tokens)",
+                     min_value=1000,
+                     max_value=1000000,
+                     value=131072,
+                     step=1000,
+                     help="Maximum context window size in tokens",
+                     key="vllm_max_context",
+                 )
+
+                 if model_input:
+                     model_name = model_input
+                     model_config = create_local_model_config(
+                         model=model_name,
+                         api_base=api_base,
+                         max_context=max_context,
+                     )
+
+         else:  # Provider Model
+             st.info("🔑 **Provider Model**: Use your own API keys to access premium models. Your keys are never stored, logged, or displayed.")
+
+             provider_subtype = st.radio(
+                 "Model Selection",
+                 options=["Preset", "Custom"],
+                 help="Select from preset models or enter a custom model",
+                 key="provider_subtype",
+             )
+
+             if provider_subtype == "Preset":
+                 model_name = st.selectbox(
+                     "Select Model",
+                     options=list(PROVIDER_PRESETS.keys()),
+                     help="Select a preset model (API key required)",
+                     key="preset_model_select",
+                 )
+                 preset_config = PROVIDER_PRESETS[model_name]
+                 api_key_env = preset_config["api_key_env"]
+                 api_key_label = api_key_env.replace("_", " ").title()
+
+                 api_key = st.text_input(
+                     api_key_label,
+                     type="password",
+                     help=f"Enter your {api_key_label}. Your key is never stored, logged, or displayed.",
+                     placeholder="sk-..." if "OPENAI" in api_key_env else "Enter API key",
+                     key="preset_api_key",
+                 )
+
+                 if api_key:
+                     model_config = create_provider_model_config(
+                         model=preset_config["model"],
+                         api_key=api_key,
+                         max_context=preset_config["max_context"],
+                         tokenizer=preset_config["tokenizer"],
+                     )
+             else:  # Custom
+                 custom_model_name = st.text_input(
+                     "Model Name (litellm format)",
+                     placeholder="e.g., gpt-4o, claude-3-5-sonnet, gemini/gemini-1.5-pro",
+                     help="Enter the model name in litellm format. See [litellm documentation](https://docs.litellm.ai/docs/providers) for supported formats.",
+                     key="custom_model_name",
+                 )
+                 custom_max_context = st.number_input(
+                     "Max Context (tokens)",
+                     min_value=1000,
+                     max_value=10000000,
+                     value=128000,
+                     step=1000,
+                     help="Maximum context window size in tokens",
+                     key="custom_max_context",
+                 )
+
+                 if custom_model_name:
+                     provider = get_provider_from_model(custom_model_name)
+                     api_key_env = get_api_key_env_name(provider)
+                     api_key_label = api_key_env.replace("_", " ").title()
+
+                     api_key = st.text_input(
+                         api_key_label,
+                         type="password",
+                         help=f"Enter your {api_key_label}. Your key is never stored, logged, or displayed.",
+                         placeholder="sk-..." if "OPENAI" in api_key_env else "Enter API key",
+                         key="custom_api_key",
+                     )
+
+                     if api_key:
+                         model_name = custom_model_name
+                         model_config = create_provider_model_config(
+                             model=custom_model_name,
+                             api_key=api_key,
+                             max_context=custom_max_context,
+                         )
+
+                 st.markdown(
+                     "📚 **Need help with model format?** See the [litellm documentation](https://docs.litellm.ai/docs/providers) "
+                     "for supported providers and model naming conventions."
+                 )
+
+             st.caption("🔒 Your API key is secure: never stored, logged, or displayed")
+
+         # Show max context info if a model is selected
+         if model_config:
+             st.caption(f"📊 Max Context: {model_config['max_context']:,} tokens")
+
+     # ========== MAIN AREA: Input Form and Results ==========
+
+     # Store model config in session state for next render
+     if model_config:
+         st.session_state.model_config = model_config
+         st.session_state.model_name = model_config.get("name") or model_config.get("model", model_name or "Unknown")
+
+     # Input form
+     with st.form("discrepancy_form"):
+         # Input method selection using tabs
+         tab_links, tab_files = st.tabs(["arXiv and GitHub Links", "Upload Paper and Code Files"])
+
+         # Initialize variables
+         arxiv_url = None
+         github_url = None
+         paper_file = None
+         code_file = None
+         input_method = None
+
+         with tab_links:
+             col1, col2 = st.columns(2)
+
+             with col1:
+                 arxiv_url = st.text_input(
+                     "arXiv Paper",
+                     value=st.session_state.get("example_arxiv_url", ""),
+                     placeholder="https://arxiv.org/abs/2006.12834 or 2006.12834",
+                     help="Enter the arXiv paper URL or just the paper ID",
+                     label_visibility="visible",
+                 )
+
+             with col2:
+                 github_url = st.text_input(
+                     "GitHub Code",
+                     value=st.session_state.get("example_github_url", ""),
+                     placeholder="https://github.com/username/repo",
+                     help="Enter the full GitHub repository URL",
+                     label_visibility="visible",
+                 )
+
+             if arxiv_url or github_url:
+                 input_method = "arXiv and GitHub Links"
+
+         with tab_files:
+             # Instructions section for file preparation
+             with st.expander("📖 How to prepare files", expanded=False):
+                 st.markdown("""
+                 <h3>Converting LaTeX to Markdown with Pandoc</h3>
+
+                 1. Install pandoc:
+                 ```
+                 brew install pandoc
+                 ```
+                 For installing pandoc on Windows or Linux, see the [pandoc documentation](https://pandoc.org/installing.html).
+
+                 2. Convert your LaTeX source to markdown:
+                 ```bash
+                 pandoc main.tex -f latex -t markdown -s --wrap=none -o paper.md
+                 ```
+
+                 <h3>Converting Repository to Text with Gitingest</h3>
+
+                 1. Install gitingest:
+                 ```bash
+                 pip install gitingest
+                 ```
+
+                 2. Generate repository text file:
+                 ```bash
+                 gitingest https://github.com/your-username/your-repo \\
+                     --token YOUR_GITHUB_TOKEN \\
+                     -i "*.c,*.cc,*.cpp,*.cu,*.h,*.hpp,*.java,*.jl,*.m,*.matlab,Makefile,*.md,*.pl,*.ps1,*.py,*.r,*.sh,config.txt,*.rs,readme.txt,requirements_dev.txt,requirements-dev.txt,requirements.dev.txt,requirements.txt,*.scala,*.yaml,*.yml" -o repo.txt
+                 ```
+
+                 **Note**: Modify the file extension list to cover the files you want in the repository text file. For private repositories, you'll need a GitHub token; for public repositories, you can omit the `--token` parameter.
+                 """, unsafe_allow_html=True)
+
+             col1, col2 = st.columns(2)
+
+             with col1:
+                 paper_file = st.file_uploader(
+                     "Paper Markdown File",
+                     type=["md", "markdown", "txt"],
+                     help="Upload the paper as a markdown file",
+                     label_visibility="visible",
+                 )
+
+             with col2:
+                 code_file = st.file_uploader(
+                     "Repository Text File",
+                     type=["txt"],
+                     help="Upload the repository as a text file (generated using gitingest)",
+                     label_visibility="visible",
+                 )
+
+             if paper_file or code_file:
+                 input_method = "Upload Paper and Code Files"
+
+         submitted = st.form_submit_button("Detect Discrepancies", type="primary", use_container_width=True)
+
+     # Store model info in session state
+     st.session_state.model_config = model_config
+
+     # Process form submission
+     if submitted:
+         # Determine input method based on which inputs are filled.
+         # Check if files are provided (Upload method) - prioritize files if any are uploaded
+         if paper_file is not None or code_file is not None:
+             is_valid, error_msg = validate_files(paper_file, code_file)
+             if not is_valid:
+                 st.error(error_msg)
+                 return
+
+             # Read file contents
+             try:
+                 paper_text = paper_file.read().decode("utf-8") if paper_file else None
+                 code_text = code_file.read().decode("utf-8") if code_file else None
+             except Exception as e:
+                 st.error(f"Error reading files: {str(e)}")
+                 return
+
+             arxiv_url = None
+             github_url = None
+         # Otherwise check if URLs are provided (Links method)
+         elif arxiv_url or github_url:
+             is_valid, error_msg = validate_urls(arxiv_url, github_url)
+             if not is_valid:
+                 st.error(error_msg)
+                 return
+
+             paper_text = None
+             code_text = None
+         else:
+             st.error("Please provide either arXiv and GitHub links, or upload paper and code files.")
+             return
+
+         # Clear example values after form submission
+         if "example_arxiv_url" in st.session_state:
+             del st.session_state["example_arxiv_url"]
+         if "example_github_url" in st.session_state:
+             del st.session_state["example_github_url"]
+
+         # Validate model selection
+         if model_config is None:
+             st.error("Please select a valid model.")
+             return
+
+         # Validate API key for provider models
+         model_type = st.session_state.get("model_type", "Provider (OpenAI, Anthropic, Gemini, etc.)")
+         if model_type == "Provider (OpenAI, Anthropic, Gemini, etc.)":
+             if "api_key" not in model_config or not model_config.get("api_key"):
+                 st.error("⚠️ API key required for provider models. Please enter your API key.")
+                 return
+
+         # Process
+         with st.spinner("Processing..."):
+             results = process_discrepancy_detection(
+                 paper_text=paper_text,
+                 code_text=code_text,
+                 arxiv_url=arxiv_url,
+                 github_url=github_url,
+                 model_config=model_config,
+             )
+
+         # Display results
+         if results["error"]:
+             st.error(f"❌ Error: {results['error']}")
+             return
+
+         # Display discrepancies
+         st.divider()
+         st.header("Results")
+
+         if results["discrepancies"]:
+             count = len(results["discrepancies"])
+             discrepancy_text = "discrepancy" if count == 1 else "discrepancies"
+             st.success(f"Found {count} {discrepancy_text}")
+
+             # Display each discrepancy in a tab
+             tab_labels = [f"Discrepancy {idx}" for idx in range(1, count + 1)]
+             tabs = st.tabs(tab_labels)
+
+             for tab, discrepancy in zip(tabs, results["discrepancies"]):
+                 with tab:
+                     st.markdown(discrepancy)
+                     st.divider()
+         else:
+             st.info("✅ No discrepancies found between the paper and code.")
+             st.divider()
+
+         # Technical Details - Combined debug sections
+         with st.expander("🔧 Technical Details", expanded=False):
+             # Raw prompt section
+             if results["prompt"]:
+                 st.subheader("📝 Raw Prompt")
+                 st.markdown("**Final prompt sent to the LLM (after truncation):**")
+                 model_config = st.session_state.get("model_config")
+                 if model_config:
+                     tokenizer_name = model_config["tokenizer"]
+                     token_counter = TokenCounter(model=tokenizer_name)
+                     prompt_tokens = token_counter(results["prompt"])
+                     st.caption(f"Prompt tokens: {prompt_tokens:,}")
+                 # Make prompt scrollable
+                 st.markdown(
+                     """
+                     <style>
+                     .prompt-code-wrapper pre {
+                         max-height: 400px;
+                         overflow-y: auto;
+                     }
+                     </style>
+                     <div class="prompt-code-wrapper">
+                     """,
+                     unsafe_allow_html=True,
+                 )
+                 st.code(results["prompt"], language="text")
+                 st.markdown("</div>", unsafe_allow_html=True)
+                 st.divider()
+
+             # Raw output section
+             if results["llm_response"]:
+                 st.subheader("📄 Raw LLM Output")
+                 content = (
+                     results["llm_response"]
+                     .get("choices", [{}])[0]
+                     .get("message", {})
+                     .get("content", "")
+                 )
+                 # Show token count instead of character count
+                 model_config = st.session_state.get("model_config")
+                 if model_config:
+                     tokenizer_name = model_config["tokenizer"]
+                     token_counter = TokenCounter(model=tokenizer_name)
+                     output_tokens = token_counter(content)
+                     st.caption(f"Output tokens: {output_tokens:,}")
+                 st.code(content, language="yaml")
+                 st.divider()
+
+             # Step timing information
+             if results.get("step_timings"):
+                 st.subheader("⏱️ Step Timing")
+                 step_timings = results["step_timings"]
+                 total_time = sum(step_timings.values())
+
+                 # Display timing for each step
+                 for step_name, step_time in step_timings.items():
+                     percentage = (step_time / total_time * 100) if total_time > 0 else 0
+                     st.write(f"**{step_name}**: {step_time:.2f}s ({percentage:.1f}%)")
+
+                 st.metric("**Total Time**", f"{total_time:.2f}s")
+                 st.divider()
+
+             # Debug info
+             st.subheader("🔍 Debug Information")
+             col1, col2, col3 = st.columns(3)
+             with col1:
+                 # Get model config from session state for token counting
+                 model_config = st.session_state.get("model_config")
+                 if model_config:
+                     tokenizer_name = model_config["tokenizer"]
+                     token_counter = TokenCounter(model=tokenizer_name)
+
+                     if results["paper_text"]:
+                         paper_tokens = token_counter(results["paper_text"])
+                         st.metric("Paper Tokens", f"{paper_tokens:,}")
+                     if results["code_prompt"]:
+                         code_tokens = token_counter(results["code_prompt"])
+                         st.metric("Code Tokens", f"{code_tokens:,}")
+             with col2:
+                 if results["llm_response"]:
+                     usage = results["llm_response"].get("usage", {})
+                     if usage:
+                         input_tokens = usage.get("prompt_tokens", "N/A")
+                         output_tokens = usage.get("completion_tokens", "N/A")
+                         st.metric("Input Tokens", f"{input_tokens:,}" if input_tokens != "N/A" else "N/A")
+                         st.metric("Output Tokens", f"{output_tokens:,}" if output_tokens != "N/A" else "N/A")
+             with col3:
+                 if results["llm_response"]:
+                     usage = results["llm_response"].get("usage", {})
+                     if usage:
+                         total_tokens = usage.get("total_tokens", "N/A")
+                         st.metric("Total Tokens", f"{total_tokens:,}" if total_tokens != "N/A" else "N/A")
+                     # Extract cost from response metadata
+                     cost = results["llm_response"].get("metadata", {}).get("cost", 0.0)
+                     if cost > 0:
+                         st.metric("Cost", f"${cost:.4f}")
+                     else:
+                         st.metric("Cost", "Free")
+
+
+ if __name__ == "__main__":
+     main()
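The token budgeting inside `process_discrepancy_detection` can be condensed into a short worked example. A minimal sketch with all token counts illustrative; only `MAX_TOKENS_BUFFER = 0.9` and the 95% completion margin come from the code above:

```python
# Worked example of the prompt/completion token budget (numbers illustrative).
MAX_TOKENS_BUFFER = 0.9  # use at most 90% of the context for the input prompt

max_context = 131072                # model context window
tokens_intermediate_prompt = 9500   # paper + template, measured with TokenCounter

# Budget left for repository code in the prompt.
max_total_tokens = int(max_context * MAX_TOKENS_BUFFER)                # 117964
remaining_code_tokens = max_total_tokens - tokens_intermediate_prompt  # 108464

# After the (possibly truncated) code has been appended:
final_tokens = 98000
remaining_for_completion = max_context - final_tokens                  # 33072
max_tokens_for_completion = max(1, int(remaining_for_completion * 0.95))  # 31418

print(remaining_code_tokens, max_tokens_for_completion)
```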
core/__init__.py ADDED
@@ -0,0 +1,3 @@
+ # Core modules for ScicoQA demo
+
+
core/arxiv2md_demo.py ADDED
@@ -0,0 +1,113 @@
+ """Standalone arxiv2md integration for converting arXiv papers to markdown."""
+
+ import logging
+ import re
+ from pathlib import Path
+
+ import requests
+
+ logger = logging.getLogger(__name__)
+
+
+ class Arxiv2MD:
+     """Convert arXiv papers to markdown using arxiv2md API."""
+
+     API_BASE = "https://arxiv2md.org/api/markdown"
+     RATE_LIMIT_RPM = 30  # 30 requests per minute per IP
+
+     def __init__(self, output_dir: Path = Path("data") / "papers"):
+         self.output_dir = output_dir
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+     def _extract_paper_id(self, arxiv_url: str) -> str:
+         """Extract paper ID from arXiv URL."""
+         logger.info(f"Extracting paper ID from URL: {arxiv_url}")
+
+         # Handle different arXiv URL formats
+         if "arxiv.org" in arxiv_url:
+             # Remove version suffix if present (e.g., v1, v2)
+             arxiv_url = re.sub(r"v\d+$", "", arxiv_url)
+             # Extract ID from URL
+             parts = arxiv_url.split("/")
+             paper_id = parts[-1].replace(".pdf", "").replace(".html", "")
+             logger.info(f"Extracted arXiv ID: {paper_id}")
+             return paper_id
+         else:
+             # Assume it's already an ID
+             paper_id = arxiv_url.replace(".pdf", "").replace(".html", "")
+             return paper_id
+
+     def _get_paper_path(self, paper_id: str) -> Path:
+         """Get the file path for a cached paper."""
+         return self.output_dir / f"{paper_id}.md"
+
+     def _load_cached_paper(self, paper_id: str) -> str | None:
+         """Load cached paper if available."""
+         paper_path = self._get_paper_path(paper_id)
+         if paper_path.exists():
+             with open(paper_path, "r", encoding="utf-8") as f:
+                 text = f.read()
+             logger.info(f"Loaded cached paper {paper_id} from {paper_path}")
+             return text
+         return None
+
+     def _save_paper(self, paper_id: str, markdown: str):
+         """Save processed paper to cache."""
+         paper_path = self._get_paper_path(paper_id)
+         with open(paper_path, "w", encoding="utf-8") as f:
+             f.write(markdown)
+         logger.info(f"Saved paper {paper_id} to {paper_path}")
+
+     def _fetch_markdown(self, arxiv_url: str) -> str:
+         """Fetch markdown from arxiv2md API."""
+         logger.info(f"Fetching markdown from arxiv2md API for {arxiv_url}")
+
+         # Prepare API parameters
+         params = {
+             "url": arxiv_url,
+             "remove_refs": "true",  # Remove references section (required)
+             "remove_toc": "true",  # Remove table of contents
+             "remove_citations": "true",  # Remove inline citations
+         }
+
+         try:
+             response = requests.get(self.API_BASE, params=params, timeout=60)
+             response.raise_for_status()
+             markdown = response.text
+             logger.info(f"Successfully fetched markdown ({len(markdown)} chars)")
+             return markdown
+         except requests.exceptions.RequestException as e:
+             logger.error(f"Error fetching from arxiv2md API: {e}")
+             raise Exception(f"Failed to fetch paper from arxiv2md: {e}")
+
+     def __call__(self, arxiv_url: str) -> str:
+         """Process an arXiv URL and return its markdown content.
+
+         Args:
+             arxiv_url: URL to the arXiv paper (e.g., https://arxiv.org/abs/2006.12834)
+
+         Returns:
+             Markdown text of the paper with references removed
+         """
+         logger.debug(f"Arxiv2MD({arxiv_url})")
+
+         # Extract paper ID
+         paper_id = self._extract_paper_id(arxiv_url)
+
+         # Check cache first
+         cached_text = self._load_cached_paper(paper_id)
+         if cached_text is not None:
+             return cached_text
+
+         # Fetch from API
+         markdown = self._fetch_markdown(arxiv_url)
+
+         # Save to cache
+         self._save_paper(paper_id, markdown)
+
+         return markdown
+
+
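A minimal usage sketch of `Arxiv2MD`; the arXiv ID is the one from the docstring, and the output path matches the app's default:

```python
from pathlib import Path

from core.arxiv2md_demo import Arxiv2MD

converter = Arxiv2MD(output_dir=Path("data/papers"))
markdown = converter("https://arxiv.org/abs/2006.12834")  # a second call hits the on-disk cache
print(markdown[:200])
```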
core/code_loader_demo.py ADDED
@@ -0,0 +1,292 @@
+ """Standalone CodeLoader for loading and processing GitHub repositories."""
+
+ import logging
+ import os
+ import shutil
+ from pathlib import Path
+ from typing import Callable
+
+ import git
+ import nbconvert
+ import nbformat
+
+ logger = logging.getLogger(__name__)
+
+
+ class CodeLoader:
+     """Load and process GitHub repositories for code analysis."""
+
+     def __init__(
+         self,
+         github_url: str,
+         max_file_size_mb: float = 1.0,
+         raw_repo_dir: str | Path = "data/repos-raw",
+     ):
+         logger.info(
+             f"Initializing CodeLoader for {github_url} with max file size "
+             f"{max_file_size_mb} MB and raw repo dir {raw_repo_dir}"
+         )
+         self.github_url = github_url
+         self.max_file_size_mb = max_file_size_mb
+         self.raw_repo_dir = Path(raw_repo_dir)
+         self.repo_path = self.raw_repo_dir / self.github_url_to_repo_name
+
+         self.clone_repo()
+         self.files = self._get_files()
+
+     @property
+     def github_url_to_repo_name(self):
+         """Convert GitHub URL to a safe directory name."""
+         base_name = (
+             self.github_url.rstrip("/").split("/")[-2]
+             + "__"
+             + self.github_url.rstrip("/").split("/")[-1]
+         )
+         # Remove .git suffix if present
+         if base_name.endswith(".git"):
+             base_name = base_name[:-4]
+         return base_name
+
+     def clone_repo(self):
+         """Clone or validate existing repository."""
+         if self.repo_path.exists():
+             logger.info(f"Repository already exists at {self.repo_path}")
+
+             # Validate repository integrity
+             try:
+                 repo = git.Repo(self.repo_path)
+                 # Verify repository health
+                 try:
+                     _ = repo.head.commit.hexsha
+                 except (ValueError, git.BadName) as e:
+                     logger.warning(
+                         f"Repository has missing or corrupted commits at "
+                         f"{self.repo_path}, removing and re-cloning. Error: {e}"
+                     )
+                     shutil.rmtree(self.repo_path)
+                     self.clone_repo()  # Recursive call to re-clone
+                     return
+
+                 logger.info("Repository already exists and is valid")
+                 return
+
+             except (git.InvalidGitRepositoryError, git.GitCommandError) as e:
+                 logger.warning(
+                     f"Invalid or corrupted git repository at {self.repo_path}, "
+                     f"removing and re-cloning. Error: {e}"
+                 )
+                 shutil.rmtree(self.repo_path)
+                 self.clone_repo()  # Recursive call to re-clone
+                 return
+
+         # Clone the repository
+         logger.info(f"Cloning repo {self.github_url} to {self.repo_path}")
+         self.raw_repo_dir.mkdir(parents=True, exist_ok=True)
+         git.Repo.clone_from(self.github_url, str(self.repo_path))
+
+         # Clean up the repository
+         self._cleanup_repo()
+
+     def _cleanup_repo(self):
+         """Remove docs/test directories, convert notebooks, and remove large files."""
+         # Remove docs/test directories
+         for root, dirs, _ in os.walk(self.repo_path):
+             # CRITICAL: Skip .git directory
+             if ".git" in dirs:
+                 dirs.remove(".git")
+
+             # Create a copy of dirs to avoid modification during iteration
+             dirs_to_remove = [
+                 dir
+                 for dir in dirs
+                 if dir in ["docs", "doc", "test", "tests", "example", "examples"]
+             ]
+             for dir in dirs_to_remove:
+                 dir_path = Path(root) / dir
+                 logger.info(f"Removing directory: {dir_path}")
+                 shutil.rmtree(dir_path)
+                 dirs.remove(dir)
+
+         # Convert Jupyter notebooks to Python files
+         for root, dirs, files in os.walk(self.repo_path):
+             # Skip .git directory
+             if ".git" in dirs:
+                 dirs.remove(".git")
+
+             for file in files:
+                 if file.endswith(".ipynb"):
+                     logger.info(f"Converting Jupyter Notebook {file} to .py")
+                     try:
+                         nb = nbformat.read(Path(root) / file, as_version=4)
+                         # Clear outputs
+                         for cell in nb.cells:
+                             if cell.get("cell_type") == "code":
+                                 cell["outputs"] = []
+                                 cell["execution_count"] = None
+
+                         # Convert to .py
+                         exporter = nbconvert.PythonExporter()
+                         source, _ = exporter.from_notebook_node(nb)
+                         source = (
+                             "# This file was converted from a jupyter notebook "
+                             f"called {file}. All outputs have been removed.\n{source}"
+                         )
+                         with open(Path(root) / file.replace(".ipynb", ".py"), "w") as f:
+                             f.write(source)
+                         # Remove the original notebook
+                         os.remove(Path(root) / file)
+                     except Exception as e:
+                         logger.warning(f"Failed to convert notebook {file}: {e}")
+                         raise
+
+         # Remove large files
+         for root, dirs, files in os.walk(self.repo_path):
+             # Skip .git directory
+             if ".git" in dirs:
+                 dirs.remove(".git")
+
+             for file in files:
+                 file_path = Path(root) / file
+                 try:
+                     file_size = file_path.stat().st_size
+                 except FileNotFoundError as e:
+                     logger.warning(f"Failed to get size of {file_path}: {e}")
+                     continue
+                 if file_size > self.mb_to_bytes(self.max_file_size_mb):
+                     logger.info(f"Removing large file: {file_path}")
+                     os.remove(file_path)
+
+     def _get_files(self):
+         """Get all files from the repository."""
+         files = {}
+         for root, _, _files in os.walk(self.repo_path):
+             for file in _files:
+                 file_path = Path(root) / file
+                 if ".git" in str(file_path):
+                     continue
+
+                 # Get relative path from repo root
+                 file_path_key = file_path.relative_to(self.repo_path)
+
+                 try:
+                     with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+                         content = f.read()
+                     files[str(file_path_key)] = content
+                 except Exception as e:
+                     logger.warning(f"Could not read {file_path}: {e}")
+
+         # Order keys alphabetically
+         files = dict(sorted(files.items()))
+         return files
+
+     @staticmethod
+     def mb_to_bytes(mb: float) -> int:
+         """Convert megabytes to bytes."""
+         return int(mb * 1024 * 1024)
+
+     def get_files_by_extension(
+         self, extensions: list[str] | None = None
+     ) -> dict[str, str]:
+         """Get files filtered by extension."""
+         if extensions is None:
+             # Note: ipynb files are converted to .py during cleanup.
+             # Entries are matched via k.lower().endswith(...), so they must be
+             # lowercase ("makefile" matches "Makefile").
+             extensions = [
+                 ".c",
+                 ".cc",
+                 ".cpp",
+                 ".cu",
+                 ".h",
+                 ".hpp",
+                 ".java",
+                 ".jl",
+                 ".m",
+                 ".matlab",
+                 "makefile",
+                 ".md",
+                 ".pl",
+                 ".ps1",
+                 ".py",
+                 ".r",
+                 ".sh",
+                 "config.txt",
+                 ".rs",
+                 "readme.txt",
+                 "requirements_dev.txt",
+                 "requirements-dev.txt",
+                 "requirements.dev.txt",
+                 "requirements.txt",
+                 ".scala",
+                 ".yaml",
+                 ".yml",
+             ]
+         return {
+             k: v
+             for k, v in self.files.items()
+             if k.lower().endswith(tuple(extensions))
+         }
+
+     def get_repo_tree(self):
+         """Generate a tree representation of the repository."""
+         repo_tree = ""
+         for root, dirs, files in os.walk(self.repo_path):
+             # Exclude the .git directory
+             if ".git" in dirs:
+                 dirs.remove(".git")
+
+             level = str(Path(root).relative_to(self.repo_path)).count(os.sep)
+             indent = "│ " * (level - 1) + "├── " if level > 0 else ""
+
+             # Don't print the starting path itself, just its contents
+             if level > 0:
+                 repo_tree += f"{indent}{Path(root).name}/\n"
+
+             sub_indent = "│ " * level + "├── "
+             for f in files:
+                 repo_tree += f"{sub_indent}{f}\n"
+         return repo_tree
+
+     def get_code_prompt(
+         self,
+         file_extensions: list[str] | None = None,
+         token_counter: Callable | None = None,
+         max_tokens: int | None = None,
+         code_changes: list[dict[str, str]] | None = None,
+     ) -> str:
+         """Generate code prompt with repo tree and file contents."""
+         code_prompt = "Repo tree:\n" + self.get_repo_tree() + "\n\n"
+         tokens = token_counter(code_prompt) if token_counter is not None else 0
+
+         files_to_replace = {}
+         if code_changes:
+             files_to_replace = {
+                 cc["file_name"]: cc["discrepancy_code"] for cc in code_changes
+             }
+             logger.debug(
+                 f"Files to replace: {len(files_to_replace)}: {files_to_replace.keys()}"
+             )
+
+         for file_path, file_content in self.get_files_by_extension(
+             file_extensions
+         ).items():
+             if file_path in files_to_replace:
+                 logger.debug(f"Replacing code for {file_path} with changed code")
+                 file_content = files_to_replace[file_path]
+             code_file = f"# ---\n# File: {file_path}\n# Content:\n{file_content}\n"
+             if token_counter is not None:
+                 logger.debug(f"Adding file: {file_path}")
+                 num_tokens = token_counter(code_file)
+                 tokens += num_tokens
+                 logger.debug(
+                     f"Number of tokens in file: {num_tokens}. "
+                     f"Total number of tokens in code prompt: {tokens}"
+                 )
+                 if max_tokens and tokens > max_tokens:
+                     logger.warning(
+                         f"Truncating. Max tokens reached for {self.github_url}. "
+                         f"Max tokens for code is {max_tokens}"
+                     )
+                     break
+             code_prompt += code_file
+         return code_prompt
+
+
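A hedged usage sketch of `CodeLoader` combined with `TokenCounter`, mirroring how app.py wires them together. The repository URL and token limit are illustrative, and `TokenCounter` lives in core/token_counter_demo.py, which is not shown in this commit view:

```python
from core.code_loader_demo import CodeLoader
from core.token_counter_demo import TokenCounter

loader = CodeLoader(
    github_url="https://github.com/UKPLab/scicoqa",  # illustrative repository
    max_file_size_mb=1.0,
)
counter = TokenCounter(model="openai/gpt-4o")  # tokenizer name as used in PROVIDER_PRESETS
code_prompt = loader.get_code_prompt(token_counter=counter, max_tokens=50_000)
print(code_prompt[:500])
```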
core/llm_demo.py ADDED
@@ -0,0 +1,136 @@
+ """Standalone LLM client using litellm for multiple providers."""
+
+ import logging
+
+ from litellm import completion, completion_cost
+
+ logger = logging.getLogger(__name__)
+
+
+ class LLM:
+     """LLM client supporting multiple providers via the litellm unified interface."""
+
+     def __init__(
+         self,
+         model: str,
+         api_key: str | None = None,
+         api_base: str | None = None,
+         temperature: float = 1.0,
+         top_p: float = 1.0,
+         reasoning_effort: str = "high",
+         max_tokens: int | None = None,
+         max_context: int | None = None,
+     ):
+         """
+         Initialize LLM client.
+
+         Args:
+             model: Model identifier in litellm format (e.g., "gpt-4o", "claude-3-5-sonnet", "openrouter/nvidia/nemotron-3-nano-30b-a3b:free", "ollama/llama2")
+             api_key: API key (optional; can also be set via environment variable)
+             api_base: API base URL (for local models like Ollama/vLLM)
+             temperature: Sampling temperature
+             top_p: Top-p sampling parameter
+             reasoning_effort: Reasoning effort level ("high" for models that support it)
+             max_tokens: Maximum tokens to generate
+             max_context: Maximum context window size (required for Ollama models as num_ctx)
+         """
+         self.model = model
+         self.api_key = api_key
+         self.api_base = api_base
+         self.temperature = temperature
+         self.top_p = top_p
+         self.max_tokens = max_tokens
+         self.max_context = max_context
+
+         # Convert reasoning_effort to extra_body format
+         if reasoning_effort == "high":
+             self.extra_body = {"think": "high"}
+         else:
+             self.extra_body = {}
+
+         # Never log API keys - only log a masked version
+         masked_key = f"{api_key[:8]}..." if api_key and len(api_key) > 8 else "None"
+         logger.info(f"Initialized LLM client for {model} (key: {masked_key}, api_base: {api_base})")
+
+     def __call__(self, prompt: str) -> dict:
+         """
+         Generate completion from prompt.
+
+         Args:
+             prompt: Input prompt text
+
+         Returns:
+             Response dictionary with 'choices' containing the generated text and 'cost' in metadata
+         """
+         # Never log the prompt itself; it may contain sensitive info
+         logger.debug(f"Calling LLM with prompt length: {len(prompt)} chars")
+
+         try:
+             # Build base kwargs - litellm handles provider detection automatically
+             kwargs = {
+                 "model": self.model,
+                 "messages": [{"role": "user", "content": prompt}],
+                 "temperature": self.temperature,
+                 "top_p": self.top_p,
+                 "max_tokens": self.max_tokens,
+             }
+
+             # Set API key if provided
+             if self.api_key:
+                 kwargs["api_key"] = self.api_key
+             # Set API base for local models
+             if self.api_base:
+                 kwargs["api_base"] = self.api_base
+
+             # For Ollama models, set num_ctx (max context tokens)
+             if self.model.startswith("ollama/") and self.max_context:
+                 kwargs["num_ctx"] = self.max_context
+                 logger.debug(f"Using {self.max_context} tokens (num_ctx) for Ollama model {self.model}")
+
+             # Add extra_body for reasoning effort if specified
+             if self.extra_body:
+                 kwargs["extra_body"] = self.extra_body
+
+             response = completion(**kwargs)
+
+             # Convert to dict format
+             if hasattr(response, "model_dump"):
+                 result = response.model_dump()
+             else:
+                 # Fallback for older litellm versions
+                 result = {
+                     "choices": [
+                         {
+                             "message": {
+                                 "content": response.choices[0].message.content
+                             }
+                         }
+                     ],
+                     "usage": response.usage.model_dump() if hasattr(response.usage, "model_dump") else {},
+                 }
+
+             # Calculate cost using litellm
+             try:
+                 cost = completion_cost(response)
+             except Exception as e:
+                 logger.warning(f"Error calculating cost: {e}")
+                 cost = 0.0
+
+             # Add cost to result metadata
+             if "metadata" not in result:
+                 result["metadata"] = {}
+             result["metadata"]["cost"] = cost
+
+             logger.info(f"LLM call completed successfully (cost: ${cost:.4f})")
+             return result
+
+         except Exception as e:
+             # Never log API keys in error messages
+             error_msg = str(e)
+             # Remove any potential API key leaks from error messages
+             if self.api_key and self.api_key in error_msg:
+                 error_msg = error_msg.replace(self.api_key, "***REDACTED***")
+             logger.error(f"Error calling LLM: {error_msg}")
+             raise Exception(f"LLM API error: {error_msg}") from e
+
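A minimal usage sketch for this client; the model name and key below are placeholders, not values from the commit:

```python
from core.llm_demo import LLM

llm = LLM(
    model="gpt-4o",
    api_key="sk-...",        # or rely on OPENAI_API_KEY in the environment
    temperature=0.2,
    max_tokens=512,
    reasoning_effort="off",  # any value other than "high" sends no extra_body
)
result = llm("Summarize the goal of discrepancy detection in one sentence.")
print(result["choices"][0]["message"]["content"])
print(f"cost: ${result['metadata']['cost']:.4f}")
```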
core/model_config.py ADDED
@@ -0,0 +1,214 @@
+ """Model configuration helpers and preset models."""
+
+ from typing import Any
+
+ # Preset provider models for quick selection
+ PROVIDER_PRESETS = {
+     "GPT-5": {
+         "model": "gpt-5-2025-08-07",
+         "tokenizer": "openai/gpt-5-2025-08-07",
+         "max_context": 272000,
+         "api_key_env": "OPENAI_API_KEY",
+     },
+     "GPT-5 Mini": {
+         "model": "gpt-5-mini-2025-08-07",
+         "tokenizer": "openai/gpt-5-mini-2025-08-07",
+         "max_context": 272000,
+         "api_key_env": "OPENAI_API_KEY",
+     },
+     "GPT-5 Nano": {
+         "model": "gpt-5-nano-2025-08-07",
+         "tokenizer": "openai/gpt-5-nano-2025-08-07",
+         "max_context": 272000,
+         "api_key_env": "OPENAI_API_KEY",
+     },
+     "GPT-4o": {
+         "model": "gpt-4o",
+         "tokenizer": "openai/gpt-4o",
+         "max_context": 128000,
+         "api_key_env": "OPENAI_API_KEY",
+     },
+     "GPT-4 Turbo": {
+         "model": "gpt-4-turbo",
+         "tokenizer": "openai/gpt-4-turbo",
+         "max_context": 128000,
+         "api_key_env": "OPENAI_API_KEY",
+     },
+     "Claude 3.5 Sonnet": {
+         "model": "claude-3-5-sonnet-20241022",
+         "tokenizer": "anthropic/claude-3-5-sonnet",
+         "max_context": 200000,
+         "api_key_env": "ANTHROPIC_API_KEY",
+     },
+     "Claude 3 Opus": {
+         "model": "claude-3-opus-20240229",
+         "tokenizer": "anthropic/claude-3-opus",
+         "max_context": 200000,
+         "api_key_env": "ANTHROPIC_API_KEY",
+     },
+     "Gemini 3.0 Pro": {
+         "model": "gemini/gemini-3-pro-preview",
+         "tokenizer": "gemini/gemini-3-pro-preview",
+         "max_context": 2000000,
+         "api_key_env": "GEMINI_API_KEY",
+     },
+     "Gemini 3.0 Flash": {
+         "model": "gemini/gemini-3-flash-preview",
+         "tokenizer": "gemini/gemini-3-flash-preview",
+         "max_context": 1000000,
+         "api_key_env": "GEMINI_API_KEY",
+     },
+     "Gemini 2.5 Pro": {
+         "model": "gemini/gemini-2.5-pro",
+         "tokenizer": "gemini/gemini-2.5-pro",
+         "max_context": 2000000,
+         "api_key_env": "GEMINI_API_KEY",
+     },
+     "Gemini 2.5 Flash": {
+         "model": "gemini/gemini-2.5-flash",
+         "tokenizer": "gemini/gemini-2.5-flash",
+         "max_context": 1000000,
+         "api_key_env": "GEMINI_API_KEY",
+     },
+ }
+
+
+ def create_local_model_config(
+     model: str,
+     api_base: str | None = None,
+     max_context: int = 131072,
+     tokenizer: str | None = None,
+ ) -> dict[str, Any]:
+     """
+     Create a local model configuration.
+
+     Args:
+         model: Model name (e.g., "ollama/llama2" or "gpt-3.5-turbo" for vLLM)
+         api_base: API base URL (defaults based on model type)
+         max_context: Maximum context window size
+         tokenizer: Tokenizer name for token counting
+
+     Returns:
+         Model configuration dictionary
+     """
+     # Set default API base based on model type
+     if api_base is None:
+         if model.startswith("ollama/"):
+             api_base = "http://localhost:11434"
+         elif model.startswith("vllm/") or not model.startswith(("ollama/", "openrouter/")):
+             # Assume OpenAI-compatible (vLLM)
+             api_base = "http://localhost:8000/v1"
+
+     # Infer tokenizer if not provided
+     if tokenizer is None:
+         if model.startswith("ollama/"):
+             # Try to infer from model name
+             model_name = model.replace("ollama/", "")
+             tokenizer = f"hf/{model_name}"
+         else:
+             # For vLLM/OpenAI-compatible, try to infer
+             tokenizer = model.replace("vllm/", "")
+
+     return {
+         "type": "local",
+         "model": model,
+         "api_base": api_base,
+         "max_context": max_context,
+         "tokenizer": tokenizer,
+     }
+
+
+ def create_provider_model_config(
+     model: str,
+     api_key: str,
+     max_context: int = 128000,
+     tokenizer: str | None = None,
+ ) -> dict[str, Any]:
+     """
+     Create a provider model configuration.
+
+     Args:
+         model: Model name in litellm format
+         api_key: API key for the provider
+         max_context: Maximum context window size
+         tokenizer: Tokenizer name for token counting
+
+     Returns:
+         Model configuration dictionary
+     """
+     # Infer tokenizer if not provided
+     if tokenizer is None:
+         # Try to infer from model name
+         if model.startswith("openai/") or "/" not in model:
+             # OpenAI models
+             model_name = model.replace("openai/", "")
+             tokenizer = f"openai/{model_name}"
+         elif model.startswith("anthropic/") or model.startswith("claude-"):
+             # Anthropic models
+             model_name = model.replace("anthropic/", "")
+             tokenizer = f"anthropic/{model_name}"
+         elif model.startswith("gemini/"):
+             # Gemini models
+             model_name = model.replace("gemini/", "")
+             tokenizer = f"gemini/{model_name}"
+         else:
+             # Generic fallback
+             tokenizer = "gpt2"
+
+     return {
+         "type": "provider",
+         "model": model,
+         "api_key": api_key,
+         "max_context": max_context,
+         "tokenizer": tokenizer,
+     }
+
+
+ def get_provider_from_model(model: str) -> str:
+     """
+     Infer provider name from model identifier.
+
+     Args:
+         model: Model name in litellm format
+
+     Returns:
+         Provider name hint (e.g., "openai", "anthropic", "gemini")
+     """
+     model_lower = model.lower()
+     if model_lower.startswith("openai/") or "/" not in model:
+         return "openai"
+     elif model_lower.startswith("anthropic/") or model_lower.startswith("claude-"):
+         return "anthropic"
+     elif model_lower.startswith("gemini/"):
+         return "gemini"
+     elif model_lower.startswith("openrouter/"):
+         return "openrouter"
+     elif model_lower.startswith("cohere/"):
+         return "cohere"
+     elif model_lower.startswith("mistral/"):
+         return "mistral"
+     else:
+         return "other"
+
+
+ def get_api_key_env_name(provider: str) -> str:
+     """
+     Get the environment variable name for API key based on provider.
+
+     Args:
+         provider: Provider name
+
+     Returns:
+         Environment variable name for API key
+     """
+     provider_to_key = {
+         "openai": "OPENAI_API_KEY",
+         "anthropic": "ANTHROPIC_API_KEY",
+         "gemini": "GEMINI_API_KEY",
+         "openrouter": "OPENROUTER_API_KEY",
+         "cohere": "COHERE_API_KEY",
+         "mistral": "MISTRAL_API_KEY",
+         "other": "API_KEY",
+     }
+     return provider_to_key.get(provider.lower(), "API_KEY")
+
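A sketch of resolving a preset into a provider config, assuming the preset's API key is exported in the environment:

```python
import os

from core.model_config import PROVIDER_PRESETS, create_provider_model_config

preset = PROVIDER_PRESETS["GPT-4o"]
config = create_provider_model_config(
    model=preset["model"],
    api_key=os.environ[preset["api_key_env"]],  # e.g., OPENAI_API_KEY
    max_context=preset["max_context"],
    tokenizer=preset["tokenizer"],
)
# config["type"] == "provider"; its fields feed directly into the LLM client.
```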
core/ollama_models.py ADDED
@@ -0,0 +1,41 @@
+ """Helper functions to query the Ollama API for available models."""
+
+ import logging
+
+ import requests
+
+ logger = logging.getLogger(__name__)
+
+
+ def fetch_ollama_models(api_base: str) -> list[str]:
+     """
+     Fetch available models from the Ollama API.
+
+     Args:
+         api_base: Ollama API base URL (e.g., "http://localhost:11434")
+
+     Returns:
+         List of model names available on the Ollama server
+     """
+     try:
+         # Ollama API endpoint for listing models
+         url = f"{api_base.rstrip('/')}/api/tags"
+         response = requests.get(url, timeout=5)
+         response.raise_for_status()
+
+         data = response.json()
+         models = data.get("models", [])
+
+         # Extract model names
+         model_names = [model.get("name", "") for model in models if model.get("name")]
+
+         logger.info(f"Fetched {len(model_names)} models from Ollama at {api_base}")
+         return model_names
+
+     except requests.exceptions.RequestException as e:
+         logger.error(f"Error fetching models from Ollama: {e}")
+         return []
+     except Exception as e:
+         logger.error(f"Unexpected error fetching models from Ollama: {e}")
+         return []
+
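A sketch of listing a local server's models and prefixing them for litellm:

```python
from core.ollama_models import fetch_ollama_models

# Returns [] if the server is unreachable, so this is safe to call eagerly.
for name in fetch_ollama_models("http://localhost:11434"):
    print(f"ollama/{name}")  # litellm expects the "ollama/" prefix
```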
core/openrouter_models.py ADDED
@@ -0,0 +1,205 @@
+ """Helper functions to fetch and filter free models from the OpenRouter API."""
+
+ import json
+ import logging
+ import time
+ from pathlib import Path
+ from typing import Any
+
+ import requests
+
+ logger = logging.getLogger(__name__)
+
+ OPENROUTER_API_URL = "https://openrouter.ai/api/v1/models"
+ CACHE_DIR = Path(".cache")
+ CACHE_FILE = CACHE_DIR / "openrouter_models.json"
+ CACHE_DURATION_SECONDS = 24 * 60 * 60  # 24 hours
+
+
+ def is_free_model(model: dict[str, Any]) -> bool:
+     """
+     Check if a model is free based on its ID or pricing.
+
+     Args:
+         model: Model dictionary from OpenRouter API
+
+     Returns:
+         True if the model is free, False otherwise
+     """
+     model_id = model.get("id", "")
+
+     # Check if the model has a :free suffix
+     if ":free" in model_id:
+         return True
+
+     # Check if pricing is zero or null
+     pricing = model.get("pricing", {})
+     prompt_price = pricing.get("prompt", "0")
+     completion_price = pricing.get("completion", "0")
+
+     # Convert to float if possible, otherwise check if it's "0" or null
+     try:
+         prompt_price_float = float(prompt_price) if prompt_price else 0.0
+         completion_price_float = float(completion_price) if completion_price else 0.0
+         return prompt_price_float == 0.0 and completion_price_float == 0.0
+     except (ValueError, TypeError):
+         # If conversion fails, check if both are "0" or null/empty
+         return (prompt_price in ["0", None, ""] and
+                 completion_price in ["0", None, ""])
+
+
+ def _load_cache() -> tuple[list[dict[str, Any]] | None, float | None]:
+     """
+     Load cached models from file.
+
+     Returns:
+         Tuple of (cached_models, cache_timestamp), or (None, None) if the cache doesn't exist or is invalid
+     """
+     if not CACHE_FILE.exists():
+         return None, None
+
+     try:
+         with open(CACHE_FILE, "r", encoding="utf-8") as f:
+             cache_data = json.load(f)
+
+         cached_models = cache_data.get("models", None)
+         cache_timestamp = cache_data.get("timestamp", None)
+
+         if cached_models is None or cache_timestamp is None:
+             return None, None
+
+         return cached_models, cache_timestamp
+     except (json.JSONDecodeError, IOError) as e:
+         logger.warning(f"Error loading cache: {e}")
+         return None, None
+
+
+ def _save_cache(models: list[dict[str, Any]]) -> None:
+     """
+     Save models to cache file.
+
+     Args:
+         models: List of model dictionaries to cache
+     """
+     try:
+         CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+         cache_data = {
+             "models": models,
+             "timestamp": time.time(),
+         }
+
+         with open(CACHE_FILE, "w", encoding="utf-8") as f:
+             json.dump(cache_data, f)
+
+         logger.info(f"Cached {len(models)} free models to {CACHE_FILE}")
+     except IOError as e:
+         logger.warning(f"Error saving cache: {e}")
+
+
+ def fetch_free_models() -> list[dict[str, Any]]:
+     """
+     Fetch all free models from the OpenRouter API.
+     Uses a file-based cache that refreshes once per day.
+
+     Returns:
+         List of free model dictionaries with metadata
+     """
+     # Check cache first
+     cached_models, cache_timestamp = _load_cache()
+
+     if cached_models is not None and cache_timestamp is not None:
+         # Check if cache is still valid (less than 24 hours old)
+         age_seconds = time.time() - cache_timestamp
+         if age_seconds < CACHE_DURATION_SECONDS:
+             logger.info(f"Using cached models (age: {age_seconds / 3600:.1f} hours)")
+             return cached_models
+         else:
+             logger.info(f"Cache expired (age: {age_seconds / 3600:.1f} hours), fetching fresh data")
+
+     # Cache is invalid or doesn't exist, fetch from API
+     try:
+         # The OpenRouter API doesn't require authentication for listing models
+         response = requests.get(OPENROUTER_API_URL, timeout=10)
+         response.raise_for_status()
+
+         data = response.json()
+         models = data.get("data", [])
+
+         # Filter to only free models
+         free_models = [model for model in models if is_free_model(model)]
+
+         logger.info(f"Fetched {len(free_models)} free models from OpenRouter")
+
+         # Save to cache
+         _save_cache(free_models)
+
+         return free_models
+
+     except requests.exceptions.RequestException as e:
+         logger.error(f"Error fetching models from OpenRouter: {e}")
+         # If the API call fails but we have cached data, return it even if expired
+         if cached_models is not None:
+             logger.warning("API call failed, using expired cache as fallback")
+             return cached_models
+         return []
+     except Exception as e:
+         logger.error(f"Unexpected error fetching models: {e}")
+         # If the API call fails but we have cached data, return it even if expired
+         if cached_models is not None:
+             logger.warning("Unexpected error, using expired cache as fallback")
+             return cached_models
+         return []
+
+
+ def get_model_config(model: dict[str, Any]) -> dict[str, Any]:
+     """
+     Extract model configuration from an OpenRouter API response.
+
+     Args:
+         model: Model dictionary from OpenRouter API
+
+     Returns:
+         Model configuration dictionary with type, model, max_context, tokenizer
+     """
+     model_id = model.get("id", "")
+     context_length = model.get("context_length")
+
+     # Infer tokenizer from model ID
+     tokenizer = None
+     hugging_face_id = model.get("hugging_face_id")
+
+     # Use Hugging Face ID if available
+     if hugging_face_id:
+         tokenizer = f"hf/{hugging_face_id}"
+     else:
+         # Try to construct tokenizer name from the model ID
+         # For example: "nvidia/nemotron-3-nano-30b-a3b:free" -> "hf/nvidia/nemotron-3-nano-30b-a3b"
+         parts = model_id.split("/")
+         if len(parts) > 1:
+             org = parts[0]
+             model_name = parts[-1].split(":")[0]  # Remove :free suffix
+             tokenizer = f"hf/{org}/{model_name}"
+         else:
+             # Single-part model ID
+             model_name = model_id.split(":")[0]
+             tokenizer = f"hf/{model_name}"
+
+     # Fall back to a generic tokenizer if we can't infer one
+     if not tokenizer:
+         tokenizer = "gpt2"  # Generic fallback
+
+     # Default context length if not provided
+     if context_length is None:
+         context_length = 131072
+
+     return {
+         "type": "free_openrouter",
+         "model": f"openrouter/{model_id}",  # litellm format
+         "max_context": context_length,
+         "tokenizer": tokenizer,
+         "model_id": model_id,
+         "name": model.get("name", model_id),
+         "description": model.get("description", ""),
+     }
+
+
core/prompt_demo.py ADDED
@@ -0,0 +1,82 @@
+ """Standalone prompt template loader."""
+
+ import logging
+ import string
+
+ logger = logging.getLogger(__name__)
+
+ # Embedded discrepancy_generation prompt template
+ DISCREPANCY_GENERATION_PROMPT = """You are an expert in analyzing scientific papers and their code implementations.
+ Your task is to carefully identify concrete discrepancies between what is described in a paper and what is actually implemented in the code.
+
+ ## What counts as a discrepancy
+ - A concrete paper–code discrepancy means a mismatch between what is stated in the original paper (e.g., formulas, algorithms, logic, methods, processes, or other settings) and what is implemented in the original code repository.
+ - Each distinct mismatch should be reported as a separate item.
+
+ ## What does not count as a discrepancy
+ - Missing information in the paper, such as hyperparameters (e.g., "the authors did not specify X").
+ - Hyperparameter mismatches (e.g., learning rate, batch size, dropout rate), since these are typically configurable in the code repository.
+ - Missing implementation in the original code repository (e.g., "the authors did not provide the code for X").
+ - Bugs or errors in the code that are unrelated to what the paper describes.
+
+ ## Output format
+
+ Provide your findings in the following YAML structure:
+
+ ```yaml
+ discrepancies:
+ - <a summary of the discrepancy between the paper and the code in 3-8 sentences. Your description should contain three parts focusing on the discrepancy: 1) summarize what is described in the paper, 2) summarize what is implemented in the code, and 3) summarize the difference. Do not speculate about the impact.>
+ - <if there are multiple discrepancies, put each of them in a separate item.>
+ ```
+
+ ## Paper
+
+ ${paper}
+
+ ## Code
+
+ ${code}
+ """
+
+
+ class Prompt:
+     """Prompt template handler."""
+
+     def __init__(self, template: str = "discrepancy_generation"):
+         """
+         Initialize prompt template.
+
+         Args:
+             template: Template name (currently only "discrepancy_generation" is supported)
+         """
+         self.template = template
+
+         if template == "discrepancy_generation":
+             self.prompt_template = DISCREPANCY_GENERATION_PROMPT
+         else:
+             raise ValueError(f"Template '{template}' not found. Available: 'discrepancy_generation'")
+
+         # Create Template object for variable substitution
+         self.prompt = string.Template(self.prompt_template)
+
+         # Extract variables from the template (get_identifiers() requires Python 3.11+)
+         self.prompt_vars = list(self.prompt.get_identifiers())
+
+     def __call__(self, **kwargs) -> str:
+         """
+         Substitute variables in the prompt template.
+
+         Args:
+             **kwargs: Variables to substitute (e.g., paper, code)
+
+         Returns:
+             Formatted prompt string
+         """
+         # Remove any '<|endoftext|>' markers from the kwargs
+         for k, v in kwargs.items():
+             if isinstance(v, str) and "<|endoftext|>" in v:
+                 kwargs[k] = v.replace("<|endoftext|>", "endoftext")
+
+         return self.prompt.safe_substitute(**kwargs)
+
+
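A sketch of rendering the embedded template; the paper and code strings are placeholders:

```python
from core.prompt_demo import Prompt

prompt = Prompt("discrepancy_generation")
filled = prompt(paper="<paper text here>", code="<concatenated code files here>")
assert "${paper}" not in filled  # safe_substitute filled both variables
```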
core/token_counter_demo.py ADDED
@@ -0,0 +1,35 @@
+ """Standalone token counter using litellm."""
+
+ import logging
+
+ from litellm import token_counter
+
+ logger = logging.getLogger(__name__)
+
+
+ class TokenCounter:
+     """Token counter for various model types using litellm."""
+
+     def __init__(self, model: str):
+         """
+         Initialize token counter.
+
+         Args:
+             model: Model identifier (e.g., "gpt-4", "claude-3-5-sonnet", etc.)
+         """
+         self.model = model
+         logger.info(f"Using litellm token counter for {self.model}")
+
+     def __call__(self, text: str) -> int:
+         """Count tokens in text using litellm."""
+         if len(text) == 0:
+             return 0
+
+         try:
+             return token_counter(model=self.model, text=text)
+         except Exception as e:
+             logger.warning(f"Error counting tokens with litellm: {e}")
+             # Fallback: rough estimate (1 token ≈ 4 characters)
+             return len(text) // 4
+
+
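A sketch of counting tokens, as used when budgeting the code prompt:

```python
from core.token_counter_demo import TokenCounter

count = TokenCounter("gpt-4o")
n = count("def f(x):\n    return x + 1\n")
print(n)  # falls back to len(text) // 4 if litellm lacks a tokenizer for the model
```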
parsing.py ADDED
@@ -0,0 +1,88 @@
+ """Discrepancy parsing logic for extracting discrepancies from LLM output."""
+
+ import logging
+ import re
+
+ logger = logging.getLogger(__name__)
+
+
+ def parse_discrepancies(text: str) -> list[str] | None:
+     """
+     Extract list items (discrepancies) from model output.
+
+     Replicates the _extract_list_items logic from scicoqa/inference/discrepancy_eval.py.
+
+     Args:
+         text: Raw text output from LLM
+
+     Returns:
+         List of discrepancy strings, or None if no discrepancies found
+     """
+     if not text:
+         return None
+
+     # Remove redacted reasoning if present
+     if "</think>" in text:
+         text = text.split("</think>")[1]
+
+     # Detect YAML or dashed list format
+     if "```yaml\ndiscrepancies:" in text:
+         text = text.split("```yaml\ndiscrepancies:")[-1]
+         yaml_or_dashed = True
+     elif "```yaml" in text:
+         text = text.split("```yaml")[-1]
+         yaml_or_dashed = True
+     elif "discrepancies:" in text:
+         text = text.split("discrepancies:")[1]
+         yaml_or_dashed = True
+     elif re.search(r"# Discrepancies[\s\r\n]*-", text, re.IGNORECASE):
+         text = re.split(
+             r"# Discrepancies[\s\r\n]*-", text, maxsplit=1, flags=re.IGNORECASE
+         )[1]
+         text = "- " + text
+         yaml_or_dashed = True
+     else:
+         yaml_or_dashed = False
+
+     if yaml_or_dashed:
+         # Clean up the text. NOTE: str.strip() treats its argument as a set of
+         # characters to remove, not as a substring; the calls below replicate
+         # the original eval logic and are kept as-is.
+         text = text.strip("\n").strip().strip("```yaml").strip("```").strip("\n")
+         text = (
+             text.strip("discrepancies:").strip("discrepancies").strip("\n").strip()
+         )
+
+         # Split by list item pattern
+         pattern = r"\n\s{0,2}-\s+"
+         parts = re.split(pattern, text)
+
+         items = []
+         for part in parts:
+             cleaned = " ".join(part.split())
+             if cleaned and not cleaned.startswith("discrepancies:"):
+                 # Multiple cleaning passes (character-set strips, see NOTE above)
+                 cleaned = cleaned.strip().strip("-").strip()
+                 cleaned = cleaned.strip().strip("-").strip()
+                 cleaned = cleaned.strip().strip("|").strip()
+                 cleaned = cleaned.strip().strip(">-").strip()
+                 cleaned = cleaned.strip().strip(">").strip()
+                 cleaned = cleaned.strip().strip('"').strip()
+                 cleaned = cleaned.strip().strip("'").strip()
+                 cleaned = cleaned.strip("summary: |\n")
+                 cleaned = cleaned.strip("summary: ")
+                 cleaned = cleaned.strip("|")
+                 cleaned = cleaned.strip("\n").strip()
+                 # Remove numbered prefixes
+                 cleaned = re.sub(r"^[0-9]+[\.\)]\s*", "", cleaned)
+                 if cleaned:  # Only add non-empty items
+                     items.append(cleaned)
+     else:
+         items = None
+
+     # Handle empty list case
+     if items and len(items) == 1 and items[0].strip() == "[]":
+         items = None
+
+     return items if items else None
+
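A sketch of the parser on a dashed-list answer; the discrepancy texts are invented examples:

```python
from parsing import parse_discrepancies

raw = (
    "discrepancies:\n"
    "- The paper describes cosine decay, but the code uses a constant learning rate.\n"
    "- The paper applies layer norm before attention; the code applies it after.\n"
)
print(parse_discrepancies(raw))  # two cleaned strings, leading dashes removed
```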
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ streamlit>=1.28.0
+ litellm>=1.17.0
+ requests>=2.31.0
+ gitpython>=3.1.40
+ pyyaml>=6.0
+ python-dotenv>=1.0.0
+ nbconvert>=7.10.0
+ nbformat>=5.9.0
+ tqdm>=4.66.0