diff --git a/scripts/llm/analysis_support.py b/scripts/llm/analysis_support.py index f45e4af93a222cf6fc9a66eac1a964652f4bbf2d..088b450cf3717e6802972264f3b9ac3c3bb4483d 100755 --- a/scripts/llm/analysis_support.py +++ b/scripts/llm/analysis_support.py @@ -25,6 +25,7 @@ Usage: Options: --dry-run Output the response from the LLM rather than committing back to torque. + --log-level LOG_LEVEL Set the logging level. --competition COMPETITION Competition to generate analysis support for, can alternatively be set in config.py. --proposals PROPOSALS Comma-separated list of proposal IDs to @@ -61,7 +62,10 @@ except ImportError: config = object() from dataclasses import asdict, dataclass, field +import re import requests +from requests.adapters import HTTPAdapter, Retry +import textwrap import pprint import argparse import logging @@ -89,6 +93,12 @@ parser.add_argument( action="store_true", help="Output the response from the LLM rather than committing back to Torque", ) +parser.add_argument( + "-l", + "--log-level", + help="Set the logging level", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], +) parser.add_argument( "-p", @@ -359,20 +369,10 @@ sections = { ), "Accessibility": AnalysisPrompt( display=""" - If either budget or narrative proposal include plans/funds - earmarked for accessibility, - please summarize using the following format the proposed - solution: - - Proposal includes $XX for disability-related accommodations, - and details e.g., names that individuals with disabilities are - disproportionately impacted by the financial consequences of - IPV. - - If neither budget nor narrative proposal include plans for/ - funds earmarked for accessibility accommodations, say: - "The proposal does not include plans for disability - accommodations." + If the proposal include plans or funds + for accessibility, please summarize the proposed + solution or accomodation, including the budget if there is + one. """, ), }, @@ -445,8 +445,10 @@ class AnalysisResponse: """ text = self.value - lines = text.split("\n") - if lines[0].endswith(sep): + lines = text.strip().splitlines() + if lines[0].endswith(sep) or not ( + lines[0].strip().startswith("•") or lines[0].strip().startswith("*") + ): text = "\n".join(lines[1:]).strip() return AnalysisResponse(value=text, id=self.id) @@ -476,6 +478,16 @@ class APIClient: self.api_key = api_key self.session = requests.Session() + retry_strategy = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], + ) + + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + def make_request(self, method, path, **kwargs): http_method = getattr(self.session, method) response = http_method( @@ -633,6 +645,22 @@ class LLMProposal(MarkdownRenderer): return self.render(**variables) +def wrap_text(text, width=60, indent=""): + wrapper = textwrap.TextWrapper( + width=width, initial_indent=indent, subsequent_indent=indent + ) + lines = text.splitlines() + wrapped_lines = [wrapper.fill(line) for line in lines] + return "\n".join(wrapped_lines) + + +def clean_text(text): + text = re.sub(r"http[s]?://\S+", " ", text) # Remove URLs + text = re.sub(r"\([0-9\.,]+\)", " ", text) # Remove numbers in parentheses + text = re.sub(r"\[[0-9\.,]+\]", " ", text) # Remove numbers in brackets + return text + + def generate_analysis_support(llm, proposal, search_engine): llm_analysis = {} @@ -642,10 +670,12 @@ def generate_analysis_support(llm, proposal, search_engine): prompts = [] for prompt in section.prompts.values(): - text = LLMProposal(proposal).render_markdown(prompt.template_blocks) + text = clean_text( + LLMProposal(proposal).render_markdown(prompt.template_blocks) + ) if name == "Reputational Risks": - logging.debug(" Searching for controversies...") + logging.info(" Searching for controversies...") query = proposal["Organization Name"] + " controversy" results = search_engine.search(query) @@ -658,26 +688,30 @@ def generate_analysis_support(llm, proposal, search_engine): text += f""" - {result.title} {result.description} - {' '.join([snippet for snippet in result.extra_snippets])} + {" ".join([snippet for snippet in result.extra_snippets])} """ prompts.append( AnalysisRequest( text=text, - considerations=section.sent, + considerations=prompt.sent, ) ) response = llm.get_analyses(prompts, section.sent) + value = response.without_intro().value.replace("• ", "* ") + llm_analysis[name] = asdict( LLMAnalysis( id=response.id, - value=response.without_intro().value, + value=value, prompt=section, ) ) + logging.info(f"{wrap_text(value, indent=' ')}") + logging.debug("") logging.debug("*** Prompt and Analysis Support: ***") logging.debug("") @@ -691,7 +725,10 @@ def cli(): args = parser.parse_args() if args.dry_run: - logging.getLogger().setLevel(logging.DEBUG) + logging.getLogger().setLevel(logging.INFO) + + if args.log_level: + logging.getLogger().setLevel(getattr(logging, args.log_level)) if not args.torque_user or not args.torque_password or not args.torque_url: parser.error("Torque credentials not set") @@ -731,7 +768,11 @@ def cli(): logging.error("Proposal not found") continue - llm_analysis = generate_analysis_support(llm, proposal, brave) + try: + llm_analysis = generate_analysis_support(llm, proposal, brave) + except Exception as e: + logging.error(f"Error generating analysis support: {e}") + continue if not args.dry_run: # Setting this variable on a torqueclient proposal saves the data back diff --git a/scripts/llm/test_analysis_support.py b/scripts/llm/test_analysis_support.py index cff44a81261517785e1af38e87583f5bb16e02e3..d2a3ec186b7b344c715746722dba1e3f5c37605e 100644 --- a/scripts/llm/test_analysis_support.py +++ b/scripts/llm/test_analysis_support.py @@ -57,13 +57,29 @@ def test_commaize_number_float(): # Test without_intro # -def test_without_intro(): +def test_without_intro_with_colon(): assert ( AnalysisResponse(id="1", value="Intro:\n\nSome content").without_intro().value == "Some content" ) +def test_without_intro_without_colon(): + assert ( + AnalysisResponse(id="1", value="Intro.\n\nSome content").without_intro().value + == "Some content" + ) + + +def test_without_intro_not_bullets(): + assert ( + AnalysisResponse(id="1", value="• Not an intro.\n\n• Some content") + .without_intro() + .value + == "• Not an intro.\n\n• Some content" + ) + + # Test render_proposal_markdown #