group-wbl/.venv/lib/python3.13/site-packages/langchain_community/retrievers/asknews.py

import os
import re
from typing import Any, Dict, List, Literal, Optional

from langchain_core.callbacks import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class AskNewsRetriever(BaseRetriever):
    """Retriever that fetches news articles through the AskNews SDK.

    The fields below are forwarded to ``news.search_news`` on the SDK client.
    Credentials are taken from ``client_id`` / ``client_secret`` when set,
    otherwise from the ``ASKNEWS_CLIENT_ID`` / ``ASKNEWS_CLIENT_SECRET``
    environment variables.
    """

    # Forwarded as ``n_articles``.
    k: int = 10
    # Forwarded as ``offset``.
    offset: int = 0
    # Forwarded unchanged; ``None`` is passed through to the SDK as-is.
    start_timestamp: Optional[int] = None
    end_timestamp: Optional[int] = None
    # Search method accepted by the SDK: "nl" or "kw".
    method: Literal["nl", "kw"] = "nl"
    categories: List[
        Literal[
            "All",
            "Business",
            "Crime",
            "Politics",
            "Science",
            "Sports",
            "Technology",
            "Military",
            "Health",
            "Entertainment",
            "Finance",
            "Culture",
            "Climate",
            "Environment",
            "World",
        ]
    ] = ["All"]
    historical: bool = False
    similarity_score_threshold: float = 0.5
    # Extra keyword arguments appended to the ``search_news`` call.
    # ``None`` is treated the same as an empty dict.
    kwargs: Optional[Dict[str, Any]] = {}
    client_id: Optional[str] = None
    client_secret: Optional[str] = None

    def _client_kwargs(self) -> Dict[str, Any]:
        """Build the constructor arguments shared by both SDK clients.

        Raises:
            KeyError: If a credential is neither set on the retriever nor
                present in the environment.
        """
        return {
            "client_id": self.client_id or os.environ["ASKNEWS_CLIENT_ID"],
            "client_secret": (
                self.client_secret or os.environ["ASKNEWS_CLIENT_SECRET"]
            ),
            "scopes": ["news"],
        }

    def _search_kwargs(self, query: str) -> Dict[str, Any]:
        """Build the fixed ``search_news`` arguments for ``query``.

        User-supplied ``kwargs`` are deliberately not merged here: they are
        expanded separately at the call site so that a key colliding with one
        of these arguments still raises ``TypeError`` instead of silently
        overriding it.
        """
        return {
            "query": query,
            "n_articles": self.k,
            "start_timestamp": self.start_timestamp,
            "end_timestamp": self.end_timestamp,
            "method": self.method,
            "categories": self.categories,
            "historical": self.historical,
            "similarity_score_threshold": self.similarity_score_threshold,
            "offset": self.offset,
            # The delimiters must match the pattern in _extract_documents.
            "doc_start_delimiter": "<doc>",
            "doc_end_delimiter": "</doc>",
            "return_type": "both",
        }

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant to a query.
        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use
        Returns:
            List of relevant documents
        Raises:
            ImportError: If the AskNews SDK is not installed
        """
        try:
            from asknews_sdk import AskNewsSDK
        except ImportError as exc:
            raise ImportError(
                "AskNews python package not found. "
                "Please install it with `pip install asknews`."
            ) from exc
        an_client = AskNewsSDK(**self._client_kwargs())
        response = an_client.news.search_news(
            **self._search_kwargs(query),
            # ``kwargs`` is Optional, so guard against None before expanding.
            **(self.kwargs or {}),
        )

        return self._extract_documents(response)

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Asynchronously get documents relevant to a query.
        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use
        Returns:
            List of relevant documents
        Raises:
            ImportError: If the AskNews SDK is not installed
        """
        try:
            from asknews_sdk import AsyncAskNewsSDK
        except ImportError as exc:
            raise ImportError(
                "AskNews python package not found. "
                "Please install it with `pip install asknews`."
            ) from exc
        an_client = AsyncAskNewsSDK(**self._client_kwargs())
        response = await an_client.news.search_news(
            **self._search_kwargs(query),
            # ``kwargs`` is Optional, so guard against None before expanding.
            **(self.kwargs or {}),
        )

        return self._extract_documents(response)

    def _extract_documents(self, response: Any) -> List[Document]:
        """Extract documents from an api response.

        The text between each ``<doc>``/``</doc>`` pair in
        ``response.as_string`` becomes a document's page content; the entry at
        the same position in ``response.as_dicts`` supplies its metadata.
        Args:
            response: Search response returned by the AskNews SDK
        Returns:
            One document per (text, article) pair. If the two views differ in
            length, the unpaired tail is ignored.
        """

        from asknews_sdk.dto.news import SearchResponse

        sr: SearchResponse = response
        matches = re.findall(r"<doc>(.*?)</doc>", sr.as_string, re.DOTALL)
        # zip() pairs positionally and stops at the shorter sequence, so a
        # response with more <doc> blocks than articles no longer raises
        # IndexError.
        return [
            Document(
                page_content=text.strip(),
                metadata={
                    "title": article.title,
                    "source": (
                        str(article.article_url) if article.article_url else None
                    ),
                    "images": article.image_url,
                },
            )
            for text, article in zip(matches, sr.as_dicts)
        ]
Add __pycache__ and .venv directories 2026-01-09 09:48:03 +08:00			`import os`
			`import re`
			`from typing import Any, Dict, List, Literal, Optional`

			`from langchain_core.callbacks import (`
			`AsyncCallbackManagerForRetrieverRun,`
			`CallbackManagerForRetrieverRun,`
			`)`
			`from langchain_core.documents import Document`
			`from langchain_core.retrievers import BaseRetriever`


			`class AskNewsRetriever(BaseRetriever):`
			`"""AskNews retriever."""`

			`k: int = 10`
			`offset: int = 0`
			`start_timestamp: Optional[int] = None`
			`end_timestamp: Optional[int] = None`
			`method: Literal["nl", "kw"] = "nl"`
			`categories: List[`
			`Literal[`
			`"All",`
			`"Business",`
			`"Crime",`
			`"Politics",`
			`"Science",`
			`"Sports",`
			`"Technology",`
			`"Military",`
			`"Health",`
			`"Entertainment",`
			`"Finance",`
			`"Culture",`
			`"Climate",`
			`"Environment",`
			`"World",`
			`]`
			`] = ["All"]`
			`historical: bool = False`
			`similarity_score_threshold: float = 0.5`
			`kwargs: Optional[Dict[str, Any]] = {}`
			`client_id: Optional[str] = None`
			`client_secret: Optional[str] = None`

			`def _get_relevant_documents(`
			`self, query: str, *, run_manager: CallbackManagerForRetrieverRun`
			`) -> List[Document]:`
			`"""Get documents relevant to a query.`
			`Args:`
			`query: String to find relevant documents for`
			`run_manager: The callbacks handler to use`
			`Returns:`
			`List of relevant documents`
			`"""`
			`try:`
			`from asknews_sdk import AskNewsSDK`
			`except ImportError:`
			`raise ImportError(`
			`"AskNews python package not found. "`
			"Please install it with `pip install asknews`."
			`)`
			`an_client = AskNewsSDK(`
			`client_id=self.client_id or os.environ["ASKNEWS_CLIENT_ID"],`
			`client_secret=self.client_secret or os.environ["ASKNEWS_CLIENT_SECRET"],`
			`scopes=["news"],`
			`)`
			`response = an_client.news.search_news(`
			`query=query,`
			`n_articles=self.k,`
			`start_timestamp=self.start_timestamp,`
			`end_timestamp=self.end_timestamp,`
			`method=self.method,`
			`categories=self.categories,`
			`historical=self.historical,`
			`similarity_score_threshold=self.similarity_score_threshold,`
			`offset=self.offset,`
			`doc_start_delimiter="<doc>",`
			`doc_end_delimiter="</doc>",`
			`return_type="both",`
			`**self.kwargs,`
			`)`

			`return self._extract_documents(response)`

			`async def _aget_relevant_documents(`
			`self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun`
			`) -> List[Document]:`
			`"""Asynchronously get documents relevant to a query.`
			`Args:`
			`query: String to find relevant documents for`
			`run_manager: The callbacks handler to use`
			`Returns:`
			`List of relevant documents`
			`"""`
			`try:`
			`from asknews_sdk import AsyncAskNewsSDK`
			`except ImportError:`
			`raise ImportError(`
			`"AskNews python package not found. "`
			"Please install it with `pip install asknews`."
			`)`
			`an_client = AsyncAskNewsSDK(`
			`client_id=self.client_id or os.environ["ASKNEWS_CLIENT_ID"],`
			`client_secret=self.client_secret or os.environ["ASKNEWS_CLIENT_SECRET"],`
			`scopes=["news"],`
			`)`
			`response = await an_client.news.search_news(`
			`query=query,`
			`n_articles=self.k,`
			`start_timestamp=self.start_timestamp,`
			`end_timestamp=self.end_timestamp,`
			`method=self.method,`
			`categories=self.categories,`
			`historical=self.historical,`
			`similarity_score_threshold=self.similarity_score_threshold,`
			`offset=self.offset,`
			`return_type="both",`
			`doc_start_delimiter="<doc>",`
			`doc_end_delimiter="</doc>",`
			`**self.kwargs,`
			`)`

			`return self._extract_documents(response)`

			`def _extract_documents(self, response: Any) -> List[Document]:`
			`"""Extract documents from an api response."""`

			`from asknews_sdk.dto.news import SearchResponse`

			`sr: SearchResponse = response`
			`matches = re.findall(r"<doc>(.*?)</doc>", sr.as_string, re.DOTALL)`
			`docs = [`
			`Document(`
			`page_content=matches[i].strip(),`
			`metadata={`
			`"title": sr.as_dicts[i].title,`
			`"source": str(sr.as_dicts[i].article_url)`
			`if sr.as_dicts[i].article_url`
			`else None,`
			`"images": sr.as_dicts[i].image_url,`
			`},`
			`)`
			`for i in range(len(matches))`
			`]`
			`return docs`