Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from datasets import load_dataset | |
| from pathlib import Path | |
| from llama_index.core import SimpleDirectoryReader | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| from llama_index.core.node_parser import SentenceSplitter | |
| from llama_index.core.ingestion import IngestionPipeline | |
| from llama_index.core import VectorStoreIndex | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI | |
| import nest_asyncio | |
| from llama_index.core.evaluation import FaithfulnessEvaluator | |
| import chromadb | |
| from llama_index.vector_stores.chroma import ChromaVectorStore | |
def query(input):
    """Answer *input* with a small RAG pipeline over persona documents.

    On every call this downloads the finepersonas-v0.1-tiny dataset,
    writes each persona to ``data/persona_<i>.txt``, embeds the first 10
    documents into a persistent Chroma collection, builds a vector index
    over that collection, and runs a tree-summarize query engine backed
    by a HuggingFace-hosted LLM.

    NOTE(review): rebuilding the dataset and index on every request is
    very expensive; consider hoisting the ingestion out of the handler.

    Args:
        input: The user's question (plain text from the Gradio UI).

    Returns:
        The LLM's answer as a plain string.
    """
    # Allow nested event loops (Gradio already runs one) before any
    # LlamaIndex calls that may touch asyncio.
    nest_asyncio.apply()

    dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

    # Write each persona record to its own text file under ./data.
    data_dir = Path("data")
    data_dir.mkdir(parents=True, exist_ok=True)
    for i, persona in enumerate(dataset):
        with open(data_dir / f"persona_{i}.txt", "w", encoding="utf-8") as f:
            f.write(persona["persona"])

    # Load documents and prepare the embedding model.
    documents = SimpleDirectoryReader(input_dir="data").load_data()
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    # Create the persistent vector store FIRST so the ingestion pipeline
    # can write its embedded nodes into it. Bug fix: the original never
    # connected the pipeline to the store, so the index was built over a
    # collection the pipeline had not populated.
    db = chromadb.PersistentClient(path="./wgf_chroma_db")
    chroma_collection = db.get_or_create_collection(name="wgf")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    pipeline = IngestionPipeline(
        transformations=[
            SentenceSplitter(),
            embed_model,
        ],
        vector_store=vector_store,
    )
    # Bug fix: the original called pipeline.arun(...) without awaiting it,
    # which only creates a coroutine and ingests nothing. Use the
    # synchronous run() instead.
    pipeline.run(documents=documents[:10])

    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, embed_model=embed_model
    )

    llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
    query_engine = index.as_query_engine(
        llm=llm,
        response_mode="tree_summarize",
    )
    response = query_engine.query(input)

    # Optional faithfulness evaluation (left disabled, as in original):
    # evaluator = FaithfulnessEvaluator(llm=llm)
    # eval_result = evaluator.evaluate_response(response=response)
    # eval_result.passing

    # Return plain text so Gradio's "text" output renders the answer,
    # not the Response object's repr.
    return str(response)
# Minimal text-in / text-out web UI: each submission is handled by query().
demo = gr.Interface(
    fn=query,
    inputs="text",
    outputs="text",
)
demo.launch()